1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84
85 static rtx legitimize_dllimport_symbol (rtx, bool);
86 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
87 static rtx legitimize_pe_coff_symbol (rtx, bool);
88
89 #ifndef CHECK_STACK_LIMIT
90 #define CHECK_STACK_LIMIT (-1)
91 #endif
92
93 /* Return index of given mode in mult and division cost tables. */
94 #define MODE_INDEX(mode) \
95 ((mode) == QImode ? 0 \
96 : (mode) == HImode ? 1 \
97 : (mode) == SImode ? 2 \
98 : (mode) == DImode ? 3 \
99 : 4)
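/* Illustrative sketch only (not part of the original source): MODE_INDEX
   picks the per-mode slot of the mult_init[]/divide[] arrays in the
   processor_costs tables below.  The helper name is hypothetical; the
   mult_init/mult_bit field names and the ix86_cost global are assumptions
   based on struct processor_costs in i386.h.  */
#if 0
static int
example_multiply_cost (enum machine_mode mode, int bits_set)
{
  /* Start-up cost of a multiply in MODE plus a per-set-bit cost, roughly as
     the rtx-cost code consults these tables.  */
  return ix86_cost->mult_init[MODE_INDEX (mode)]
	 + bits_set * ix86_cost->mult_bit;
}
#endif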
100
101 /* Processor costs (relative to an add) */
102 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
103 #define COSTS_N_BYTES(N) ((N) * 2)
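/* For instance, an add is assumed to be 2 bytes, so COSTS_N_BYTES (2) == 4
   == COSTS_N_INSNS (1); entries in the size-tuned table below therefore stay
   on the same scale as the COSTS_N_INSNS-based speed tables.  */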
104
105 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
106
107 static stringop_algs ix86_size_memcpy[2] = {
108 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
109 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
110 static stringop_algs ix86_size_memset[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
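/* A sketch of how these stringop tables are read (assumptions: element [0]
   of each pair is used for 32-bit code and element [1] for 64-bit code, and
   each {max, alg, noalign} entry applies to block sizes up to MAX bytes,
   with -1 meaning "any remaining size"; the hypothetical loop below only
   illustrates the idea and is not the actual decide_alg implementation).  */
#if 0
static enum stringop_alg
example_pick_alg (const stringop_algs *algs, HOST_WIDE_INT size)
{
  for (unsigned int i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || size <= algs->size[i].max)
      return algs->size[i].alg;
  return libcall;	/* Fall back to a library call if nothing matched.  */
}
#endif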
113
114 const
115 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
116 COSTS_N_BYTES (2), /* cost of an add instruction */
117 COSTS_N_BYTES (3), /* cost of a lea instruction */
118 COSTS_N_BYTES (2), /* variable shift costs */
119 COSTS_N_BYTES (3), /* constant shift costs */
120 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
121 COSTS_N_BYTES (3), /* HI */
122 COSTS_N_BYTES (3), /* SI */
123 COSTS_N_BYTES (3), /* DI */
124 COSTS_N_BYTES (5)}, /* other */
125 0, /* cost of multiply per each bit set */
126 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
127 COSTS_N_BYTES (3), /* HI */
128 COSTS_N_BYTES (3), /* SI */
129 COSTS_N_BYTES (3), /* DI */
130 COSTS_N_BYTES (5)}, /* other */
131 COSTS_N_BYTES (3), /* cost of movsx */
132 COSTS_N_BYTES (3), /* cost of movzx */
133 0, /* "large" insn */
134 2, /* MOVE_RATIO */
135 2, /* cost for loading QImode using movzbl */
136 {2, 2, 2}, /* cost of loading integer registers
137 in QImode, HImode and SImode.
138 Relative to reg-reg move (2). */
139 {2, 2, 2}, /* cost of storing integer registers */
140 2, /* cost of reg,reg fld/fst */
141 {2, 2, 2}, /* cost of loading fp registers
142 in SFmode, DFmode and XFmode */
143 {2, 2, 2}, /* cost of storing fp registers
144 in SFmode, DFmode and XFmode */
145 3, /* cost of moving MMX register */
146 {3, 3}, /* cost of loading MMX registers
147 in SImode and DImode */
148 {3, 3}, /* cost of storing MMX registers
149 in SImode and DImode */
150 3, /* cost of moving SSE register */
151 {3, 3, 3}, /* cost of loading SSE registers
152 in SImode, DImode and TImode */
153 {3, 3, 3}, /* cost of storing SSE registers
154 in SImode, DImode and TImode */
155 3, /* MMX or SSE register to integer */
156 0, /* size of l1 cache */
157 0, /* size of l2 cache */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
160 2, /* Branch cost */
161 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
162 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
163 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
164 COSTS_N_BYTES (2), /* cost of FABS instruction. */
165 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
166 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
167 ix86_size_memcpy,
168 ix86_size_memset,
169 1, /* scalar_stmt_cost. */
170 1, /* scalar load_cost. */
171 1, /* scalar_store_cost. */
172 1, /* vec_stmt_cost. */
173 1, /* vec_to_scalar_cost. */
174 1, /* scalar_to_vec_cost. */
175 1, /* vec_align_load_cost. */
176 1, /* vec_unalign_load_cost. */
177 1, /* vec_store_cost. */
178 1, /* cond_taken_branch_cost. */
179 1, /* cond_not_taken_branch_cost. */
180 };
181
182 /* Processor costs (relative to an add) */
183 static stringop_algs i386_memcpy[2] = {
184 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
185 DUMMY_STRINGOP_ALGS};
186 static stringop_algs i386_memset[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189
190 static const
191 struct processor_costs i386_cost = { /* 386 specific costs */
192 COSTS_N_INSNS (1), /* cost of an add instruction */
193 COSTS_N_INSNS (1), /* cost of a lea instruction */
194 COSTS_N_INSNS (3), /* variable shift costs */
195 COSTS_N_INSNS (2), /* constant shift costs */
196 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
197 COSTS_N_INSNS (6), /* HI */
198 COSTS_N_INSNS (6), /* SI */
199 COSTS_N_INSNS (6), /* DI */
200 COSTS_N_INSNS (6)}, /* other */
201 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
202 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
203 COSTS_N_INSNS (23), /* HI */
204 COSTS_N_INSNS (23), /* SI */
205 COSTS_N_INSNS (23), /* DI */
206 COSTS_N_INSNS (23)}, /* other */
207 COSTS_N_INSNS (3), /* cost of movsx */
208 COSTS_N_INSNS (2), /* cost of movzx */
209 15, /* "large" insn */
210 3, /* MOVE_RATIO */
211 4, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {8, 8, 8}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {8, 8, 8}, /* cost of storing fp registers
220 in SFmode, DFmode and XFmode */
221 2, /* cost of moving MMX register */
222 {4, 8}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {4, 8}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {4, 8, 16}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {4, 8, 16}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 0, /* size of l1 cache */
233 0, /* size of l2 cache */
234 0, /* size of prefetch block */
235 0, /* number of parallel prefetches */
236 1, /* Branch cost */
237 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
238 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
239 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
240 COSTS_N_INSNS (22), /* cost of FABS instruction. */
241 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
242 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
243 i386_memcpy,
244 i386_memset,
245 1, /* scalar_stmt_cost. */
246 1, /* scalar load_cost. */
247 1, /* scalar_store_cost. */
248 1, /* vec_stmt_cost. */
249 1, /* vec_to_scalar_cost. */
250 1, /* scalar_to_vec_cost. */
251 1, /* vec_align_load_cost. */
252 2, /* vec_unalign_load_cost. */
253 1, /* vec_store_cost. */
254 3, /* cond_taken_branch_cost. */
255 1, /* cond_not_taken_branch_cost. */
256 };
257
258 static stringop_algs i486_memcpy[2] = {
259 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
260 DUMMY_STRINGOP_ALGS};
261 static stringop_algs i486_memset[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264
265 static const
266 struct processor_costs i486_cost = { /* 486 specific costs */
267 COSTS_N_INSNS (1), /* cost of an add instruction */
268 COSTS_N_INSNS (1), /* cost of a lea instruction */
269 COSTS_N_INSNS (3), /* variable shift costs */
270 COSTS_N_INSNS (2), /* constant shift costs */
271 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
272 COSTS_N_INSNS (12), /* HI */
273 COSTS_N_INSNS (12), /* SI */
274 COSTS_N_INSNS (12), /* DI */
275 COSTS_N_INSNS (12)}, /* other */
276 1, /* cost of multiply per each bit set */
277 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
278 COSTS_N_INSNS (40), /* HI */
279 COSTS_N_INSNS (40), /* SI */
280 COSTS_N_INSNS (40), /* DI */
281 COSTS_N_INSNS (40)}, /* other */
282 COSTS_N_INSNS (3), /* cost of movsx */
283 COSTS_N_INSNS (2), /* cost of movzx */
284 15, /* "large" insn */
285 3, /* MOVE_RATIO */
286 4, /* cost for loading QImode using movzbl */
287 {2, 4, 2}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 4, 2}, /* cost of storing integer registers */
291 2, /* cost of reg,reg fld/fst */
292 {8, 8, 8}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {8, 8, 8}, /* cost of storing fp registers
295 in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {4, 8}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {4, 8}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {4, 8, 16}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {4, 8, 16}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 3, /* MMX or SSE register to integer */
307 4, /* size of l1 cache. 486 has 8kB cache
308 shared for code and data, so 4kB is
309 not really precise. */
310 4, /* size of l2 cache */
311 0, /* size of prefetch block */
312 0, /* number of parallel prefetches */
313 1, /* Branch cost */
314 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
315 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
316 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
317 COSTS_N_INSNS (3), /* cost of FABS instruction. */
318 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
319 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
320 i486_memcpy,
321 i486_memset,
322 1, /* scalar_stmt_cost. */
323 1, /* scalar load_cost. */
324 1, /* scalar_store_cost. */
325 1, /* vec_stmt_cost. */
326 1, /* vec_to_scalar_cost. */
327 1, /* scalar_to_vec_cost. */
328 1, /* vec_align_load_cost. */
329 2, /* vec_unalign_load_cost. */
330 1, /* vec_store_cost. */
331 3, /* cond_taken_branch_cost. */
332 1, /* cond_not_taken_branch_cost. */
333 };
334
335 static stringop_algs pentium_memcpy[2] = {
336 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
337 DUMMY_STRINGOP_ALGS};
338 static stringop_algs pentium_memset[2] = {
339 {libcall, {{-1, rep_prefix_4_byte, false}}},
340 DUMMY_STRINGOP_ALGS};
341
342 static const
343 struct processor_costs pentium_cost = {
344 COSTS_N_INSNS (1), /* cost of an add instruction */
345 COSTS_N_INSNS (1), /* cost of a lea instruction */
346 COSTS_N_INSNS (4), /* variable shift costs */
347 COSTS_N_INSNS (1), /* constant shift costs */
348 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
349 COSTS_N_INSNS (11), /* HI */
350 COSTS_N_INSNS (11), /* SI */
351 COSTS_N_INSNS (11), /* DI */
352 COSTS_N_INSNS (11)}, /* other */
353 0, /* cost of multiply per each bit set */
354 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
355 COSTS_N_INSNS (25), /* HI */
356 COSTS_N_INSNS (25), /* SI */
357 COSTS_N_INSNS (25), /* DI */
358 COSTS_N_INSNS (25)}, /* other */
359 COSTS_N_INSNS (3), /* cost of movsx */
360 COSTS_N_INSNS (2), /* cost of movzx */
361 8, /* "large" insn */
362 6, /* MOVE_RATIO */
363 6, /* cost for loading QImode using movzbl */
364 {2, 4, 2}, /* cost of loading integer registers
365 in QImode, HImode and SImode.
366 Relative to reg-reg move (2). */
367 {2, 4, 2}, /* cost of storing integer registers */
368 2, /* cost of reg,reg fld/fst */
369 {2, 2, 6}, /* cost of loading fp registers
370 in SFmode, DFmode and XFmode */
371 {4, 4, 6}, /* cost of storing fp registers
372 in SFmode, DFmode and XFmode */
373 8, /* cost of moving MMX register */
374 {8, 8}, /* cost of loading MMX registers
375 in SImode and DImode */
376 {8, 8}, /* cost of storing MMX registers
377 in SImode and DImode */
378 2, /* cost of moving SSE register */
379 {4, 8, 16}, /* cost of loading SSE registers
380 in SImode, DImode and TImode */
381 {4, 8, 16}, /* cost of storing SSE registers
382 in SImode, DImode and TImode */
383 3, /* MMX or SSE register to integer */
384 8, /* size of l1 cache. */
385 8, /* size of l2 cache */
386 0, /* size of prefetch block */
387 0, /* number of parallel prefetches */
388 2, /* Branch cost */
389 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
390 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
391 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
392 COSTS_N_INSNS (1), /* cost of FABS instruction. */
393 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
394 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
395 pentium_memcpy,
396 pentium_memset,
397 1, /* scalar_stmt_cost. */
398 1, /* scalar load_cost. */
399 1, /* scalar_store_cost. */
400 1, /* vec_stmt_cost. */
401 1, /* vec_to_scalar_cost. */
402 1, /* scalar_to_vec_cost. */
403 1, /* vec_align_load_cost. */
404 2, /* vec_unalign_load_cost. */
405 1, /* vec_store_cost. */
406 3, /* cond_taken_branch_cost. */
407 1, /* cond_not_taken_branch_cost. */
408 };
409
410 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
411 (we ensure the alignment).  For small blocks an inline loop is still a
412 noticeable win; for bigger blocks either rep movsl or rep movsb is the
413 way to go.  Rep movsb apparently has a more expensive startup time on the
414 CPU, but after 4K the difference is down in the noise.  */
415 static stringop_algs pentiumpro_memcpy[2] = {
416 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
417 {8192, rep_prefix_4_byte, false},
418 {-1, rep_prefix_1_byte, false}}},
419 DUMMY_STRINGOP_ALGS};
420 static stringop_algs pentiumpro_memset[2] = {
421 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
422 {8192, rep_prefix_4_byte, false},
423 {-1, libcall, false}}},
424 DUMMY_STRINGOP_ALGS};
425 static const
426 struct processor_costs pentiumpro_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (1), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (4), /* HI */
433 COSTS_N_INSNS (4), /* SI */
434 COSTS_N_INSNS (4), /* DI */
435 COSTS_N_INSNS (4)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (17), /* HI */
439 COSTS_N_INSNS (17), /* SI */
440 COSTS_N_INSNS (17), /* DI */
441 COSTS_N_INSNS (17)}, /* other */
442 COSTS_N_INSNS (1), /* cost of movsx */
443 COSTS_N_INSNS (1), /* cost of movzx */
444 8, /* "large" insn */
445 6, /* MOVE_RATIO */
446 2, /* cost for loading QImode using movzbl */
447 {4, 4, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 2, 2}, /* cost of storing integer registers */
451 2, /* cost of reg,reg fld/fst */
452 {2, 2, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 6}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 3, /* MMX or SSE register to integer */
467 8, /* size of l1 cache. */
468 256, /* size of l2 cache */
469 32, /* size of prefetch block */
470 6, /* number of parallel prefetches */
471 2, /* Branch cost */
472 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
473 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
474 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
475 COSTS_N_INSNS (2), /* cost of FABS instruction. */
476 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
477 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
478 pentiumpro_memcpy,
479 pentiumpro_memset,
480 1, /* scalar_stmt_cost. */
481 1, /* scalar load_cost. */
482 1, /* scalar_store_cost. */
483 1, /* vec_stmt_cost. */
484 1, /* vec_to_scalar_cost. */
485 1, /* scalar_to_vec_cost. */
486 1, /* vec_align_load_cost. */
487 2, /* vec_unalign_load_cost. */
488 1, /* vec_store_cost. */
489 3, /* cond_taken_branch_cost. */
490 1, /* cond_not_taken_branch_cost. */
491 };
492
493 static stringop_algs geode_memcpy[2] = {
494 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
495 DUMMY_STRINGOP_ALGS};
496 static stringop_algs geode_memset[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static const
500 struct processor_costs geode_cost = {
501 COSTS_N_INSNS (1), /* cost of an add instruction */
502 COSTS_N_INSNS (1), /* cost of a lea instruction */
503 COSTS_N_INSNS (2), /* variable shift costs */
504 COSTS_N_INSNS (1), /* constant shift costs */
505 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
506 COSTS_N_INSNS (4), /* HI */
507 COSTS_N_INSNS (7), /* SI */
508 COSTS_N_INSNS (7), /* DI */
509 COSTS_N_INSNS (7)}, /* other */
510 0, /* cost of multiply per each bit set */
511 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
512 COSTS_N_INSNS (23), /* HI */
513 COSTS_N_INSNS (39), /* SI */
514 COSTS_N_INSNS (39), /* DI */
515 COSTS_N_INSNS (39)}, /* other */
516 COSTS_N_INSNS (1), /* cost of movsx */
517 COSTS_N_INSNS (1), /* cost of movzx */
518 8, /* "large" insn */
519 4, /* MOVE_RATIO */
520 1, /* cost for loading QImode using movzbl */
521 {1, 1, 1}, /* cost of loading integer registers
522 in QImode, HImode and SImode.
523 Relative to reg-reg move (2). */
524 {1, 1, 1}, /* cost of storing integer registers */
525 1, /* cost of reg,reg fld/fst */
526 {1, 1, 1}, /* cost of loading fp registers
527 in SFmode, DFmode and XFmode */
528 {4, 6, 6}, /* cost of storing fp registers
529 in SFmode, DFmode and XFmode */
530
531 1, /* cost of moving MMX register */
532 {1, 1}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {1, 1}, /* cost of storing MMX registers
535 in SImode and DImode */
536 1, /* cost of moving SSE register */
537 {1, 1, 1}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {1, 1, 1}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 1, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 128, /* size of l2 cache. */
544 32, /* size of prefetch block */
545 1, /* number of parallel prefetches */
546 1, /* Branch cost */
547 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (1), /* cost of FABS instruction. */
551 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
553 geode_memcpy,
554 geode_memset,
555 1, /* scalar_stmt_cost. */
556 1, /* scalar load_cost. */
557 1, /* scalar_store_cost. */
558 1, /* vec_stmt_cost. */
559 1, /* vec_to_scalar_cost. */
560 1, /* scalar_to_vec_cost. */
561 1, /* vec_align_load_cost. */
562 2, /* vec_unalign_load_cost. */
563 1, /* vec_store_cost. */
564 3, /* cond_taken_branch_cost. */
565 1, /* cond_not_taken_branch_cost. */
566 };
567
568 static stringop_algs k6_memcpy[2] = {
569 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
570 DUMMY_STRINGOP_ALGS};
571 static stringop_algs k6_memset[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static const
575 struct processor_costs k6_cost = {
576 COSTS_N_INSNS (1), /* cost of an add instruction */
577 COSTS_N_INSNS (2), /* cost of a lea instruction */
578 COSTS_N_INSNS (1), /* variable shift costs */
579 COSTS_N_INSNS (1), /* constant shift costs */
580 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
581 COSTS_N_INSNS (3), /* HI */
582 COSTS_N_INSNS (3), /* SI */
583 COSTS_N_INSNS (3), /* DI */
584 COSTS_N_INSNS (3)}, /* other */
585 0, /* cost of multiply per each bit set */
586 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
587 COSTS_N_INSNS (18), /* HI */
588 COSTS_N_INSNS (18), /* SI */
589 COSTS_N_INSNS (18), /* DI */
590 COSTS_N_INSNS (18)}, /* other */
591 COSTS_N_INSNS (2), /* cost of movsx */
592 COSTS_N_INSNS (2), /* cost of movzx */
593 8, /* "large" insn */
594 4, /* MOVE_RATIO */
595 3, /* cost for loading QImode using movzbl */
596 {4, 5, 4}, /* cost of loading integer registers
597 in QImode, HImode and SImode.
598 Relative to reg-reg move (2). */
599 {2, 3, 2}, /* cost of storing integer registers */
600 4, /* cost of reg,reg fld/fst */
601 {6, 6, 6}, /* cost of loading fp registers
602 in SFmode, DFmode and XFmode */
603 {4, 4, 4}, /* cost of storing fp registers
604 in SFmode, DFmode and XFmode */
605 2, /* cost of moving MMX register */
606 {2, 2}, /* cost of loading MMX registers
607 in SImode and DImode */
608 {2, 2}, /* cost of storing MMX registers
609 in SImode and DImode */
610 2, /* cost of moving SSE register */
611 {2, 2, 8}, /* cost of loading SSE registers
612 in SImode, DImode and TImode */
613 {2, 2, 8}, /* cost of storing SSE registers
614 in SImode, DImode and TImode */
615 6, /* MMX or SSE register to integer */
616 32, /* size of l1 cache. */
617 32, /* size of l2 cache. Some models
618 have integrated l2 cache, but
619 optimizing for k6 is not important
620 enough to worry about that. */
621 32, /* size of prefetch block */
622 1, /* number of parallel prefetches */
623 1, /* Branch cost */
624 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
625 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
626 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
627 COSTS_N_INSNS (2), /* cost of FABS instruction. */
628 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
629 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
630 k6_memcpy,
631 k6_memset,
632 1, /* scalar_stmt_cost. */
633 1, /* scalar load_cost. */
634 1, /* scalar_store_cost. */
635 1, /* vec_stmt_cost. */
636 1, /* vec_to_scalar_cost. */
637 1, /* scalar_to_vec_cost. */
638 1, /* vec_align_load_cost. */
639 2, /* vec_unalign_load_cost. */
640 1, /* vec_store_cost. */
641 3, /* cond_taken_branch_cost. */
642 1, /* cond_not_taken_branch_cost. */
643 };
644
645 /* For some reason, Athlon deals better with the REP prefix (relative to
646 loops) than K8 does.  Alignment becomes important after 8 bytes for memcpy
647 and 128 bytes for memset.  */
648 static stringop_algs athlon_memcpy[2] = {
649 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
650 DUMMY_STRINGOP_ALGS};
651 static stringop_algs athlon_memset[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static const
655 struct processor_costs athlon_cost = {
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (2), /* cost of a lea instruction */
658 COSTS_N_INSNS (1), /* variable shift costs */
659 COSTS_N_INSNS (1), /* constant shift costs */
660 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (5), /* HI */
662 COSTS_N_INSNS (5), /* SI */
663 COSTS_N_INSNS (5), /* DI */
664 COSTS_N_INSNS (5)}, /* other */
665 0, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (26), /* HI */
668 COSTS_N_INSNS (42), /* SI */
669 COSTS_N_INSNS (74), /* DI */
670 COSTS_N_INSNS (74)}, /* other */
671 COSTS_N_INSNS (1), /* cost of movsx */
672 COSTS_N_INSNS (1), /* cost of movzx */
673 8, /* "large" insn */
674 9, /* MOVE_RATIO */
675 4, /* cost for loading QImode using movzbl */
676 {3, 4, 3}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {3, 4, 3}, /* cost of storing integer registers */
680 4, /* cost of reg,reg fld/fst */
681 {4, 4, 12}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {6, 6, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {4, 4}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 4}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 4, 6}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 4, 5}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 5, /* MMX or SSE register to integer */
696 64, /* size of l1 cache. */
697 256, /* size of l2 cache. */
698 64, /* size of prefetch block */
699 6, /* number of parallel prefetches */
700 5, /* Branch cost */
701 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (2), /* cost of FABS instruction. */
705 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
707 athlon_memcpy,
708 athlon_memset,
709 1, /* scalar_stmt_cost. */
710 1, /* scalar load_cost. */
711 1, /* scalar_store_cost. */
712 1, /* vec_stmt_cost. */
713 1, /* vec_to_scalar_cost. */
714 1, /* scalar_to_vec_cost. */
715 1, /* vec_align_load_cost. */
716 2, /* vec_unalign_load_cost. */
717 1, /* vec_store_cost. */
718 3, /* cond_taken_branch_cost. */
719 1, /* cond_not_taken_branch_cost. */
720 };
721
722 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
723 small blocks it is better to use a loop.  For large blocks, a libcall can
724 do non-temporal accesses and beat inlined code considerably.  */
725 static stringop_algs k8_memcpy[2] = {
726 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
727 {-1, rep_prefix_4_byte, false}}},
728 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
729 {-1, libcall, false}}}};
730 static stringop_algs k8_memset[2] = {
731 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
732 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
733 {libcall, {{48, unrolled_loop, false},
734 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
735 static const
736 struct processor_costs k8_cost = {
737 COSTS_N_INSNS (1), /* cost of an add instruction */
738 COSTS_N_INSNS (2), /* cost of a lea instruction */
739 COSTS_N_INSNS (1), /* variable shift costs */
740 COSTS_N_INSNS (1), /* constant shift costs */
741 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
742 COSTS_N_INSNS (4), /* HI */
743 COSTS_N_INSNS (3), /* SI */
744 COSTS_N_INSNS (4), /* DI */
745 COSTS_N_INSNS (5)}, /* other */
746 0, /* cost of multiply per each bit set */
747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
748 COSTS_N_INSNS (26), /* HI */
749 COSTS_N_INSNS (42), /* SI */
750 COSTS_N_INSNS (74), /* DI */
751 COSTS_N_INSNS (74)}, /* other */
752 COSTS_N_INSNS (1), /* cost of movsx */
753 COSTS_N_INSNS (1), /* cost of movzx */
754 8, /* "large" insn */
755 9, /* MOVE_RATIO */
756 4, /* cost for loading QImode using movzbl */
757 {3, 4, 3}, /* cost of loading integer registers
758 in QImode, HImode and SImode.
759 Relative to reg-reg move (2). */
760 {3, 4, 3}, /* cost of storing integer registers */
761 4, /* cost of reg,reg fld/fst */
762 {4, 4, 12}, /* cost of loading fp registers
763 in SFmode, DFmode and XFmode */
764 {6, 6, 8}, /* cost of storing fp registers
765 in SFmode, DFmode and XFmode */
766 2, /* cost of moving MMX register */
767 {3, 3}, /* cost of loading MMX registers
768 in SImode and DImode */
769 {4, 4}, /* cost of storing MMX registers
770 in SImode and DImode */
771 2, /* cost of moving SSE register */
772 {4, 3, 6}, /* cost of loading SSE registers
773 in SImode, DImode and TImode */
774 {4, 4, 5}, /* cost of storing SSE registers
775 in SImode, DImode and TImode */
776 5, /* MMX or SSE register to integer */
777 64, /* size of l1 cache. */
778 512, /* size of l2 cache. */
779 64, /* size of prefetch block */
780 /* New AMD processors never drop prefetches; if they cannot be performed
781 immediately, they are queued.  We set the number of simultaneous prefetches
782 to a large constant to reflect this (it is probably not a good idea to leave
783 the number of prefetches entirely unlimited, as their execution also takes
784 some time).  */
785 100, /* number of parallel prefetches */
786 3, /* Branch cost */
787 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
788 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
789 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
790 COSTS_N_INSNS (2), /* cost of FABS instruction. */
791 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
792 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
793
794 k8_memcpy,
795 k8_memset,
796 4, /* scalar_stmt_cost. */
797 2, /* scalar load_cost. */
798 2, /* scalar_store_cost. */
799 5, /* vec_stmt_cost. */
800 0, /* vec_to_scalar_cost. */
801 2, /* scalar_to_vec_cost. */
802 2, /* vec_align_load_cost. */
803 3, /* vec_unalign_load_cost. */
804 3, /* vec_store_cost. */
805 3, /* cond_taken_branch_cost. */
806 2, /* cond_not_taken_branch_cost. */
807 };
808
809 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
810 very small blocks it is better to use a loop.  For large blocks, a libcall
811 can do non-temporal accesses and beat inlined code considerably.  */
812 static stringop_algs amdfam10_memcpy[2] = {
813 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
814 {-1, rep_prefix_4_byte, false}}},
815 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
816 {-1, libcall, false}}}};
817 static stringop_algs amdfam10_memset[2] = {
818 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
819 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
820 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
821 {-1, libcall, false}}}};
822 struct processor_costs amdfam10_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (2), /* cost of a lea instruction */
825 COSTS_N_INSNS (1), /* variable shift costs */
826 COSTS_N_INSNS (1), /* constant shift costs */
827 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (4), /* HI */
829 COSTS_N_INSNS (3), /* SI */
830 COSTS_N_INSNS (4), /* DI */
831 COSTS_N_INSNS (5)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (35), /* HI */
835 COSTS_N_INSNS (51), /* SI */
836 COSTS_N_INSNS (83), /* DI */
837 COSTS_N_INSNS (83)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 8, /* "large" insn */
841 9, /* MOVE_RATIO */
842 4, /* cost for loading QImode using movzbl */
843 {3, 4, 3}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {3, 4, 3}, /* cost of storing integer registers */
847 4, /* cost of reg,reg fld/fst */
848 {4, 4, 12}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {6, 6, 8}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {3, 3}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {4, 4}, /* cost of storing MMX registers
856 in SImode and DImode */
857 2, /* cost of moving SSE register */
858 {4, 4, 3}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {4, 4, 5}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 3, /* MMX or SSE register to integer */
863 /* On K8:
864 MOVD reg64, xmmreg Double FSTORE 4
865 MOVD reg32, xmmreg Double FSTORE 4
866 On AMDFAM10:
867 MOVD reg64, xmmreg Double FADD 3
868 1/1 1/1
869 MOVD reg32, xmmreg Double FADD 3
870 1/1 1/1 */
871 64, /* size of l1 cache. */
872 512, /* size of l2 cache. */
873 64, /* size of prefetch block */
874 /* New AMD processors never drop prefetches; if they cannot be performed
875 immediately, they are queued.  We set the number of simultaneous prefetches
876 to a large constant to reflect this (it is probably not a good idea to leave
877 the number of prefetches entirely unlimited, as their execution also takes
878 some time).  */
879 100, /* number of parallel prefetches */
880 2, /* Branch cost */
881 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
882 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
883 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
884 COSTS_N_INSNS (2), /* cost of FABS instruction. */
885 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
886 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
887
888 amdfam10_memcpy,
889 amdfam10_memset,
890 4, /* scalar_stmt_cost. */
891 2, /* scalar load_cost. */
892 2, /* scalar_store_cost. */
893 6, /* vec_stmt_cost. */
894 0, /* vec_to_scalar_cost. */
895 2, /* scalar_to_vec_cost. */
896 2, /* vec_align_load_cost. */
897 2, /* vec_unalign_load_cost. */
898 2, /* vec_store_cost. */
899 2, /* cond_taken_branch_cost. */
900 1, /* cond_not_taken_branch_cost. */
901 };
902
903 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
904 very small blocks it is better to use a loop.  For large blocks, a libcall
905 can do non-temporal accesses and beat inlined code considerably.  */
906 static stringop_algs bdver1_memcpy[2] = {
907 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
908 {-1, rep_prefix_4_byte, false}}},
909 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
910 {-1, libcall, false}}}};
911 static stringop_algs bdver1_memset[2] = {
912 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
913 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
914 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
915 {-1, libcall, false}}}};
916
917 const struct processor_costs bdver1_cost = {
918 COSTS_N_INSNS (1), /* cost of an add instruction */
919 COSTS_N_INSNS (1), /* cost of a lea instruction */
920 COSTS_N_INSNS (1), /* variable shift costs */
921 COSTS_N_INSNS (1), /* constant shift costs */
922 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
923 COSTS_N_INSNS (4), /* HI */
924 COSTS_N_INSNS (4), /* SI */
925 COSTS_N_INSNS (6), /* DI */
926 COSTS_N_INSNS (6)}, /* other */
927 0, /* cost of multiply per each bit set */
928 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
929 COSTS_N_INSNS (35), /* HI */
930 COSTS_N_INSNS (51), /* SI */
931 COSTS_N_INSNS (83), /* DI */
932 COSTS_N_INSNS (83)}, /* other */
933 COSTS_N_INSNS (1), /* cost of movsx */
934 COSTS_N_INSNS (1), /* cost of movzx */
935 8, /* "large" insn */
936 9, /* MOVE_RATIO */
937 4, /* cost for loading QImode using movzbl */
938 {5, 5, 4}, /* cost of loading integer registers
939 in QImode, HImode and SImode.
940 Relative to reg-reg move (2). */
941 {4, 4, 4}, /* cost of storing integer registers */
942 2, /* cost of reg,reg fld/fst */
943 {5, 5, 12}, /* cost of loading fp registers
944 in SFmode, DFmode and XFmode */
945 {4, 4, 8}, /* cost of storing fp registers
946 in SFmode, DFmode and XFmode */
947 2, /* cost of moving MMX register */
948 {4, 4}, /* cost of loading MMX registers
949 in SImode and DImode */
950 {4, 4}, /* cost of storing MMX registers
951 in SImode and DImode */
952 2, /* cost of moving SSE register */
953 {4, 4, 4}, /* cost of loading SSE registers
954 in SImode, DImode and TImode */
955 {4, 4, 4}, /* cost of storing SSE registers
956 in SImode, DImode and TImode */
957 2, /* MMX or SSE register to integer */
958 /* On K8:
959 MOVD reg64, xmmreg Double FSTORE 4
960 MOVD reg32, xmmreg Double FSTORE 4
961 On AMDFAM10:
962 MOVD reg64, xmmreg Double FADD 3
963 1/1 1/1
964 MOVD reg32, xmmreg Double FADD 3
965 1/1 1/1 */
966 16, /* size of l1 cache. */
967 2048, /* size of l2 cache. */
968 64, /* size of prefetch block */
969 /* New AMD processors never drop prefetches; if they cannot be performed
970 immediately, they are queued.  We set the number of simultaneous prefetches
971 to a large constant to reflect this (it is probably not a good idea to leave
972 the number of prefetches entirely unlimited, as their execution also takes
973 some time).  */
974 100, /* number of parallel prefetches */
975 2, /* Branch cost */
976 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
977 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
978 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
979 COSTS_N_INSNS (2), /* cost of FABS instruction. */
980 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
981 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
982
983 bdver1_memcpy,
984 bdver1_memset,
985 6, /* scalar_stmt_cost. */
986 4, /* scalar load_cost. */
987 4, /* scalar_store_cost. */
988 6, /* vec_stmt_cost. */
989 0, /* vec_to_scalar_cost. */
990 2, /* scalar_to_vec_cost. */
991 4, /* vec_align_load_cost. */
992 4, /* vec_unalign_load_cost. */
993 4, /* vec_store_cost. */
994 2, /* cond_taken_branch_cost. */
995 1, /* cond_not_taken_branch_cost. */
996 };
997
998 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
999 very small blocks it is better to use a loop.  For large blocks, a libcall
1000 can do non-temporal accesses and beat inlined code considerably.  */
1001
1002 static stringop_algs bdver2_memcpy[2] = {
1003 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1004 {-1, rep_prefix_4_byte, false}}},
1005 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1006 {-1, libcall, false}}}};
1007 static stringop_algs bdver2_memset[2] = {
1008 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1009 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1010 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1011 {-1, libcall, false}}}};
1012
1013 const struct processor_costs bdver2_cost = {
1014 COSTS_N_INSNS (1), /* cost of an add instruction */
1015 COSTS_N_INSNS (1), /* cost of a lea instruction */
1016 COSTS_N_INSNS (1), /* variable shift costs */
1017 COSTS_N_INSNS (1), /* constant shift costs */
1018 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1019 COSTS_N_INSNS (4), /* HI */
1020 COSTS_N_INSNS (4), /* SI */
1021 COSTS_N_INSNS (6), /* DI */
1022 COSTS_N_INSNS (6)}, /* other */
1023 0, /* cost of multiply per each bit set */
1024 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1025 COSTS_N_INSNS (35), /* HI */
1026 COSTS_N_INSNS (51), /* SI */
1027 COSTS_N_INSNS (83), /* DI */
1028 COSTS_N_INSNS (83)}, /* other */
1029 COSTS_N_INSNS (1), /* cost of movsx */
1030 COSTS_N_INSNS (1), /* cost of movzx */
1031 8, /* "large" insn */
1032 9, /* MOVE_RATIO */
1033 4, /* cost for loading QImode using movzbl */
1034 {5, 5, 4}, /* cost of loading integer registers
1035 in QImode, HImode and SImode.
1036 Relative to reg-reg move (2). */
1037 {4, 4, 4}, /* cost of storing integer registers */
1038 2, /* cost of reg,reg fld/fst */
1039 {5, 5, 12}, /* cost of loading fp registers
1040 in SFmode, DFmode and XFmode */
1041 {4, 4, 8}, /* cost of storing fp registers
1042 in SFmode, DFmode and XFmode */
1043 2, /* cost of moving MMX register */
1044 {4, 4}, /* cost of loading MMX registers
1045 in SImode and DImode */
1046 {4, 4}, /* cost of storing MMX registers
1047 in SImode and DImode */
1048 2, /* cost of moving SSE register */
1049 {4, 4, 4}, /* cost of loading SSE registers
1050 in SImode, DImode and TImode */
1051 {4, 4, 4}, /* cost of storing SSE registers
1052 in SImode, DImode and TImode */
1053 2, /* MMX or SSE register to integer */
1054 /* On K8:
1055 MOVD reg64, xmmreg Double FSTORE 4
1056 MOVD reg32, xmmreg Double FSTORE 4
1057 On AMDFAM10:
1058 MOVD reg64, xmmreg Double FADD 3
1059 1/1 1/1
1060 MOVD reg32, xmmreg Double FADD 3
1061 1/1 1/1 */
1062 16, /* size of l1 cache. */
1063 2048, /* size of l2 cache. */
1064 64, /* size of prefetch block */
1065 /* New AMD processors never drop prefetches; if they cannot be performed
1066 immediately, they are queued.  We set the number of simultaneous prefetches
1067 to a large constant to reflect this (it is probably not a good idea to leave
1068 the number of prefetches entirely unlimited, as their execution also takes
1069 some time).  */
1070 100, /* number of parallel prefetches */
1071 2, /* Branch cost */
1072 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1073 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1074 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1075 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1076 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1077 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1078
1079 bdver2_memcpy,
1080 bdver2_memset,
1081 6, /* scalar_stmt_cost. */
1082 4, /* scalar load_cost. */
1083 4, /* scalar_store_cost. */
1084 6, /* vec_stmt_cost. */
1085 0, /* vec_to_scalar_cost. */
1086 2, /* scalar_to_vec_cost. */
1087 4, /* vec_align_load_cost. */
1088 4, /* vec_unalign_load_cost. */
1089 4, /* vec_store_cost. */
1090 2, /* cond_taken_branch_cost. */
1091 1, /* cond_not_taken_branch_cost. */
1092 };
1093
1094
1095 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1096 very small blocks it is better to use a loop.  For large blocks, a libcall
1097 can do non-temporal accesses and beat inlined code considerably.  */
1098 static stringop_algs bdver3_memcpy[2] = {
1099 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1100 {-1, rep_prefix_4_byte, false}}},
1101 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1102 {-1, libcall, false}}}};
1103 static stringop_algs bdver3_memset[2] = {
1104 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1105 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1106 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1107 {-1, libcall, false}}}};
1108 struct processor_costs bdver3_cost = {
1109 COSTS_N_INSNS (1), /* cost of an add instruction */
1110 COSTS_N_INSNS (1), /* cost of a lea instruction */
1111 COSTS_N_INSNS (1), /* variable shift costs */
1112 COSTS_N_INSNS (1), /* constant shift costs */
1113 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1114 COSTS_N_INSNS (4), /* HI */
1115 COSTS_N_INSNS (4), /* SI */
1116 COSTS_N_INSNS (6), /* DI */
1117 COSTS_N_INSNS (6)}, /* other */
1118 0, /* cost of multiply per each bit set */
1119 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1120 COSTS_N_INSNS (35), /* HI */
1121 COSTS_N_INSNS (51), /* SI */
1122 COSTS_N_INSNS (83), /* DI */
1123 COSTS_N_INSNS (83)}, /* other */
1124 COSTS_N_INSNS (1), /* cost of movsx */
1125 COSTS_N_INSNS (1), /* cost of movzx */
1126 8, /* "large" insn */
1127 9, /* MOVE_RATIO */
1128 4, /* cost for loading QImode using movzbl */
1129 {5, 5, 4}, /* cost of loading integer registers
1130 in QImode, HImode and SImode.
1131 Relative to reg-reg move (2). */
1132 {4, 4, 4}, /* cost of storing integer registers */
1133 2, /* cost of reg,reg fld/fst */
1134 {5, 5, 12}, /* cost of loading fp registers
1135 in SFmode, DFmode and XFmode */
1136 {4, 4, 8}, /* cost of storing fp registers
1137 in SFmode, DFmode and XFmode */
1138 2, /* cost of moving MMX register */
1139 {4, 4}, /* cost of loading MMX registers
1140 in SImode and DImode */
1141 {4, 4}, /* cost of storing MMX registers
1142 in SImode and DImode */
1143 2, /* cost of moving SSE register */
1144 {4, 4, 4}, /* cost of loading SSE registers
1145 in SImode, DImode and TImode */
1146 {4, 4, 4}, /* cost of storing SSE registers
1147 in SImode, DImode and TImode */
1148 2, /* MMX or SSE register to integer */
1149 16, /* size of l1 cache. */
1150 2048, /* size of l2 cache. */
1151 64, /* size of prefetch block */
1152 /* New AMD processors never drop prefetches; if they cannot be performed
1153 immediately, they are queued.  We set the number of simultaneous prefetches
1154 to a large constant to reflect this (it is probably not a good idea to leave
1155 the number of prefetches entirely unlimited, as their execution also takes
1156 some time).  */
1157 100, /* number of parallel prefetches */
1158 2, /* Branch cost */
1159 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1165
1166 bdver3_memcpy,
1167 bdver3_memset,
1168 6, /* scalar_stmt_cost. */
1169 4, /* scalar load_cost. */
1170 4, /* scalar_store_cost. */
1171 6, /* vec_stmt_cost. */
1172 0, /* vec_to_scalar_cost. */
1173 2, /* scalar_to_vec_cost. */
1174 4, /* vec_align_load_cost. */
1175 4, /* vec_unalign_load_cost. */
1176 4, /* vec_store_cost. */
1177 2, /* cond_taken_branch_cost. */
1178 1, /* cond_not_taken_branch_cost. */
1179 };
1180
1181 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1182 very small blocks it is better to use a loop.  For large blocks, a libcall
1183 can do non-temporal accesses and beat inlined code considerably.  */
1184 static stringop_algs bdver4_memcpy[2] = {
1185 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1186 {-1, rep_prefix_4_byte, false}}},
1187 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1188 {-1, libcall, false}}}};
1189 static stringop_algs bdver4_memset[2] = {
1190 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1191 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1192 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1193 {-1, libcall, false}}}};
1194 struct processor_costs bdver4_cost = {
1195 COSTS_N_INSNS (1), /* cost of an add instruction */
1196 COSTS_N_INSNS (1), /* cost of a lea instruction */
1197 COSTS_N_INSNS (1), /* variable shift costs */
1198 COSTS_N_INSNS (1), /* constant shift costs */
1199 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1200 COSTS_N_INSNS (4), /* HI */
1201 COSTS_N_INSNS (4), /* SI */
1202 COSTS_N_INSNS (6), /* DI */
1203 COSTS_N_INSNS (6)}, /* other */
1204 0, /* cost of multiply per each bit set */
1205 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1206 COSTS_N_INSNS (35), /* HI */
1207 COSTS_N_INSNS (51), /* SI */
1208 COSTS_N_INSNS (83), /* DI */
1209 COSTS_N_INSNS (83)}, /* other */
1210 COSTS_N_INSNS (1), /* cost of movsx */
1211 COSTS_N_INSNS (1), /* cost of movzx */
1212 8, /* "large" insn */
1213 9, /* MOVE_RATIO */
1214 4, /* cost for loading QImode using movzbl */
1215 {5, 5, 4}, /* cost of loading integer registers
1216 in QImode, HImode and SImode.
1217 Relative to reg-reg move (2). */
1218 {4, 4, 4}, /* cost of storing integer registers */
1219 2, /* cost of reg,reg fld/fst */
1220 {5, 5, 12}, /* cost of loading fp registers
1221 in SFmode, DFmode and XFmode */
1222 {4, 4, 8}, /* cost of storing fp registers
1223 in SFmode, DFmode and XFmode */
1224 2, /* cost of moving MMX register */
1225 {4, 4}, /* cost of loading MMX registers
1226 in SImode and DImode */
1227 {4, 4}, /* cost of storing MMX registers
1228 in SImode and DImode */
1229 2, /* cost of moving SSE register */
1230 {4, 4, 4}, /* cost of loading SSE registers
1231 in SImode, DImode and TImode */
1232 {4, 4, 4}, /* cost of storing SSE registers
1233 in SImode, DImode and TImode */
1234 2, /* MMX or SSE register to integer */
1235 16, /* size of l1 cache. */
1236 2048, /* size of l2 cache. */
1237 64, /* size of prefetch block */
1238 /* New AMD processors never drop prefetches; if they cannot be performed
1239 immediately, they are queued.  We set the number of simultaneous prefetches
1240 to a large constant to reflect this (it is probably not a good idea to leave
1241 the number of prefetches entirely unlimited, as their execution also takes
1242 some time).  */
1243 100, /* number of parallel prefetches */
1244 2, /* Branch cost */
1245 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1246 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1247 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1248 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1249 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1250 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1251
1252 bdver4_memcpy,
1253 bdver4_memset,
1254 6, /* scalar_stmt_cost. */
1255 4, /* scalar load_cost. */
1256 4, /* scalar_store_cost. */
1257 6, /* vec_stmt_cost. */
1258 0, /* vec_to_scalar_cost. */
1259 2, /* scalar_to_vec_cost. */
1260 4, /* vec_align_load_cost. */
1261 4, /* vec_unalign_load_cost. */
1262 4, /* vec_store_cost. */
1263 2, /* cond_taken_branch_cost. */
1264 1, /* cond_not_taken_branch_cost. */
1265 };
1266
1267 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1268 very small blocks it is better to use a loop.  For large blocks, a libcall
1269 can do non-temporal accesses and beat inlined code considerably.  */
1270 static stringop_algs btver1_memcpy[2] = {
1271 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1272 {-1, rep_prefix_4_byte, false}}},
1273 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1274 {-1, libcall, false}}}};
1275 static stringop_algs btver1_memset[2] = {
1276 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1277 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1278 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1279 {-1, libcall, false}}}};
1280 const struct processor_costs btver1_cost = {
1281 COSTS_N_INSNS (1), /* cost of an add instruction */
1282 COSTS_N_INSNS (2), /* cost of a lea instruction */
1283 COSTS_N_INSNS (1), /* variable shift costs */
1284 COSTS_N_INSNS (1), /* constant shift costs */
1285 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1286 COSTS_N_INSNS (4), /* HI */
1287 COSTS_N_INSNS (3), /* SI */
1288 COSTS_N_INSNS (4), /* DI */
1289 COSTS_N_INSNS (5)}, /* other */
1290 0, /* cost of multiply per each bit set */
1291 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1292 COSTS_N_INSNS (35), /* HI */
1293 COSTS_N_INSNS (51), /* SI */
1294 COSTS_N_INSNS (83), /* DI */
1295 COSTS_N_INSNS (83)}, /* other */
1296 COSTS_N_INSNS (1), /* cost of movsx */
1297 COSTS_N_INSNS (1), /* cost of movzx */
1298 8, /* "large" insn */
1299 9, /* MOVE_RATIO */
1300 4, /* cost for loading QImode using movzbl */
1301 {3, 4, 3}, /* cost of loading integer registers
1302 in QImode, HImode and SImode.
1303 Relative to reg-reg move (2). */
1304 {3, 4, 3}, /* cost of storing integer registers */
1305 4, /* cost of reg,reg fld/fst */
1306 {4, 4, 12}, /* cost of loading fp registers
1307 in SFmode, DFmode and XFmode */
1308 {6, 6, 8}, /* cost of storing fp registers
1309 in SFmode, DFmode and XFmode */
1310 2, /* cost of moving MMX register */
1311 {3, 3}, /* cost of loading MMX registers
1312 in SImode and DImode */
1313 {4, 4}, /* cost of storing MMX registers
1314 in SImode and DImode */
1315 2, /* cost of moving SSE register */
1316 {4, 4, 3}, /* cost of loading SSE registers
1317 in SImode, DImode and TImode */
1318 {4, 4, 5}, /* cost of storing SSE registers
1319 in SImode, DImode and TImode */
1320 3, /* MMX or SSE register to integer */
1321 /* On K8:
1322 MOVD reg64, xmmreg Double FSTORE 4
1323 MOVD reg32, xmmreg Double FSTORE 4
1324 On AMDFAM10:
1325 MOVD reg64, xmmreg Double FADD 3
1326 1/1 1/1
1327 MOVD reg32, xmmreg Double FADD 3
1328 1/1 1/1 */
1329 32, /* size of l1 cache. */
1330 512, /* size of l2 cache. */
1331 64, /* size of prefetch block */
1332 100, /* number of parallel prefetches */
1333 2, /* Branch cost */
1334 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1335 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1336 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1337 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1338 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1339 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1340
1341 btver1_memcpy,
1342 btver1_memset,
1343 4, /* scalar_stmt_cost. */
1344 2, /* scalar load_cost. */
1345 2, /* scalar_store_cost. */
1346 6, /* vec_stmt_cost. */
1347 0, /* vec_to_scalar_cost. */
1348 2, /* scalar_to_vec_cost. */
1349 2, /* vec_align_load_cost. */
1350 2, /* vec_unalign_load_cost. */
1351 2, /* vec_store_cost. */
1352 2, /* cond_taken_branch_cost. */
1353 1, /* cond_not_taken_branch_cost. */
1354 };
1355
1356 static stringop_algs btver2_memcpy[2] = {
1357 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1358 {-1, rep_prefix_4_byte, false}}},
1359 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1360 {-1, libcall, false}}}};
1361 static stringop_algs btver2_memset[2] = {
1362 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1363 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1364 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1365 {-1, libcall, false}}}};
1366 const struct processor_costs btver2_cost = {
1367 COSTS_N_INSNS (1), /* cost of an add instruction */
1368 COSTS_N_INSNS (2), /* cost of a lea instruction */
1369 COSTS_N_INSNS (1), /* variable shift costs */
1370 COSTS_N_INSNS (1), /* constant shift costs */
1371 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1372 COSTS_N_INSNS (4), /* HI */
1373 COSTS_N_INSNS (3), /* SI */
1374 COSTS_N_INSNS (4), /* DI */
1375 COSTS_N_INSNS (5)}, /* other */
1376 0, /* cost of multiply per each bit set */
1377 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1378 COSTS_N_INSNS (35), /* HI */
1379 COSTS_N_INSNS (51), /* SI */
1380 COSTS_N_INSNS (83), /* DI */
1381 COSTS_N_INSNS (83)}, /* other */
1382 COSTS_N_INSNS (1), /* cost of movsx */
1383 COSTS_N_INSNS (1), /* cost of movzx */
1384 8, /* "large" insn */
1385 9, /* MOVE_RATIO */
1386 4, /* cost for loading QImode using movzbl */
1387 {3, 4, 3}, /* cost of loading integer registers
1388 in QImode, HImode and SImode.
1389 Relative to reg-reg move (2). */
1390 {3, 4, 3}, /* cost of storing integer registers */
1391 4, /* cost of reg,reg fld/fst */
1392 {4, 4, 12}, /* cost of loading fp registers
1393 in SFmode, DFmode and XFmode */
1394 {6, 6, 8}, /* cost of storing fp registers
1395 in SFmode, DFmode and XFmode */
1396 2, /* cost of moving MMX register */
1397 {3, 3}, /* cost of loading MMX registers
1398 in SImode and DImode */
1399 {4, 4}, /* cost of storing MMX registers
1400 in SImode and DImode */
1401 2, /* cost of moving SSE register */
1402 {4, 4, 3}, /* cost of loading SSE registers
1403 in SImode, DImode and TImode */
1404 {4, 4, 5}, /* cost of storing SSE registers
1405 in SImode, DImode and TImode */
1406 3, /* MMX or SSE register to integer */
1407 /* On K8:
1408 MOVD reg64, xmmreg Double FSTORE 4
1409 MOVD reg32, xmmreg Double FSTORE 4
1410 On AMDFAM10:
1411 MOVD reg64, xmmreg Double FADD 3
1412 1/1 1/1
1413 MOVD reg32, xmmreg Double FADD 3
1414 1/1 1/1 */
1415 32, /* size of l1 cache. */
1416 2048, /* size of l2 cache. */
1417 64, /* size of prefetch block */
1418 100, /* number of parallel prefetches */
1419 2, /* Branch cost */
1420 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1421 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1422 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1423 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1424 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1425 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1426 btver2_memcpy,
1427 btver2_memset,
1428 4, /* scalar_stmt_cost. */
1429 2, /* scalar load_cost. */
1430 2, /* scalar_store_cost. */
1431 6, /* vec_stmt_cost. */
1432 0, /* vec_to_scalar_cost. */
1433 2, /* scalar_to_vec_cost. */
1434 2, /* vec_align_load_cost. */
1435 2, /* vec_unalign_load_cost. */
1436 2, /* vec_store_cost. */
1437 2, /* cond_taken_branch_cost. */
1438 1, /* cond_not_taken_branch_cost. */
1439 };
1440
1441 static stringop_algs pentium4_memcpy[2] = {
1442 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1443 DUMMY_STRINGOP_ALGS};
1444 static stringop_algs pentium4_memset[2] = {
1445 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1446 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1447 DUMMY_STRINGOP_ALGS};
1448
1449 static const
1450 struct processor_costs pentium4_cost = {
1451 COSTS_N_INSNS (1), /* cost of an add instruction */
1452 COSTS_N_INSNS (3), /* cost of a lea instruction */
1453 COSTS_N_INSNS (4), /* variable shift costs */
1454 COSTS_N_INSNS (4), /* constant shift costs */
1455 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1456 COSTS_N_INSNS (15), /* HI */
1457 COSTS_N_INSNS (15), /* SI */
1458 COSTS_N_INSNS (15), /* DI */
1459 COSTS_N_INSNS (15)}, /* other */
1460 0, /* cost of multiply per each bit set */
1461 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1462 COSTS_N_INSNS (56), /* HI */
1463 COSTS_N_INSNS (56), /* SI */
1464 COSTS_N_INSNS (56), /* DI */
1465 COSTS_N_INSNS (56)}, /* other */
1466 COSTS_N_INSNS (1), /* cost of movsx */
1467 COSTS_N_INSNS (1), /* cost of movzx */
1468 16, /* "large" insn */
1469 6, /* MOVE_RATIO */
1470 2, /* cost for loading QImode using movzbl */
1471 {4, 5, 4}, /* cost of loading integer registers
1472 in QImode, HImode and SImode.
1473 Relative to reg-reg move (2). */
1474 {2, 3, 2}, /* cost of storing integer registers */
1475 2, /* cost of reg,reg fld/fst */
1476 {2, 2, 6}, /* cost of loading fp registers
1477 in SFmode, DFmode and XFmode */
1478 {4, 4, 6}, /* cost of storing fp registers
1479 in SFmode, DFmode and XFmode */
1480 2, /* cost of moving MMX register */
1481 {2, 2}, /* cost of loading MMX registers
1482 in SImode and DImode */
1483 {2, 2}, /* cost of storing MMX registers
1484 in SImode and DImode */
1485 12, /* cost of moving SSE register */
1486 {12, 12, 12}, /* cost of loading SSE registers
1487 in SImode, DImode and TImode */
1488 {2, 2, 8}, /* cost of storing SSE registers
1489 in SImode, DImode and TImode */
1490 10, /* MMX or SSE register to integer */
1491 8, /* size of l1 cache. */
1492 256, /* size of l2 cache. */
1493 64, /* size of prefetch block */
1494 6, /* number of parallel prefetches */
1495 2, /* Branch cost */
1496 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1497 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1498 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1499 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1500 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1501 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1502 pentium4_memcpy,
1503 pentium4_memset,
1504 1, /* scalar_stmt_cost. */
1505 1, /* scalar load_cost. */
1506 1, /* scalar_store_cost. */
1507 1, /* vec_stmt_cost. */
1508 1, /* vec_to_scalar_cost. */
1509 1, /* scalar_to_vec_cost. */
1510 1, /* vec_align_load_cost. */
1511 2, /* vec_unalign_load_cost. */
1512 1, /* vec_store_cost. */
1513 3, /* cond_taken_branch_cost. */
1514 1, /* cond_not_taken_branch_cost. */
1515 };
1516
1517 static stringop_algs nocona_memcpy[2] = {
1518 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1519 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1520 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1521
1522 static stringop_algs nocona_memset[2] = {
1523 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1524 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1525 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1526 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1527
1528 static const
1529 struct processor_costs nocona_cost = {
1530 COSTS_N_INSNS (1), /* cost of an add instruction */
1531 COSTS_N_INSNS (1), /* cost of a lea instruction */
1532 COSTS_N_INSNS (1), /* variable shift costs */
1533 COSTS_N_INSNS (1), /* constant shift costs */
1534 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1535 COSTS_N_INSNS (10), /* HI */
1536 COSTS_N_INSNS (10), /* SI */
1537 COSTS_N_INSNS (10), /* DI */
1538 COSTS_N_INSNS (10)}, /* other */
1539 0, /* cost of multiply per each bit set */
1540 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1541 COSTS_N_INSNS (66), /* HI */
1542 COSTS_N_INSNS (66), /* SI */
1543 COSTS_N_INSNS (66), /* DI */
1544 COSTS_N_INSNS (66)}, /* other */
1545 COSTS_N_INSNS (1), /* cost of movsx */
1546 COSTS_N_INSNS (1), /* cost of movzx */
1547 16, /* "large" insn */
1548 17, /* MOVE_RATIO */
1549 4, /* cost for loading QImode using movzbl */
1550 {4, 4, 4}, /* cost of loading integer registers
1551 in QImode, HImode and SImode.
1552 Relative to reg-reg move (2). */
1553 {4, 4, 4}, /* cost of storing integer registers */
1554 3, /* cost of reg,reg fld/fst */
1555 {12, 12, 12}, /* cost of loading fp registers
1556 in SFmode, DFmode and XFmode */
1557 {4, 4, 4}, /* cost of storing fp registers
1558 in SFmode, DFmode and XFmode */
1559 6, /* cost of moving MMX register */
1560 {12, 12}, /* cost of loading MMX registers
1561 in SImode and DImode */
1562 {12, 12}, /* cost of storing MMX registers
1563 in SImode and DImode */
1564 6, /* cost of moving SSE register */
1565 {12, 12, 12}, /* cost of loading SSE registers
1566 in SImode, DImode and TImode */
1567 {12, 12, 12}, /* cost of storing SSE registers
1568 in SImode, DImode and TImode */
1569 8, /* MMX or SSE register to integer */
1570 8, /* size of l1 cache. */
1571 1024, /* size of l2 cache. */
1572 64, /* size of prefetch block */
1573 8, /* number of parallel prefetches */
1574 1, /* Branch cost */
1575 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1576 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1577 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1578 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1579 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1580 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1581 nocona_memcpy,
1582 nocona_memset,
1583 1, /* scalar_stmt_cost. */
1584 1, /* scalar load_cost. */
1585 1, /* scalar_store_cost. */
1586 1, /* vec_stmt_cost. */
1587 1, /* vec_to_scalar_cost. */
1588 1, /* scalar_to_vec_cost. */
1589 1, /* vec_align_load_cost. */
1590 2, /* vec_unalign_load_cost. */
1591 1, /* vec_store_cost. */
1592 3, /* cond_taken_branch_cost. */
1593 1, /* cond_not_taken_branch_cost. */
1594 };
1595
1596 static stringop_algs atom_memcpy[2] = {
1597 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1598 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1599 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1600 static stringop_algs atom_memset[2] = {
1601 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1602 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1603 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1604 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1605 static const
1606 struct processor_costs atom_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1609 COSTS_N_INSNS (1), /* variable shift costs */
1610 COSTS_N_INSNS (1), /* constant shift costs */
1611 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (4), /* HI */
1613 COSTS_N_INSNS (3), /* SI */
1614 COSTS_N_INSNS (4), /* DI */
1615 COSTS_N_INSNS (2)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (26), /* HI */
1619 COSTS_N_INSNS (42), /* SI */
1620 COSTS_N_INSNS (74), /* DI */
1621 COSTS_N_INSNS (74)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 8, /* "large" insn */
1625 17, /* MOVE_RATIO */
1626 4, /* cost for loading QImode using movzbl */
1627 {4, 4, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {4, 4, 4}, /* cost of storing integer registers */
1631 4, /* cost of reg,reg fld/fst */
1632 {12, 12, 12}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {6, 6, 8}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 2, /* cost of moving MMX register */
1637 {8, 8}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {8, 8}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 2, /* cost of moving SSE register */
1642 {8, 8, 8}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {8, 8, 8}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 5, /* MMX or SSE register to integer */
1647 32, /* size of l1 cache. */
1648 256, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 6, /* number of parallel prefetches */
1651 3, /* Branch cost */
1652 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1658 atom_memcpy,
1659 atom_memset,
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
1671 };
1672
1673 static stringop_algs slm_memcpy[2] = {
1674 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1676 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1677 static stringop_algs slm_memset[2] = {
1678 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1679 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1680 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1681 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1682 static const
1683 struct processor_costs slm_cost = {
1684 COSTS_N_INSNS (1), /* cost of an add instruction */
1685 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1686 COSTS_N_INSNS (1), /* variable shift costs */
1687 COSTS_N_INSNS (1), /* constant shift costs */
1688 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1689 COSTS_N_INSNS (3), /* HI */
1690 COSTS_N_INSNS (3), /* SI */
1691 COSTS_N_INSNS (4), /* DI */
1692 COSTS_N_INSNS (2)}, /* other */
1693 0, /* cost of multiply per each bit set */
1694 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1695 COSTS_N_INSNS (26), /* HI */
1696 COSTS_N_INSNS (42), /* SI */
1697 COSTS_N_INSNS (74), /* DI */
1698 COSTS_N_INSNS (74)}, /* other */
1699 COSTS_N_INSNS (1), /* cost of movsx */
1700 COSTS_N_INSNS (1), /* cost of movzx */
1701 8, /* "large" insn */
1702 17, /* MOVE_RATIO */
1703 4, /* cost for loading QImode using movzbl */
1704 {4, 4, 4}, /* cost of loading integer registers
1705 in QImode, HImode and SImode.
1706 Relative to reg-reg move (2). */
1707 {4, 4, 4}, /* cost of storing integer registers */
1708 4, /* cost of reg,reg fld/fst */
1709 {12, 12, 12}, /* cost of loading fp registers
1710 in SFmode, DFmode and XFmode */
1711 {6, 6, 8}, /* cost of storing fp registers
1712 in SFmode, DFmode and XFmode */
1713 2, /* cost of moving MMX register */
1714 {8, 8}, /* cost of loading MMX registers
1715 in SImode and DImode */
1716 {8, 8}, /* cost of storing MMX registers
1717 in SImode and DImode */
1718 2, /* cost of moving SSE register */
1719 {8, 8, 8}, /* cost of loading SSE registers
1720 in SImode, DImode and TImode */
1721 {8, 8, 8}, /* cost of storing SSE registers
1722 in SImode, DImode and TImode */
1723 5, /* MMX or SSE register to integer */
1724 32, /* size of l1 cache. */
1725 256, /* size of l2 cache. */
1726 64, /* size of prefetch block */
1727 6, /* number of parallel prefetches */
1728 3, /* Branch cost */
1729 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1730 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1731 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1732 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1733 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1734 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1735 slm_memcpy,
1736 slm_memset,
1737 1, /* scalar_stmt_cost. */
1738 1, /* scalar load_cost. */
1739 1, /* scalar_store_cost. */
1740 1, /* vec_stmt_cost. */
1741 1, /* vec_to_scalar_cost. */
1742 1, /* scalar_to_vec_cost. */
1743 1, /* vec_align_load_cost. */
1744 2, /* vec_unalign_load_cost. */
1745 1, /* vec_store_cost. */
1746 3, /* cond_taken_branch_cost. */
1747 1, /* cond_not_taken_branch_cost. */
1748 };
1749
1750 /* Generic should produce code tuned for Core-i7 (and newer chips)
1751 and btver1 (and newer chips). */
1752
1753 static stringop_algs generic_memcpy[2] = {
1754 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1755 {-1, libcall, false}}},
1756 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1757 {-1, libcall, false}}}};
1758 static stringop_algs generic_memset[2] = {
1759 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1760 {-1, libcall, false}}},
1761 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1762 {-1, libcall, false}}}};
1763 static const
1764 struct processor_costs generic_cost = {
1765 COSTS_N_INSNS (1), /* cost of an add instruction */
1766    /* On all chips taken into consideration, lea takes 2 cycles or more.  With
1767       this cost, however, our current implementation of synth_mult results in
1768       the use of unnecessary temporary registers, causing regressions on several
1769       SPECfp benchmarks.  */
1770 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1771 COSTS_N_INSNS (1), /* variable shift costs */
1772 COSTS_N_INSNS (1), /* constant shift costs */
1773 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1774 COSTS_N_INSNS (4), /* HI */
1775 COSTS_N_INSNS (3), /* SI */
1776 COSTS_N_INSNS (4), /* DI */
1777 COSTS_N_INSNS (2)}, /* other */
1778 0, /* cost of multiply per each bit set */
1779 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1780 COSTS_N_INSNS (26), /* HI */
1781 COSTS_N_INSNS (42), /* SI */
1782 COSTS_N_INSNS (74), /* DI */
1783 COSTS_N_INSNS (74)}, /* other */
1784 COSTS_N_INSNS (1), /* cost of movsx */
1785 COSTS_N_INSNS (1), /* cost of movzx */
1786 8, /* "large" insn */
1787 17, /* MOVE_RATIO */
1788 4, /* cost for loading QImode using movzbl */
1789 {4, 4, 4}, /* cost of loading integer registers
1790 in QImode, HImode and SImode.
1791 Relative to reg-reg move (2). */
1792 {4, 4, 4}, /* cost of storing integer registers */
1793 4, /* cost of reg,reg fld/fst */
1794 {12, 12, 12}, /* cost of loading fp registers
1795 in SFmode, DFmode and XFmode */
1796 {6, 6, 8}, /* cost of storing fp registers
1797 in SFmode, DFmode and XFmode */
1798 2, /* cost of moving MMX register */
1799 {8, 8}, /* cost of loading MMX registers
1800 in SImode and DImode */
1801 {8, 8}, /* cost of storing MMX registers
1802 in SImode and DImode */
1803 2, /* cost of moving SSE register */
1804 {8, 8, 8}, /* cost of loading SSE registers
1805 in SImode, DImode and TImode */
1806 {8, 8, 8}, /* cost of storing SSE registers
1807 in SImode, DImode and TImode */
1808 5, /* MMX or SSE register to integer */
1809 32, /* size of l1 cache. */
1810 512, /* size of l2 cache. */
1811 64, /* size of prefetch block */
1812 6, /* number of parallel prefetches */
1813    /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1814       value is increased to the perhaps more appropriate value of 5.  */
1815 3, /* Branch cost */
1816 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1817 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1818 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1819 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1820 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1821 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1822 generic_memcpy,
1823 generic_memset,
1824 1, /* scalar_stmt_cost. */
1825 1, /* scalar load_cost. */
1826 1, /* scalar_store_cost. */
1827 1, /* vec_stmt_cost. */
1828 1, /* vec_to_scalar_cost. */
1829 1, /* scalar_to_vec_cost. */
1830 1, /* vec_align_load_cost. */
1831 2, /* vec_unalign_load_cost. */
1832 1, /* vec_store_cost. */
1833 3, /* cond_taken_branch_cost. */
1834 1, /* cond_not_taken_branch_cost. */
1835 };
1836
1837  /* core_cost should produce code tuned for the Core family of CPUs.  */
1838 static stringop_algs core_memcpy[2] = {
1839 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1840 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1841 {-1, libcall, false}}}};
1842 static stringop_algs core_memset[2] = {
1843 {libcall, {{6, loop_1_byte, true},
1844 {24, loop, true},
1845 {8192, rep_prefix_4_byte, true},
1846 {-1, libcall, false}}},
1847 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1848 {-1, libcall, false}}}};
1849
1850 static const
1851 struct processor_costs core_cost = {
1852 COSTS_N_INSNS (1), /* cost of an add instruction */
1853    /* On all chips taken into consideration, lea takes 2 cycles or more.  With
1854       this cost, however, our current implementation of synth_mult results in
1855       the use of unnecessary temporary registers, causing regressions on several
1856       SPECfp benchmarks.  */
1857 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1858 COSTS_N_INSNS (1), /* variable shift costs */
1859 COSTS_N_INSNS (1), /* constant shift costs */
1860 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1861 COSTS_N_INSNS (4), /* HI */
1862 COSTS_N_INSNS (3), /* SI */
1863 COSTS_N_INSNS (4), /* DI */
1864 COSTS_N_INSNS (2)}, /* other */
1865 0, /* cost of multiply per each bit set */
1866 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1867 COSTS_N_INSNS (26), /* HI */
1868 COSTS_N_INSNS (42), /* SI */
1869 COSTS_N_INSNS (74), /* DI */
1870 COSTS_N_INSNS (74)}, /* other */
1871 COSTS_N_INSNS (1), /* cost of movsx */
1872 COSTS_N_INSNS (1), /* cost of movzx */
1873 8, /* "large" insn */
1874 17, /* MOVE_RATIO */
1875 4, /* cost for loading QImode using movzbl */
1876 {4, 4, 4}, /* cost of loading integer registers
1877 in QImode, HImode and SImode.
1878 Relative to reg-reg move (2). */
1879 {4, 4, 4}, /* cost of storing integer registers */
1880 4, /* cost of reg,reg fld/fst */
1881 {12, 12, 12}, /* cost of loading fp registers
1882 in SFmode, DFmode and XFmode */
1883 {6, 6, 8}, /* cost of storing fp registers
1884 in SFmode, DFmode and XFmode */
1885 2, /* cost of moving MMX register */
1886 {8, 8}, /* cost of loading MMX registers
1887 in SImode and DImode */
1888 {8, 8}, /* cost of storing MMX registers
1889 in SImode and DImode */
1890 2, /* cost of moving SSE register */
1891 {8, 8, 8}, /* cost of loading SSE registers
1892 in SImode, DImode and TImode */
1893 {8, 8, 8}, /* cost of storing SSE registers
1894 in SImode, DImode and TImode */
1895 5, /* MMX or SSE register to integer */
1896 64, /* size of l1 cache. */
1897 512, /* size of l2 cache. */
1898 64, /* size of prefetch block */
1899 6, /* number of parallel prefetches */
1900    /* FIXME: perhaps a more appropriate value is 5.  */
1901 3, /* Branch cost */
1902 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1903 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1904 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1905 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1906 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1907 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1908 core_memcpy,
1909 core_memset,
1910 1, /* scalar_stmt_cost. */
1911 1, /* scalar load_cost. */
1912 1, /* scalar_store_cost. */
1913 1, /* vec_stmt_cost. */
1914 1, /* vec_to_scalar_cost. */
1915 1, /* scalar_to_vec_cost. */
1916 1, /* vec_align_load_cost. */
1917 2, /* vec_unalign_load_cost. */
1918 1, /* vec_store_cost. */
1919 3, /* cond_taken_branch_cost. */
1920 1, /* cond_not_taken_branch_cost. */
1921 };
1922
1923
1924 /* Set by -mtune. */
1925 const struct processor_costs *ix86_tune_cost = &pentium_cost;
1926
1927 /* Set by -mtune or -Os. */
1928 const struct processor_costs *ix86_cost = &pentium_cost;
1929
1930 /* Processor feature/optimization bitmasks. */
1931 #define m_386 (1<<PROCESSOR_I386)
1932 #define m_486 (1<<PROCESSOR_I486)
1933 #define m_PENT (1<<PROCESSOR_PENTIUM)
1934 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1935 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1936 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1937 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1938 #define m_CORE2 (1<<PROCESSOR_CORE2)
1939 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
1940 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
1941 #define m_HASWELL (1<<PROCESSOR_HASWELL)
1942 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
1943 #define m_BONNELL (1<<PROCESSOR_BONNELL)
1944 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
1945
1946 #define m_GEODE (1<<PROCESSOR_GEODE)
1947 #define m_K6 (1<<PROCESSOR_K6)
1948 #define m_K6_GEODE (m_K6 | m_GEODE)
1949 #define m_K8 (1<<PROCESSOR_K8)
1950 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1951 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1952 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1953 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1954 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1955 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
1956 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
1957 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1958 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
1959 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
1960 #define m_BTVER (m_BTVER1 | m_BTVER2)
1961 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
1962
1963 #define m_GENERIC (1<<PROCESSOR_GENERIC)
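     /* These m_* masks identify sets of processors.  A tuning or architecture
        feature is enabled when the mask given for it (in x86-tune.def or in
        initial_ix86_arch_features below) has the bit of the selected processor
        set; see set_ix86_tune_features.  */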
1964
1965 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
1966 #undef DEF_TUNE
1967 #define DEF_TUNE(tune, name, selector) name,
1968 #include "x86-tune.def"
1969 #undef DEF_TUNE
1970 };
1971
1972 /* Feature tests against the various tunings. */
1973 unsigned char ix86_tune_features[X86_TUNE_LAST];
1974
1975  /* Feature tests against the various tunings, used to create
1976     ix86_tune_features based on the processor mask.  */
1977 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1978 #undef DEF_TUNE
1979 #define DEF_TUNE(tune, name, selector) selector,
1980 #include "x86-tune.def"
1981 #undef DEF_TUNE
1982 };
1983
1984 /* Feature tests against the various architecture variations. */
1985 unsigned char ix86_arch_features[X86_ARCH_LAST];
1986
1987 /* Feature tests against the various architecture variations, used to create
1988 ix86_arch_features based on the processor mask. */
1989 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1990 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
1991 ~(m_386 | m_486 | m_PENT | m_K6),
1992
1993 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1994 ~m_386,
1995
1996 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1997 ~(m_386 | m_486),
1998
1999 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2000 ~m_386,
2001
2002 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2003 ~m_386,
2004 };
2005
2006  /* In case the average insn count for a single function invocation is
2007     lower than this constant, emit fast (but longer) prologue and
2008     epilogue code.  */
2009 #define FAST_PROLOGUE_INSN_COUNT 20
2010
2011  /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
2012 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2013 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2014 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2015
2016 /* Array of the smallest class containing reg number REGNO, indexed by
2017 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2018
2019 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2020 {
2021 /* ax, dx, cx, bx */
2022 AREG, DREG, CREG, BREG,
2023 /* si, di, bp, sp */
2024 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2025 /* FP registers */
2026 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2027 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2028 /* arg pointer */
2029 NON_Q_REGS,
2030 /* flags, fpsr, fpcr, frame */
2031 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2032 /* SSE registers */
2033 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2034 SSE_REGS, SSE_REGS,
2035 /* MMX registers */
2036 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2037 MMX_REGS, MMX_REGS,
2038 /* REX registers */
2039 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2040 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2041 /* SSE REX registers */
2042 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2043 SSE_REGS, SSE_REGS,
2044 /* AVX-512 SSE registers */
2045 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2046 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2047 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2048 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2049 /* Mask registers. */
2050 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2051 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2052 };
2053
2054 /* The "default" register map used in 32bit mode. */
2055
2056 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2057 {
2058 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2059 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2060 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2061 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2062 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2063 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2064 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2065 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2066 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2067 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2068 };
2069
2070 /* The "default" register map used in 64bit mode. */
2071
2072 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2073 {
2074 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2075 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2076 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2077 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2078 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2079 8,9,10,11,12,13,14,15, /* extended integer registers */
2080 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2081 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2082 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2083 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2084 };
2085
2086 /* Define the register numbers to be used in Dwarf debugging information.
2087 The SVR4 reference port C compiler uses the following register numbers
2088 in its Dwarf output code:
2089 0 for %eax (gcc regno = 0)
2090 1 for %ecx (gcc regno = 2)
2091 2 for %edx (gcc regno = 1)
2092 3 for %ebx (gcc regno = 3)
2093 4 for %esp (gcc regno = 7)
2094 5 for %ebp (gcc regno = 6)
2095 6 for %esi (gcc regno = 4)
2096 7 for %edi (gcc regno = 5)
2097 The following three DWARF register numbers are never generated by
2098 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2099 believes these numbers have these meanings.
2100 8 for %eip (no gcc equivalent)
2101 9 for %eflags (gcc regno = 17)
2102 10 for %trapno (no gcc equivalent)
2103 It is not at all clear how we should number the FP stack registers
2104 for the x86 architecture. If the version of SDB on x86/svr4 were
2105 a bit less brain dead with respect to floating-point then we would
2106 have a precedent to follow with respect to DWARF register numbers
2107 for x86 FP registers, but the SDB on x86/svr4 is so completely
2108 broken with respect to FP registers that it is hardly worth thinking
2109 of it as something to strive for compatibility with.
2110 The version of x86/svr4 SDB I have at the moment does (partially)
2111 seem to believe that DWARF register number 11 is associated with
2112 the x86 register %st(0), but that's about all. Higher DWARF
2113 register numbers don't seem to be associated with anything in
2114 particular, and even for DWARF regno 11, SDB only seems to under-
2115 stand that it should say that a variable lives in %st(0) (when
2116 asked via an `=' command) if we said it was in DWARF regno 11,
2117 but SDB still prints garbage when asked for the value of the
2118 variable in question (via a `/' command).
2119 (Also note that the labels SDB prints for various FP stack regs
2120 when doing an `x' command are all wrong.)
2121 Note that these problems generally don't affect the native SVR4
2122 C compiler because it doesn't allow the use of -O with -g and
2123 because when it is *not* optimizing, it allocates a memory
2124 location for each floating-point variable, and the memory
2125 location is what gets described in the DWARF AT_location
2126 attribute for the variable in question.
2127 Regardless of the severe mental illness of the x86/svr4 SDB, we
2128 do something sensible here and we use the following DWARF
2129 register numbers. Note that these are all stack-top-relative
2130 numbers.
2131 11 for %st(0) (gcc regno = 8)
2132 12 for %st(1) (gcc regno = 9)
2133 13 for %st(2) (gcc regno = 10)
2134 14 for %st(3) (gcc regno = 11)
2135 15 for %st(4) (gcc regno = 12)
2136 16 for %st(5) (gcc regno = 13)
2137 17 for %st(6) (gcc regno = 14)
2138 18 for %st(7) (gcc regno = 15)
2139 */
2140 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2141 {
2142 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2143 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2144 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2145 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2146 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2148 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2149 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2150 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2151 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2152 };
2153
2154 /* Define parameter passing and return registers. */
2155
2156 static int const x86_64_int_parameter_registers[6] =
2157 {
2158 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2159 };
2160
2161 static int const x86_64_ms_abi_int_parameter_registers[4] =
2162 {
2163 CX_REG, DX_REG, R8_REG, R9_REG
2164 };
2165
2166 static int const x86_64_int_return_registers[4] =
2167 {
2168 AX_REG, DX_REG, DI_REG, SI_REG
2169 };
2170
2171 /* Additional registers that are clobbered by SYSV calls. */
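     /* rsi, rdi and xmm6-xmm15 are call-saved under the Microsoft x64 ABI but
        call-clobbered under the SysV ABI, so an MS-ABI function has to assume
        they are clobbered across a call to a SysV-ABI function.  */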
2172
2173 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2174 {
2175 SI_REG, DI_REG,
2176 XMM6_REG, XMM7_REG,
2177 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2178 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2179 };
2180
2181 /* Define the structure for the machine field in struct function. */
2182
2183 struct GTY(()) stack_local_entry {
2184 unsigned short mode;
2185 unsigned short n;
2186 rtx rtl;
2187 struct stack_local_entry *next;
2188 };
2189
2190 /* Structure describing stack frame layout.
2191 Stack grows downward:
2192
2193 [arguments]
2194 <- ARG_POINTER
2195 saved pc
2196
2197 saved static chain if ix86_static_chain_on_stack
2198
2199 saved frame pointer if frame_pointer_needed
2200 <- HARD_FRAME_POINTER
2201 [saved regs]
2202 <- regs_save_offset
2203 [padding0]
2204
2205 [saved SSE regs]
2206 <- sse_regs_save_offset
2207 [padding1] |
2208 | <- FRAME_POINTER
2209 [va_arg registers] |
2210 |
2211 [frame] |
2212 |
2213 [padding2] | = to_allocate
2214 <- STACK_POINTER
2215 */
2216 struct ix86_frame
2217 {
2218   int nsseregs;			/* Number of SSE registers to save.  */
2219   int nregs;			/* Number of general registers to save.  */
2220   int va_arg_size;			/* Size of the register save area used for va_arg.  */
2221   int red_zone_size;		/* Size of the red zone actually used.  */
2222   int outgoing_arguments_size;	/* Size of the outgoing argument area.  */
2223
2224 /* The offsets relative to ARG_POINTER. */
2225 HOST_WIDE_INT frame_pointer_offset;
2226 HOST_WIDE_INT hard_frame_pointer_offset;
2227 HOST_WIDE_INT stack_pointer_offset;
2228 HOST_WIDE_INT hfp_save_offset;
2229 HOST_WIDE_INT reg_save_offset;
2230 HOST_WIDE_INT sse_reg_save_offset;
2231
2232 /* When save_regs_using_mov is set, emit prologue using
2233 move instead of push instructions. */
2234 bool save_regs_using_mov;
2235 };
2236
2237  /* Which CPU we are scheduling for.  */
2238 enum attr_cpu ix86_schedule;
2239
2240  /* Which CPU we are optimizing for.  */
2241 enum processor_type ix86_tune;
2242
2243 /* Which instruction set architecture to use. */
2244 enum processor_type ix86_arch;
2245
2246 /* True if processor has SSE prefetch instruction. */
2247 unsigned char x86_prefetch_sse;
2248
2249 /* -mstackrealign option */
2250 static const char ix86_force_align_arg_pointer_string[]
2251 = "force_align_arg_pointer";
2252
2253 static rtx (*ix86_gen_leave) (void);
2254 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2255 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2256 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2257 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2258 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2259 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2260 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2261 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2262 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2263 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2264 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2265
2266 /* Preferred alignment for stack boundary in bits. */
2267 unsigned int ix86_preferred_stack_boundary;
2268
2269 /* Alignment for incoming stack boundary in bits specified at
2270 command line. */
2271 static unsigned int ix86_user_incoming_stack_boundary;
2272
2273 /* Default alignment for incoming stack boundary in bits. */
2274 static unsigned int ix86_default_incoming_stack_boundary;
2275
2276 /* Alignment for incoming stack boundary in bits. */
2277 unsigned int ix86_incoming_stack_boundary;
2278
2279 /* Calling abi specific va_list type nodes. */
2280 static GTY(()) tree sysv_va_list_type_node;
2281 static GTY(()) tree ms_va_list_type_node;
2282
2283 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2284 char internal_label_prefix[16];
2285 int internal_label_prefix_len;
2286
2287 /* Fence to use after loop using movnt. */
2288 tree x86_mfence;
2289
2290  /* Register class used for passing a given 64-bit part of the argument.
2291     These represent classes as documented by the x86-64 psABI, with the
2292     exception of the SSESF and SSEDF classes, which are basically the SSE
2293     class, except that GCC uses SFmode or DFmode moves instead of DImode
2294     to avoid reformatting penalties.
2295     Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode moves
2296     whenever possible (the upper half then contains only padding).  */
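     /* For example (assuming the usual psABI layout), a structure such as
        struct { double d; int i; } occupies two eightbytes: the first is
        classified as SSEDF (just the double) and the second as INTEGERSI
        (the int plus trailing padding).  */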
2297 enum x86_64_reg_class
2298 {
2299 X86_64_NO_CLASS,
2300 X86_64_INTEGER_CLASS,
2301 X86_64_INTEGERSI_CLASS,
2302 X86_64_SSE_CLASS,
2303 X86_64_SSESF_CLASS,
2304 X86_64_SSEDF_CLASS,
2305 X86_64_SSEUP_CLASS,
2306 X86_64_X87_CLASS,
2307 X86_64_X87UP_CLASS,
2308 X86_64_COMPLEX_X87_CLASS,
2309 X86_64_MEMORY_CLASS
2310 };
2311
2312 #define MAX_CLASSES 8
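     /* Classification assigns one class per eightbyte, so MAX_CLASSES covers
        objects of up to 8 * 8 = 64 bytes, enough for a full 512-bit vector.  */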
2313
2314 /* Table of constants used by fldpi, fldln2, etc.... */
2315 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2316 static bool ext_80387_constants_init = 0;
2317
2318 \f
2319 static struct machine_function * ix86_init_machine_status (void);
2320 static rtx ix86_function_value (const_tree, const_tree, bool);
2321 static bool ix86_function_value_regno_p (const unsigned int);
2322 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2323 const_tree);
2324 static rtx ix86_static_chain (const_tree, bool);
2325 static int ix86_function_regparm (const_tree, const_tree);
2326 static void ix86_compute_frame_layout (struct ix86_frame *);
2327 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2328 rtx, rtx, int);
2329 static void ix86_add_new_builtins (HOST_WIDE_INT);
2330 static tree ix86_canonical_va_list_type (tree);
2331 static void predict_jump (int);
2332 static unsigned int split_stack_prologue_scratch_regno (void);
2333 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2334
2335 enum ix86_function_specific_strings
2336 {
2337 IX86_FUNCTION_SPECIFIC_ARCH,
2338 IX86_FUNCTION_SPECIFIC_TUNE,
2339 IX86_FUNCTION_SPECIFIC_MAX
2340 };
2341
2342 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2343 const char *, enum fpmath_unit, bool);
2344 static void ix86_function_specific_save (struct cl_target_option *,
2345 struct gcc_options *opts);
2346 static void ix86_function_specific_restore (struct gcc_options *opts,
2347 struct cl_target_option *);
2348 static void ix86_function_specific_print (FILE *, int,
2349 struct cl_target_option *);
2350 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2351 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2352 struct gcc_options *,
2353 struct gcc_options *,
2354 struct gcc_options *);
2355 static bool ix86_can_inline_p (tree, tree);
2356 static void ix86_set_current_function (tree);
2357 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2358
2359 static enum calling_abi ix86_function_abi (const_tree);
2360
2361 \f
2362 #ifndef SUBTARGET32_DEFAULT_CPU
2363 #define SUBTARGET32_DEFAULT_CPU "i386"
2364 #endif
2365
2366 /* Whether -mtune= or -march= were specified */
2367 static int ix86_tune_defaulted;
2368 static int ix86_arch_specified;
2369
2370 /* Vectorization library interface and handlers. */
2371 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2372
2373 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2374 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2375
2376 /* Processor target table, indexed by processor number */
2377 struct ptt
2378 {
2379 const char *const name; /* processor name */
2380 const struct processor_costs *cost; /* Processor costs */
2381 const int align_loop; /* Default alignments. */
2382 const int align_loop_max_skip;
2383 const int align_jump;
2384 const int align_jump_max_skip;
2385 const int align_func;
2386 };
2387
2388 /* This table must be in sync with enum processor_type in i386.h. */
2389 static const struct ptt processor_target_table[PROCESSOR_max] =
2390 {
2391 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2392 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2393 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2394 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2395 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2396 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2397 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2398 {"core2", &core_cost, 16, 10, 16, 10, 16},
2399 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2400 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2401 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2402 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2403 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2404 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2405 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2406 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2407 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2408 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2409 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2410 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2411 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2412 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2413 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2414 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2415 };
2416 \f
2417 static bool
2418 gate_insert_vzeroupper (void)
2419 {
2420 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2421 }
2422
2423 static unsigned int
2424 rest_of_handle_insert_vzeroupper (void)
2425 {
2426 int i;
2427
2428    /* vzeroupper instructions are inserted immediately after reload to
2429       account for possible spills from 256-bit registers.  The pass
2430       reuses the mode switching infrastructure by re-running the mode
2431       insertion pass, so disable entities that have already been processed.  */
2432 for (i = 0; i < MAX_386_ENTITIES; i++)
2433 ix86_optimize_mode_switching[i] = 0;
2434
2435 ix86_optimize_mode_switching[AVX_U128] = 1;
2436
2437 /* Call optimize_mode_switching. */
2438 g->get_passes ()->execute_pass_mode_switching ();
2439 return 0;
2440 }
2441
2442 namespace {
2443
2444 const pass_data pass_data_insert_vzeroupper =
2445 {
2446 RTL_PASS, /* type */
2447 "vzeroupper", /* name */
2448 OPTGROUP_NONE, /* optinfo_flags */
2449 true, /* has_gate */
2450 true, /* has_execute */
2451 TV_NONE, /* tv_id */
2452 0, /* properties_required */
2453 0, /* properties_provided */
2454 0, /* properties_destroyed */
2455 0, /* todo_flags_start */
2456 ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
2457 };
2458
2459 class pass_insert_vzeroupper : public rtl_opt_pass
2460 {
2461 public:
2462 pass_insert_vzeroupper(gcc::context *ctxt)
2463 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2464 {}
2465
2466 /* opt_pass methods: */
2467 bool gate () { return gate_insert_vzeroupper (); }
2468 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2469
2470 }; // class pass_insert_vzeroupper
2471
2472 } // anon namespace
2473
2474 rtl_opt_pass *
2475 make_pass_insert_vzeroupper (gcc::context *ctxt)
2476 {
2477 return new pass_insert_vzeroupper (ctxt);
2478 }
2479
2480 /* Return true if a red-zone is in use. */
2481
2482 static inline bool
2483 ix86_using_red_zone (void)
2484 {
2485 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2486 }
2487 \f
2488 /* Return a string that documents the current -m options. The caller is
2489 responsible for freeing the string. */
2490
2491 static char *
2492 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2493 const char *tune, enum fpmath_unit fpmath,
2494 bool add_nl_p)
2495 {
2496 struct ix86_target_opts
2497 {
2498 const char *option; /* option string */
2499 HOST_WIDE_INT mask; /* isa mask options */
2500 };
2501
2502    /* This table is ordered so that options like -msse4.2 that imply
2503       other options are matched before the options they imply.  */
2504 static struct ix86_target_opts isa_opts[] =
2505 {
2506 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2507 { "-mfma", OPTION_MASK_ISA_FMA },
2508 { "-mxop", OPTION_MASK_ISA_XOP },
2509 { "-mlwp", OPTION_MASK_ISA_LWP },
2510 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2511 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2512 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2513 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2514 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2515 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2516 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2517 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2518 { "-msse3", OPTION_MASK_ISA_SSE3 },
2519 { "-msse2", OPTION_MASK_ISA_SSE2 },
2520 { "-msse", OPTION_MASK_ISA_SSE },
2521 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2522 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2523 { "-mmmx", OPTION_MASK_ISA_MMX },
2524 { "-mabm", OPTION_MASK_ISA_ABM },
2525 { "-mbmi", OPTION_MASK_ISA_BMI },
2526 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2527 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2528 { "-mhle", OPTION_MASK_ISA_HLE },
2529 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2530 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2531 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2532 { "-madx", OPTION_MASK_ISA_ADX },
2533 { "-mtbm", OPTION_MASK_ISA_TBM },
2534 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2535 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2536 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2537 { "-maes", OPTION_MASK_ISA_AES },
2538 { "-msha", OPTION_MASK_ISA_SHA },
2539 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2540 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2541 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2542 { "-mf16c", OPTION_MASK_ISA_F16C },
2543 { "-mrtm", OPTION_MASK_ISA_RTM },
2544 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2545 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2546 };
2547
2548 /* Flag options. */
2549 static struct ix86_target_opts flag_opts[] =
2550 {
2551 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2552 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2553 { "-m80387", MASK_80387 },
2554 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2555 { "-malign-double", MASK_ALIGN_DOUBLE },
2556 { "-mcld", MASK_CLD },
2557 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2558 { "-mieee-fp", MASK_IEEE_FP },
2559 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2560 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2561 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2562 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2563 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2564 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2565 { "-mno-red-zone", MASK_NO_RED_ZONE },
2566 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2567 { "-mrecip", MASK_RECIP },
2568 { "-mrtd", MASK_RTD },
2569 { "-msseregparm", MASK_SSEREGPARM },
2570 { "-mstack-arg-probe", MASK_STACK_PROBE },
2571 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2572 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2573 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2574 { "-mvzeroupper", MASK_VZEROUPPER },
2575 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2576 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2577 { "-mprefer-avx128", MASK_PREFER_AVX128},
2578 };
2579
2580 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2581
2582 char isa_other[40];
2583 char target_other[40];
2584 unsigned num = 0;
2585 unsigned i, j;
2586 char *ret;
2587 char *ptr;
2588 size_t len;
2589 size_t line_len;
2590 size_t sep_len;
2591 const char *abi;
2592
2593 memset (opts, '\0', sizeof (opts));
2594
2595 /* Add -march= option. */
2596 if (arch)
2597 {
2598 opts[num][0] = "-march=";
2599 opts[num++][1] = arch;
2600 }
2601
2602 /* Add -mtune= option. */
2603 if (tune)
2604 {
2605 opts[num][0] = "-mtune=";
2606 opts[num++][1] = tune;
2607 }
2608
2609 /* Add -m32/-m64/-mx32. */
2610 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2611 {
2612 if ((isa & OPTION_MASK_ABI_64) != 0)
2613 abi = "-m64";
2614 else
2615 abi = "-mx32";
2616 isa &= ~ (OPTION_MASK_ISA_64BIT
2617 | OPTION_MASK_ABI_64
2618 | OPTION_MASK_ABI_X32);
2619 }
2620 else
2621 abi = "-m32";
2622 opts[num++][0] = abi;
2623
2624 /* Pick out the options in isa options. */
2625 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2626 {
2627 if ((isa & isa_opts[i].mask) != 0)
2628 {
2629 opts[num++][0] = isa_opts[i].option;
2630 isa &= ~ isa_opts[i].mask;
2631 }
2632 }
2633
2634 if (isa && add_nl_p)
2635 {
2636 opts[num++][0] = isa_other;
2637 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2638 isa);
2639 }
2640
2641 /* Add flag options. */
2642 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2643 {
2644 if ((flags & flag_opts[i].mask) != 0)
2645 {
2646 opts[num++][0] = flag_opts[i].option;
2647 flags &= ~ flag_opts[i].mask;
2648 }
2649 }
2650
2651 if (flags && add_nl_p)
2652 {
2653 opts[num++][0] = target_other;
2654 sprintf (target_other, "(other flags: %#x)", flags);
2655 }
2656
2657 /* Add -fpmath= option. */
2658 if (fpmath)
2659 {
2660 opts[num][0] = "-mfpmath=";
2661 switch ((int) fpmath)
2662 {
2663 case FPMATH_387:
2664 opts[num++][1] = "387";
2665 break;
2666
2667 case FPMATH_SSE:
2668 opts[num++][1] = "sse";
2669 break;
2670
2671 case FPMATH_387 | FPMATH_SSE:
2672 opts[num++][1] = "sse+387";
2673 break;
2674
2675 default:
2676 gcc_unreachable ();
2677 }
2678 }
2679
2680 /* Any options? */
2681 if (num == 0)
2682 return NULL;
2683
2684 gcc_assert (num < ARRAY_SIZE (opts));
2685
2686 /* Size the string. */
2687 len = 0;
2688 sep_len = (add_nl_p) ? 3 : 1;
2689 for (i = 0; i < num; i++)
2690 {
2691 len += sep_len;
2692 for (j = 0; j < 2; j++)
2693 if (opts[i][j])
2694 len += strlen (opts[i][j]);
2695 }
2696
2697 /* Build the string. */
2698 ret = ptr = (char *) xmalloc (len);
2699 line_len = 0;
2700
2701 for (i = 0; i < num; i++)
2702 {
2703 size_t len2[2];
2704
2705 for (j = 0; j < 2; j++)
2706 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2707
2708 if (i != 0)
2709 {
2710 *ptr++ = ' ';
2711 line_len++;
2712
2713 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2714 {
2715 *ptr++ = '\\';
2716 *ptr++ = '\n';
2717 line_len = 0;
2718 }
2719 }
2720
2721 for (j = 0; j < 2; j++)
2722 if (opts[i][j])
2723 {
2724 memcpy (ptr, opts[i][j], len2[j]);
2725 ptr += len2[j];
2726 line_len += len2[j];
2727 }
2728 }
2729
2730 *ptr = '\0';
2731 gcc_assert (ret + len >= ptr);
2732
2733 return ret;
2734 }
2735
2736  /* Return true if profiling code should be emitted before the
2737     prologue, and false otherwise.
2738     On x86 this is the case only when -mfentry is in effect.  */
2739 static bool
2740 ix86_profile_before_prologue (void)
2741 {
2742 return flag_fentry != 0;
2743 }
2744
2745 /* Function that is callable from the debugger to print the current
2746 options. */
2747 void ATTRIBUTE_UNUSED
2748 ix86_debug_options (void)
2749 {
2750 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2751 ix86_arch_string, ix86_tune_string,
2752 ix86_fpmath, true);
2753
2754 if (opts)
2755 {
2756 fprintf (stderr, "%s\n\n", opts);
2757 free (opts);
2758 }
2759 else
2760 fputs ("<no options>\n\n", stderr);
2761
2762 return;
2763 }
2764
2765 static const char *stringop_alg_names[] = {
2766 #define DEF_ENUM
2767 #define DEF_ALG(alg, name) #name,
2768 #include "stringop.def"
2769 #undef DEF_ENUM
2770 #undef DEF_ALG
2771 };
2772
2773 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2774 The string is of the following form (or comma separated list of it):
2775
2776 strategy_alg:max_size:[align|noalign]
2777
2778     where the full size range for the strategy is either [0, max_size] or
2779     [min_size, max_size], in which min_size is one greater than the max_size
2780     of the preceding range.  The last size range must have max_size == -1.
2781
2782 Examples:
2783
2784 1.
2785 -mmemcpy-strategy=libcall:-1:noalign
2786
2787 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2788
2789
2790 2.
2791 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2792
2793 This is to tell the compiler to use the following strategy for memset
2794 1) when the expected size is between [1, 16], use rep_8byte strategy;
2795 2) when the size is between [17, 2048], use vector_loop;
2796 3) when the size is > 2048, use libcall. */
2797
2798 struct stringop_size_range
2799 {
2800 int max;
2801 stringop_alg alg;
2802 bool noalign;
2803 };
2804
2805 static void
2806 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2807 {
2808 const struct stringop_algs *default_algs;
2809 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2810 char *curr_range_str, *next_range_str;
2811 int i = 0, n = 0;
2812
2813 if (is_memset)
2814 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2815 else
2816 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2817
2818 curr_range_str = strategy_str;
2819
2820 do
2821 {
2822 int maxs;
2823 char alg_name[128];
2824 char align[16];
2825 next_range_str = strchr (curr_range_str, ',');
2826 if (next_range_str)
2827 *next_range_str++ = '\0';
2828
2829 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2830 alg_name, &maxs, align))
2831 {
2832 error ("wrong arg %s to option %s", curr_range_str,
2833 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2834 return;
2835 }
2836
2837 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2838 {
2839 error ("size ranges of option %s should be increasing",
2840 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2841 return;
2842 }
2843
2844 for (i = 0; i < last_alg; i++)
2845 if (!strcmp (alg_name, stringop_alg_names[i]))
2846 break;
2847
2848 if (i == last_alg)
2849 {
2850 error ("wrong stringop strategy name %s specified for option %s",
2851 alg_name,
2852 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2853 return;
2854 }
2855
2856 input_ranges[n].max = maxs;
2857 input_ranges[n].alg = (stringop_alg) i;
2858 if (!strcmp (align, "align"))
2859 input_ranges[n].noalign = false;
2860 else if (!strcmp (align, "noalign"))
2861 input_ranges[n].noalign = true;
2862 else
2863 {
2864 error ("unknown alignment %s specified for option %s",
2865 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2866 return;
2867 }
2868 n++;
2869 curr_range_str = next_range_str;
2870 }
2871 while (curr_range_str);
2872
2873 if (input_ranges[n - 1].max != -1)
2874 {
2875 error ("the max value for the last size range should be -1"
2876 " for option %s",
2877 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2878 return;
2879 }
2880
2881 if (n > MAX_STRINGOP_ALGS)
2882 {
2883 error ("too many size ranges specified in option %s",
2884 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2885 return;
2886 }
2887
2888 /* Now override the default algs array. */
2889 for (i = 0; i < n; i++)
2890 {
2891 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2892 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2893 = input_ranges[i].alg;
2894 *const_cast<int *>(&default_algs->size[i].noalign)
2895 = input_ranges[i].noalign;
2896 }
2897 }
2898
2899 \f
2900  /* Parse the -mtune-ctrl= option.  When DUMP is true,
2901     print the features that are explicitly set.  */
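     /* The option value is a comma-separated list of tuning feature names as
        listed in x86-tune.def; prefixing a name with '^' clears the feature
        instead of setting it.  For instance (using placeholder names),
        -mtune-ctrl=feature_a,^feature_b sets feature_a and clears feature_b.  */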
2902
2903 static void
2904 parse_mtune_ctrl_str (bool dump)
2905 {
2906 if (!ix86_tune_ctrl_string)
2907 return;
2908
2909 char *next_feature_string = NULL;
2910 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2911 char *orig = curr_feature_string;
2912 int i;
2913 do
2914 {
2915 bool clear = false;
2916
2917 next_feature_string = strchr (curr_feature_string, ',');
2918 if (next_feature_string)
2919 *next_feature_string++ = '\0';
2920 if (*curr_feature_string == '^')
2921 {
2922 curr_feature_string++;
2923 clear = true;
2924 }
2925 for (i = 0; i < X86_TUNE_LAST; i++)
2926 {
2927 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
2928 {
2929 ix86_tune_features[i] = !clear;
2930 if (dump)
2931 fprintf (stderr, "Explicitly %s feature %s\n",
2932 clear ? "clear" : "set", ix86_tune_feature_names[i]);
2933 break;
2934 }
2935 }
2936 if (i == X86_TUNE_LAST)
2937 error ("Unknown parameter to option -mtune-ctrl: %s",
2938 clear ? curr_feature_string - 1 : curr_feature_string);
2939 curr_feature_string = next_feature_string;
2940 }
2941 while (curr_feature_string);
2942 free (orig);
2943 }
2944
2945 /* Helper function to set ix86_tune_features. IX86_TUNE is the
2946 processor type. */
2947
2948 static void
2949 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
2950 {
2951 unsigned int ix86_tune_mask = 1u << ix86_tune;
2952 int i;
2953
2954 for (i = 0; i < X86_TUNE_LAST; ++i)
2955 {
2956 if (ix86_tune_no_default)
2957 ix86_tune_features[i] = 0;
2958 else
2959 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2960 }
2961
2962 if (dump)
2963 {
2964 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
2965 for (i = 0; i < X86_TUNE_LAST; i++)
2966 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
2967 ix86_tune_features[i] ? "on" : "off");
2968 }
2969
2970 parse_mtune_ctrl_str (dump);
2971 }
2972
2973
2974 /* Override various settings based on options. If MAIN_ARGS_P, the
2975 options are from the command line, otherwise they are from
2976 attributes. */
2977
2978 static void
2979 ix86_option_override_internal (bool main_args_p,
2980 struct gcc_options *opts,
2981 struct gcc_options *opts_set)
2982 {
2983 int i;
2984 unsigned int ix86_arch_mask;
2985 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
2986 const char *prefix;
2987 const char *suffix;
2988 const char *sw;
2989
2990 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2991 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2992 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2993 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2994 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2995 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2996 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2997 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2998 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2999 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3000 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3001 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3002 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3003 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3004 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3005 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3006 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3007 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3008 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3009 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3010 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3011 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3012 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3013 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3014 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3015 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3016 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3017 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3018 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3019 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3020 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3021 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3022 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3023 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3024 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3025 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3026 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3027 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3028 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3029 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3030 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3031 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3032 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3033 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3034 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3035
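/* Composite PTA_* feature sets used by the processor alias entries below;
   each later generation extends an earlier set.  */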
3036 #define PTA_CORE2 \
3037 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3038 | PTA_CX16 | PTA_FXSR)
3039 #define PTA_NEHALEM \
3040 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3041 #define PTA_WESTMERE \
3042 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3043 #define PTA_SANDYBRIDGE \
3044 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3045 #define PTA_IVYBRIDGE \
3046 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3047 #define PTA_HASWELL \
3048 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3049 | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE)
3050 #define PTA_BROADWELL \
3051 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3052 #define PTA_BONNELL \
3053 (PTA_CORE2 | PTA_MOVBE)
3054 #define PTA_SILVERMONT \
3055 (PTA_WESTMERE | PTA_MOVBE)
3056
3057 /* If this reaches 64, the flags field in struct pta below needs to be widened. */
3058
3059 static struct pta
3060 {
3061 const char *const name; /* processor name or nickname. */
3062 const enum processor_type processor;
3063 const enum attr_cpu schedule;
3064 const unsigned HOST_WIDE_INT flags;
3065 }
3066 const processor_alias_table[] =
3067 {
3068 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3069 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3070 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3071 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3072 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3073 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3074 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3075 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3076 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3077 PTA_MMX | PTA_SSE | PTA_FXSR},
3078 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3079 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3080 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3081 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3082 PTA_MMX | PTA_SSE | PTA_FXSR},
3083 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3084 PTA_MMX | PTA_SSE | PTA_FXSR},
3085 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3086 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3087 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3088 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3089 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3090 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3091 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3092 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3093 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3094 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3095 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3096 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3097 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3098 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3099 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3100 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3101 PTA_SANDYBRIDGE},
3102 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3103 PTA_SANDYBRIDGE},
3104 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3105 PTA_IVYBRIDGE},
3106 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3107 PTA_IVYBRIDGE},
3108 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3109 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3110 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3111 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3112 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3113 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3114 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3115 {"intel", PROCESSOR_SILVERMONT, CPU_SLM, PTA_NEHALEM},
3116 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3117 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3118 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3119 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3120 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3121 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3122 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3123 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3124 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3125 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3126 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3127 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3128 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3129 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3130 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3131 {"x86-64", PROCESSOR_K8, CPU_K8,
3132 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3133 {"k8", PROCESSOR_K8, CPU_K8,
3134 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3135 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3136 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3137 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3138 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3139 {"opteron", PROCESSOR_K8, CPU_K8,
3140 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3141 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3142 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3143 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3144 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3145 {"athlon64", PROCESSOR_K8, CPU_K8,
3146 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3147 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3148 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3149 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3150 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3151 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3152 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3153 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3154 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3155 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3156 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3157 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3158 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3159 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3160 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3161 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3162 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3163 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3164 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3165 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3166 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3167 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3168 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3169 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3170 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3171 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3172 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3173 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3174 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3175 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3176 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3177 | PTA_XSAVEOPT | PTA_FSGSBASE},
3178 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3179 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3180 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3181 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3182 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3183 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3184 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3185 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3186 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3187 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3188 | PTA_FXSR | PTA_XSAVE},
3189 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3190 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3191 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3192 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3193 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3194 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3195
3196 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3197 PTA_64BIT
3198 | PTA_HLE /* flags are only used for -march switch. */ },
3199 };
3200
3201 /* -mrecip options. */
3202 static struct
3203 {
3204 const char *string; /* option name */
3205 unsigned int mask; /* mask bits to set */
3206 }
3207 const recip_options[] =
3208 {
3209 { "all", RECIP_MASK_ALL },
3210 { "none", RECIP_MASK_NONE },
3211 { "div", RECIP_MASK_DIV },
3212 { "sqrt", RECIP_MASK_SQRT },
3213 { "vec-div", RECIP_MASK_VEC_DIV },
3214 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3215 };
3216
3217 int const pta_size = ARRAY_SIZE (processor_alias_table);
3218
3219 /* Set up prefix/suffix so the error messages refer to either the command
3220 line argument, or the attribute(target). */
3221 if (main_args_p)
3222 {
3223 prefix = "-m";
3224 suffix = "";
3225 sw = "switch";
3226 }
3227 else
3228 {
3229 prefix = "option(\"";
3230 suffix = "\")";
3231 sw = "attribute";
3232 }
3233
3234 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3235 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3236 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3237 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3238 #ifdef TARGET_BI_ARCH
3239 else
3240 {
3241 #if TARGET_BI_ARCH == 1
3242 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3243 is on and OPTION_MASK_ABI_X32 is off. We turn off
3244 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3245 -mx32. */
3246 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3247 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3248 #else
3249 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3250 on and OPTION_MASK_ABI_64 is off. We turn off
3251 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3252 -m64. */
3253 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3254 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3255 #endif
3256 }
3257 #endif
3258
3259 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3260 {
3261 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3262 OPTION_MASK_ABI_64 for TARGET_X32. */
3263 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3264 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3265 }
3266 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3267 {
3268 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3269 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3270 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3271 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3272 }
3273
3274 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3275 SUBTARGET_OVERRIDE_OPTIONS;
3276 #endif
3277
3278 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3279 SUBSUBTARGET_OVERRIDE_OPTIONS;
3280 #endif
3281
3282 /* -fPIC is the default for x86_64 Darwin. */
3283 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3284 opts->x_flag_pic = 2;
3285
3286 /* Need to check -mtune=generic first. */
3287 if (opts->x_ix86_tune_string)
3288 {
3289 /* As special support for cross compilers we read -mtune=native
3290 as -mtune=generic. With native compilers we won't see the
3291 -mtune=native, as it was changed by the driver. */
3292 if (!strcmp (opts->x_ix86_tune_string, "native"))
3293 {
3294 opts->x_ix86_tune_string = "generic";
3295 }
3296 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3297 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3298 "%stune=k8%s or %stune=generic%s instead as appropriate",
3299 prefix, suffix, prefix, suffix, prefix, suffix);
3300 }
3301 else
3302 {
3303 if (opts->x_ix86_arch_string)
3304 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3305 if (!opts->x_ix86_tune_string)
3306 {
3307 opts->x_ix86_tune_string
3308 = processor_target_table[TARGET_CPU_DEFAULT].name;
3309 ix86_tune_defaulted = 1;
3310 }
3311
3312 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3313 or defaulted. We need to use a sensible tune option. */
3314 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3315 {
3316 opts->x_ix86_tune_string = "generic";
3317 }
3318 }
3319
3320 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3321 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3322 {
3323 /* rep; movq isn't available in 32-bit code. */
3324 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3325 opts->x_ix86_stringop_alg = no_stringop;
3326 }
3327
3328 if (!opts->x_ix86_arch_string)
3329 opts->x_ix86_arch_string
3330 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3331 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3332 else
3333 ix86_arch_specified = 1;
3334
3335 if (opts_set->x_ix86_pmode)
3336 {
3337 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3338 && opts->x_ix86_pmode == PMODE_SI)
3339 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3340 && opts->x_ix86_pmode == PMODE_DI))
3341 error ("address mode %qs not supported in the %s bit mode",
3342 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3343 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3344 }
3345 else
3346 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3347 ? PMODE_DI : PMODE_SI;
3348
3349 if (!opts_set->x_ix86_abi)
3350 opts->x_ix86_abi = DEFAULT_ABI;
3351
3352 /* For targets using the MS ABI, enable MS extensions if they have not
3353 been explicitly turned off. For non-MS-ABI targets, turn this
3354 option off. */
3355 if (!opts_set->x_flag_ms_extensions)
3356 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3357
3358 if (opts_set->x_ix86_cmodel)
3359 {
3360 switch (opts->x_ix86_cmodel)
3361 {
3362 case CM_SMALL:
3363 case CM_SMALL_PIC:
3364 if (opts->x_flag_pic)
3365 opts->x_ix86_cmodel = CM_SMALL_PIC;
3366 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3367 error ("code model %qs not supported in the %s bit mode",
3368 "small", "32");
3369 break;
3370
3371 case CM_MEDIUM:
3372 case CM_MEDIUM_PIC:
3373 if (opts->x_flag_pic)
3374 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3375 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3376 error ("code model %qs not supported in the %s bit mode",
3377 "medium", "32");
3378 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3379 error ("code model %qs not supported in x32 mode",
3380 "medium");
3381 break;
3382
3383 case CM_LARGE:
3384 case CM_LARGE_PIC:
3385 if (opts->x_flag_pic)
3386 opts->x_ix86_cmodel = CM_LARGE_PIC;
3387 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3388 error ("code model %qs not supported in the %s bit mode",
3389 "large", "32");
3390 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3391 error ("code model %qs not supported in x32 mode",
3392 "large");
3393 break;
3394
3395 case CM_32:
3396 if (opts->x_flag_pic)
3397 error ("code model %s does not support PIC mode", "32");
3398 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3399 error ("code model %qs not supported in the %s bit mode",
3400 "32", "64");
3401 break;
3402
3403 case CM_KERNEL:
3404 if (opts->x_flag_pic)
3405 {
3406 error ("code model %s does not support PIC mode", "kernel");
3407 opts->x_ix86_cmodel = CM_32;
3408 }
3409 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3410 error ("code model %qs not supported in the %s bit mode",
3411 "kernel", "32");
3412 break;
3413
3414 default:
3415 gcc_unreachable ();
3416 }
3417 }
3418 else
3419 {
3420 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3421 use of rip-relative addressing. This eliminates fixups that
3422 would otherwise be needed if this object is to be placed in a
3423 DLL, and is essentially just as efficient as direct addressing. */
3424 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3425 && (TARGET_RDOS || TARGET_PECOFF))
3426 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3427 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3428 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3429 else
3430 opts->x_ix86_cmodel = CM_32;
3431 }
3432 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3433 {
3434 error ("-masm=intel not supported in this configuration");
3435 opts->x_ix86_asm_dialect = ASM_ATT;
3436 }
3437 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3438 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3439 sorry ("%i-bit mode not compiled in",
3440 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3441
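  /* Look up -march= in the alias table and enable every ISA flag implied
     by the selected CPU, unless the user set that flag explicitly.  */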
3442 for (i = 0; i < pta_size; i++)
3443 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3444 {
3445 ix86_schedule = processor_alias_table[i].schedule;
3446 ix86_arch = processor_alias_table[i].processor;
3447 /* Default cpu tuning to the architecture. */
3448 ix86_tune = ix86_arch;
3449
3450 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3451 && !(processor_alias_table[i].flags & PTA_64BIT))
3452 error ("CPU you selected does not support x86-64 "
3453 "instruction set");
3454
3455 if (processor_alias_table[i].flags & PTA_MMX
3456 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3457 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3458 if (processor_alias_table[i].flags & PTA_3DNOW
3459 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3460 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3461 if (processor_alias_table[i].flags & PTA_3DNOW_A
3462 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3463 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3464 if (processor_alias_table[i].flags & PTA_SSE
3465 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3466 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3467 if (processor_alias_table[i].flags & PTA_SSE2
3468 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3469 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3470 if (processor_alias_table[i].flags & PTA_SSE3
3471 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3472 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3473 if (processor_alias_table[i].flags & PTA_SSSE3
3474 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3475 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3476 if (processor_alias_table[i].flags & PTA_SSE4_1
3477 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3478 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3479 if (processor_alias_table[i].flags & PTA_SSE4_2
3480 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3481 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3482 if (processor_alias_table[i].flags & PTA_AVX
3483 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3484 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3485 if (processor_alias_table[i].flags & PTA_AVX2
3486 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3487 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3488 if (processor_alias_table[i].flags & PTA_FMA
3489 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3490 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3491 if (processor_alias_table[i].flags & PTA_SSE4A
3492 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3493 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3494 if (processor_alias_table[i].flags & PTA_FMA4
3495 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3496 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3497 if (processor_alias_table[i].flags & PTA_XOP
3498 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3499 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3500 if (processor_alias_table[i].flags & PTA_LWP
3501 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3502 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3503 if (processor_alias_table[i].flags & PTA_ABM
3504 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3505 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3506 if (processor_alias_table[i].flags & PTA_BMI
3507 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3508 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3509 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3510 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3511 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3512 if (processor_alias_table[i].flags & PTA_TBM
3513 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3514 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3515 if (processor_alias_table[i].flags & PTA_BMI2
3516 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3517 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3518 if (processor_alias_table[i].flags & PTA_CX16
3519 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3520 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3521 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3522 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3523 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3524 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3525 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3526 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3527 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3528 if (processor_alias_table[i].flags & PTA_MOVBE
3529 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3530 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3531 if (processor_alias_table[i].flags & PTA_AES
3532 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3533 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3534 if (processor_alias_table[i].flags & PTA_SHA
3535 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3536 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3537 if (processor_alias_table[i].flags & PTA_PCLMUL
3538 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3539 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3540 if (processor_alias_table[i].flags & PTA_FSGSBASE
3541 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3542 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3543 if (processor_alias_table[i].flags & PTA_RDRND
3544 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3545 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3546 if (processor_alias_table[i].flags & PTA_F16C
3547 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3548 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3549 if (processor_alias_table[i].flags & PTA_RTM
3550 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3551 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3552 if (processor_alias_table[i].flags & PTA_HLE
3553 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3554 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3555 if (processor_alias_table[i].flags & PTA_PRFCHW
3556 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3557 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3558 if (processor_alias_table[i].flags & PTA_RDSEED
3559 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3560 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3561 if (processor_alias_table[i].flags & PTA_ADX
3562 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3563 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3564 if (processor_alias_table[i].flags & PTA_FXSR
3565 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3566 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3567 if (processor_alias_table[i].flags & PTA_XSAVE
3568 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3569 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3570 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3571 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3572 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3573 if (processor_alias_table[i].flags & PTA_AVX512F
3574 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3575 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3576 if (processor_alias_table[i].flags & PTA_AVX512ER
3577 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3578 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3579 if (processor_alias_table[i].flags & PTA_AVX512PF
3580 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3581 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3582 if (processor_alias_table[i].flags & PTA_AVX512CD
3583 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3584 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3585 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3586 x86_prefetch_sse = true;
3587
3588 break;
3589 }
3590
3591 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3592 error ("generic CPU can be used only for %stune=%s %s",
3593 prefix, suffix, sw);
3594 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3595 error ("intel CPU can be used only for %stune=%s %s",
3596 prefix, suffix, sw);
3597 else if (i == pta_size)
3598 error ("bad value (%s) for %sarch=%s %s",
3599 opts->x_ix86_arch_string, prefix, suffix, sw);
3600
3601 ix86_arch_mask = 1u << ix86_arch;
3602 for (i = 0; i < X86_ARCH_LAST; ++i)
3603 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3604
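  /* Look up -mtune= in the alias table. If the tuning CPU was only
     defaulted and cannot execute x86-64 code, fall back to x86-64 tuning.  */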
3605 for (i = 0; i < pta_size; i++)
3606 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3607 {
3608 ix86_schedule = processor_alias_table[i].schedule;
3609 ix86_tune = processor_alias_table[i].processor;
3610 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3611 {
3612 if (!(processor_alias_table[i].flags & PTA_64BIT))
3613 {
3614 if (ix86_tune_defaulted)
3615 {
3616 opts->x_ix86_tune_string = "x86-64";
3617 for (i = 0; i < pta_size; i++)
3618 if (! strcmp (opts->x_ix86_tune_string,
3619 processor_alias_table[i].name))
3620 break;
3621 ix86_schedule = processor_alias_table[i].schedule;
3622 ix86_tune = processor_alias_table[i].processor;
3623 }
3624 else
3625 error ("CPU you selected does not support x86-64 "
3626 "instruction set");
3627 }
3628 }
3629 /* Intel CPUs have always interpreted SSE prefetch instructions as
3630 NOPs; so, we can enable SSE prefetch instructions even when
3631 -mtune (rather than -march) points us to a processor that has them.
3632 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3633 higher processors. */
3634 if (TARGET_CMOV
3635 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3636 x86_prefetch_sse = true;
3637 break;
3638 }
3639
3640 if (ix86_tune_specified && i == pta_size)
3641 error ("bad value (%s) for %stune=%s %s",
3642 opts->x_ix86_tune_string, prefix, suffix, sw);
3643
3644 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3645
3646 #ifndef USE_IX86_FRAME_POINTER
3647 #define USE_IX86_FRAME_POINTER 0
3648 #endif
3649
3650 #ifndef USE_X86_64_FRAME_POINTER
3651 #define USE_X86_64_FRAME_POINTER 0
3652 #endif
3653
3654 /* Set the default values for switches whose default depends on TARGET_64BIT
3655 in case they weren't overwritten by command line options. */
3656 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3657 {
3658 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3659 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3660 if (opts->x_flag_asynchronous_unwind_tables
3661 && !opts_set->x_flag_unwind_tables
3662 && TARGET_64BIT_MS_ABI)
3663 opts->x_flag_unwind_tables = 1;
3664 if (opts->x_flag_asynchronous_unwind_tables == 2)
3665 opts->x_flag_unwind_tables
3666 = opts->x_flag_asynchronous_unwind_tables = 1;
3667 if (opts->x_flag_pcc_struct_return == 2)
3668 opts->x_flag_pcc_struct_return = 0;
3669 }
3670 else
3671 {
3672 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3673 opts->x_flag_omit_frame_pointer
3674 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3675 if (opts->x_flag_asynchronous_unwind_tables == 2)
3676 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3677 if (opts->x_flag_pcc_struct_return == 2)
3678 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3679 }
3680
3681 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3682 if (opts->x_optimize_size)
3683 ix86_cost = &ix86_size_cost;
3684 else
3685 ix86_cost = ix86_tune_cost;
3686
3687 /* Arrange to set up i386_stack_locals for all functions. */
3688 init_machine_status = ix86_init_machine_status;
3689
3690 /* Validate -mregparm= value. */
3691 if (opts_set->x_ix86_regparm)
3692 {
3693 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3694 warning (0, "-mregparm is ignored in 64-bit mode");
3695 if (opts->x_ix86_regparm > REGPARM_MAX)
3696 {
3697 error ("-mregparm=%d is not between 0 and %d",
3698 opts->x_ix86_regparm, REGPARM_MAX);
3699 opts->x_ix86_regparm = 0;
3700 }
3701 }
3702 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3703 opts->x_ix86_regparm = REGPARM_MAX;
3704
3705 /* Default align_* from the processor table. */
3706 if (opts->x_align_loops == 0)
3707 {
3708 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3709 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3710 }
3711 if (opts->x_align_jumps == 0)
3712 {
3713 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3714 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3715 }
3716 if (opts->x_align_functions == 0)
3717 {
3718 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3719 }
3720
3721 /* Provide default for -mbranch-cost= value. */
3722 if (!opts_set->x_ix86_branch_cost)
3723 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3724
3725 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3726 {
3727 opts->x_target_flags
3728 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3729
3730 /* Enable by default the SSE and MMX builtins. Do allow the user to
3731 explicitly disable any of these. In particular, disabling SSE and
3732 MMX for kernel code is extremely useful. */
3733 if (!ix86_arch_specified)
3734 opts->x_ix86_isa_flags
3735 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3736 | TARGET_SUBTARGET64_ISA_DEFAULT)
3737 & ~opts->x_ix86_isa_flags_explicit);
3738
3739 if (TARGET_RTD_P (opts->x_target_flags))
3740 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3741 }
3742 else
3743 {
3744 opts->x_target_flags
3745 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3746
3747 if (!ix86_arch_specified)
3748 opts->x_ix86_isa_flags
3749 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3750
3751 /* The i386 ABI does not specify a red zone. It still makes sense to use
3752 one when the programmer takes care to keep the stack from being destroyed. */
3753 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3754 opts->x_target_flags |= MASK_NO_RED_ZONE;
3755 }
3756
3757 /* Keep nonleaf frame pointers. */
3758 if (opts->x_flag_omit_frame_pointer)
3759 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3760 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3761 opts->x_flag_omit_frame_pointer = 1;
3762
3763 /* If we're doing fast math, we don't care about comparison order
3764 wrt NaNs. This lets us use a shorter comparison sequence. */
3765 if (opts->x_flag_finite_math_only)
3766 opts->x_target_flags &= ~MASK_IEEE_FP;
3767
3768 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3769 since the insns won't need emulation. */
3770 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3771 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3772
3773 /* Likewise, if the target doesn't have a 387, or we've specified
3774 software floating point, don't use 387 inline intrinsics. */
3775 if (!TARGET_80387_P (opts->x_target_flags))
3776 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3777
3778 /* Turn on MMX builtins for -msse. */
3779 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3780 opts->x_ix86_isa_flags
3781 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3782
3783 /* Enable SSE prefetch. */
3784 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3785 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3786 x86_prefetch_sse = true;
3787
3788 /* Enable prefetch{,w} instructions for -m3dnow. */
3789 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags))
3790 opts->x_ix86_isa_flags
3791 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3792
3793 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3794 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3795 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3796 opts->x_ix86_isa_flags
3797 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3798
3799 /* Enable lzcnt instruction for -mabm. */
3800 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3801 opts->x_ix86_isa_flags
3802 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3803
3804 /* Validate -mpreferred-stack-boundary= value or default it to
3805 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3806 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3807 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3808 {
3809 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3810 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3811 int max = (TARGET_SEH ? 4 : 12);
3812
3813 if (opts->x_ix86_preferred_stack_boundary_arg < min
3814 || opts->x_ix86_preferred_stack_boundary_arg > max)
3815 {
3816 if (min == max)
3817 error ("-mpreferred-stack-boundary is not supported "
3818 "for this target");
3819 else
3820 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3821 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3822 }
3823 else
3824 ix86_preferred_stack_boundary
3825 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3826 }
3827
3828 /* Set the default value for -mstackrealign. */
3829 if (opts->x_ix86_force_align_arg_pointer == -1)
3830 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3831
3832 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3833
3834 /* Validate -mincoming-stack-boundary= value or default it to
3835 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3836 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3837 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3838 {
3839 if (opts->x_ix86_incoming_stack_boundary_arg
3840 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3841 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3842 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3843 opts->x_ix86_incoming_stack_boundary_arg,
3844 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3845 else
3846 {
3847 ix86_user_incoming_stack_boundary
3848 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3849 ix86_incoming_stack_boundary
3850 = ix86_user_incoming_stack_boundary;
3851 }
3852 }
3853
3854 /* Accept -msseregparm only if at least SSE support is enabled. */
3855 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3856 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3857 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3858
3859 if (opts_set->x_ix86_fpmath)
3860 {
3861 if (opts->x_ix86_fpmath & FPMATH_SSE)
3862 {
3863 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3864 {
3865 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3866 opts->x_ix86_fpmath = FPMATH_387;
3867 }
3868 else if ((opts->x_ix86_fpmath & FPMATH_387)
3869 && !TARGET_80387_P (opts->x_target_flags))
3870 {
3871 warning (0, "387 instruction set disabled, using SSE arithmetics");
3872 opts->x_ix86_fpmath = FPMATH_SSE;
3873 }
3874 }
3875 }
3876 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3877 -mfpmath=387. The latter is nevertheless the default on many targets,
3878 since the extra 80-bit precision of temporaries is considered part of
3879 the ABI. Overwrite the default at least for -ffast-math.
3880 TODO: -mfpmath=both seems to produce equally performing code with
3881 slightly smaller binaries. It is however not clear whether register
3882 allocation is ready for this setting.
3883 Also, -mfpmath=387 codegen is overall a lot more compact (about 4-5%)
3884 than SSE codegen. We may switch to 387 with -ffast-math for
3885 size-optimized functions. */
3886 else if (fast_math_flags_set_p (&global_options)
3887 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3888 opts->x_ix86_fpmath = FPMATH_SSE;
3889 else
3890 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
3891
3892 /* If the i387 is disabled, then do not return values in it. */
3893 if (!TARGET_80387_P (opts->x_target_flags))
3894 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
3895
3896 /* Use external vectorized library in vectorizing intrinsics. */
3897 if (opts_set->x_ix86_veclibabi_type)
3898 switch (opts->x_ix86_veclibabi_type)
3899 {
3900 case ix86_veclibabi_type_svml:
3901 ix86_veclib_handler = ix86_veclibabi_svml;
3902 break;
3903
3904 case ix86_veclibabi_type_acml:
3905 ix86_veclib_handler = ix86_veclibabi_acml;
3906 break;
3907
3908 default:
3909 gcc_unreachable ();
3910 }
3911
3912 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
3913 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
3914 && !opts->x_optimize_size)
3915 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3916
3917 /* If stack probes are required, the space used for large function
3918 arguments on the stack must also be probed, so enable
3919 -maccumulate-outgoing-args so this happens in the prologue. */
3920 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
3921 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3922 {
3923 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
3924 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3925 "for correctness", prefix, suffix);
3926 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3927 }
3928
3929 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3930 {
3931 char *p;
3932 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3933 p = strchr (internal_label_prefix, 'X');
3934 internal_label_prefix_len = p - internal_label_prefix;
3935 *p = '\0';
3936 }
3937
3938 /* When the scheduling description is not available, disable the scheduler
3939 pass so it won't slow down compilation or make x87 code slower. */
3940 if (!TARGET_SCHEDULE)
3941 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
3942
3943 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3944 ix86_tune_cost->simultaneous_prefetches,
3945 opts->x_param_values,
3946 opts_set->x_param_values);
3947 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3948 ix86_tune_cost->prefetch_block,
3949 opts->x_param_values,
3950 opts_set->x_param_values);
3951 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3952 ix86_tune_cost->l1_cache_size,
3953 opts->x_param_values,
3954 opts_set->x_param_values);
3955 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
3956 ix86_tune_cost->l2_cache_size,
3957 opts->x_param_values,
3958 opts_set->x_param_values);
3959
3960 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3961 if (opts->x_flag_prefetch_loop_arrays < 0
3962 && HAVE_prefetch
3963 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
3964 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3965 opts->x_flag_prefetch_loop_arrays = 1;
3966
3967 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3968 can be optimized to ap = __builtin_next_arg (0). */
3969 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
3970 targetm.expand_builtin_va_start = NULL;
3971
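  /* Select the insn-generator callbacks whose choice depends on 64-bit mode
     and on Pmode.  */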
3972 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3973 {
3974 ix86_gen_leave = gen_leave_rex64;
3975 if (Pmode == DImode)
3976 {
3977 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
3978 ix86_gen_tls_local_dynamic_base_64
3979 = gen_tls_local_dynamic_base_64_di;
3980 }
3981 else
3982 {
3983 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
3984 ix86_gen_tls_local_dynamic_base_64
3985 = gen_tls_local_dynamic_base_64_si;
3986 }
3987 }
3988 else
3989 ix86_gen_leave = gen_leave;
3990
3991 if (Pmode == DImode)
3992 {
3993 ix86_gen_add3 = gen_adddi3;
3994 ix86_gen_sub3 = gen_subdi3;
3995 ix86_gen_sub3_carry = gen_subdi3_carry;
3996 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3997 ix86_gen_andsp = gen_anddi3;
3998 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3999 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4000 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4001 ix86_gen_monitor = gen_sse3_monitor_di;
4002 }
4003 else
4004 {
4005 ix86_gen_add3 = gen_addsi3;
4006 ix86_gen_sub3 = gen_subsi3;
4007 ix86_gen_sub3_carry = gen_subsi3_carry;
4008 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4009 ix86_gen_andsp = gen_andsi3;
4010 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4011 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4012 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4013 ix86_gen_monitor = gen_sse3_monitor_si;
4014 }
4015
4016 #ifdef USE_IX86_CLD
4017 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4018 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4019 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4020 #endif
4021
4022 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4023 {
4024 if (opts->x_flag_fentry > 0)
4025 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4026 "with -fpic");
4027 opts->x_flag_fentry = 0;
4028 }
4029 else if (TARGET_SEH)
4030 {
4031 if (opts->x_flag_fentry == 0)
4032 sorry ("-mno-fentry isn%'t compatible with SEH");
4033 opts->x_flag_fentry = 1;
4034 }
4035 else if (opts->x_flag_fentry < 0)
4036 {
4037 #if defined(PROFILE_BEFORE_PROLOGUE)
4038 opts->x_flag_fentry = 1;
4039 #else
4040 opts->x_flag_fentry = 0;
4041 #endif
4042 }
4043
4044 /* When not optimizing for size, enable the vzeroupper optimization for
4045 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4046 AVX unaligned loads/stores. */
4047 if (!opts->x_optimize_size)
4048 {
4049 if (flag_expensive_optimizations
4050 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4051 opts->x_target_flags |= MASK_VZEROUPPER;
4052 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4053 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4054 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4055 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4056 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4057 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4058 /* Enable 128-bit AVX instruction generation
4059 for the auto-vectorizer. */
4060 if (TARGET_AVX128_OPTIMAL
4061 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4062 opts->x_target_flags |= MASK_PREFER_AVX128;
4063 }
4064
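  /* Parse -mrecip=opt[,opt...]; a leading '!' on an option clears the
     corresponding mask bits instead of setting them.  */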
4065 if (opts->x_ix86_recip_name)
4066 {
4067 char *p = ASTRDUP (opts->x_ix86_recip_name);
4068 char *q;
4069 unsigned int mask, i;
4070 bool invert;
4071
4072 while ((q = strtok (p, ",")) != NULL)
4073 {
4074 p = NULL;
4075 if (*q == '!')
4076 {
4077 invert = true;
4078 q++;
4079 }
4080 else
4081 invert = false;
4082
4083 if (!strcmp (q, "default"))
4084 mask = RECIP_MASK_ALL;
4085 else
4086 {
4087 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4088 if (!strcmp (q, recip_options[i].string))
4089 {
4090 mask = recip_options[i].mask;
4091 break;
4092 }
4093
4094 if (i == ARRAY_SIZE (recip_options))
4095 {
4096 error ("unknown option for -mrecip=%s", q);
4097 invert = false;
4098 mask = RECIP_MASK_NONE;
4099 }
4100 }
4101
4102 opts->x_recip_mask_explicit |= mask;
4103 if (invert)
4104 opts->x_recip_mask &= ~mask;
4105 else
4106 opts->x_recip_mask |= mask;
4107 }
4108 }
4109
4110 if (TARGET_RECIP_P (opts->x_target_flags))
4111 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4112 else if (opts_set->x_target_flags & MASK_RECIP)
4113 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4114
4115 /* Default long double to 64-bit for Bionic. */
4116 if (TARGET_HAS_BIONIC
4117 && !(opts_set->x_target_flags & MASK_LONG_DOUBLE_64))
4118 opts->x_target_flags |= MASK_LONG_DOUBLE_64;
4119
4120 /* Save the initial options in case the user does function specific
4121 options. */
4122 if (main_args_p)
4123 target_option_default_node = target_option_current_node
4124 = build_target_option_node (opts);
4125
4126 /* Handle stack protector */
4127 if (!opts_set->x_ix86_stack_protector_guard)
4128 opts->x_ix86_stack_protector_guard
4129 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4130
4131 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4132 if (opts->x_ix86_tune_memcpy_strategy)
4133 {
4134 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4135 ix86_parse_stringop_strategy_string (str, false);
4136 free (str);
4137 }
4138
4139 if (opts->x_ix86_tune_memset_strategy)
4140 {
4141 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4142 ix86_parse_stringop_strategy_string (str, true);
4143 free (str);
4144 }
4145 }
4146
4147 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4148
4149 static void
4150 ix86_option_override (void)
4151 {
4152 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4153 static struct register_pass_info insert_vzeroupper_info
4154 = { pass_insert_vzeroupper, "reload",
4155 1, PASS_POS_INSERT_AFTER
4156 };
4157
4158 ix86_option_override_internal (true, &global_options, &global_options_set);
4159
4160
4161 /* This needs to be done at start up. It's convenient to do it here. */
4162 register_pass (&insert_vzeroupper_info);
4163 }
4164
4165 /* Update register usage after having seen the compiler flags. */
4166
4167 static void
4168 ix86_conditional_register_usage (void)
4169 {
4170 int i, c_mask;
4171 unsigned int j;
4172
4173 /* The PIC register, if it exists, is fixed. */
4174 j = PIC_OFFSET_TABLE_REGNUM;
4175 if (j != INVALID_REGNUM)
4176 fixed_regs[j] = call_used_regs[j] = 1;
4177
4178 /* For 32-bit targets, squash the REX registers. */
4179 if (! TARGET_64BIT)
4180 {
4181 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4182 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4183 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4184 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4185 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4186 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4187 }
4188
4189 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4190 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4191 : TARGET_64BIT ? (1 << 2)
4192 : (1 << 1));
4193
4194 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4195
4196 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4197 {
4198 /* Set/reset conditionally defined registers from
4199 CALL_USED_REGISTERS initializer. */
4200 if (call_used_regs[i] > 1)
4201 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4202
4203 /* Collect the call-used registers of the GENERAL_REGS register set
4204 into the CLOBBERED_REGS register set. */
4205 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4206 && call_used_regs[i])
4207 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4208 }
4209
4210 /* If MMX is disabled, squash the registers. */
4211 if (! TARGET_MMX)
4212 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4213 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4214 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4215
4216 /* If SSE is disabled, squash the registers. */
4217 if (! TARGET_SSE)
4218 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4219 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4220 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4221
4222 /* If the FPU is disabled, squash the registers. */
4223 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4224 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4225 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4226 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4227
4228 /* If AVX512F is disabled, squash the registers. */
4229 if (! TARGET_AVX512F)
4230 {
4231 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4232 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4233
4234 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4235 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4236 }
4237 }
4238
4239 \f
4240 /* Save the current options */
4241
4242 static void
4243 ix86_function_specific_save (struct cl_target_option *ptr,
4244 struct gcc_options *opts)
4245 {
4246 ptr->arch = ix86_arch;
4247 ptr->schedule = ix86_schedule;
4248 ptr->tune = ix86_tune;
4249 ptr->branch_cost = ix86_branch_cost;
4250 ptr->tune_defaulted = ix86_tune_defaulted;
4251 ptr->arch_specified = ix86_arch_specified;
4252 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4253 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4254 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4255 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4256 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4257 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4258 ptr->x_ix86_abi = opts->x_ix86_abi;
4259 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4260 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4261 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4262 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4263 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4264 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4265 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4266 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4267 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4268 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4269 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4270 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4271 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4272 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4273 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4274 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4275 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4276 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4277 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4278 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4279
4280 /* The fields are char but the variables are not; make sure the
4281 values fit in the fields. */
4282 gcc_assert (ptr->arch == ix86_arch);
4283 gcc_assert (ptr->schedule == ix86_schedule);
4284 gcc_assert (ptr->tune == ix86_tune);
4285 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4286 }
4287
4288 /* Restore the current options */
4289
4290 static void
4291 ix86_function_specific_restore (struct gcc_options *opts,
4292 struct cl_target_option *ptr)
4293 {
4294 enum processor_type old_tune = ix86_tune;
4295 enum processor_type old_arch = ix86_arch;
4296 unsigned int ix86_arch_mask;
4297 int i;
4298
4299 /* We don't change -fPIC. */
4300 opts->x_flag_pic = flag_pic;
4301
4302 ix86_arch = (enum processor_type) ptr->arch;
4303 ix86_schedule = (enum attr_cpu) ptr->schedule;
4304 ix86_tune = (enum processor_type) ptr->tune;
4305 opts->x_ix86_branch_cost = ptr->branch_cost;
4306 ix86_tune_defaulted = ptr->tune_defaulted;
4307 ix86_arch_specified = ptr->arch_specified;
4308 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4309 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4310 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4311 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4312 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4313 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4314 opts->x_ix86_abi = ptr->x_ix86_abi;
4315 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4316 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4317 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4318 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4319 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4320 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4321 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4322 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4323 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4324 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4325 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4326 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4327 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4328 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4329 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4330 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4331 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4332 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4333 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4334 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4335
4336 /* Recreate the arch feature tests if the arch changed */
4337 if (old_arch != ix86_arch)
4338 {
4339 ix86_arch_mask = 1u << ix86_arch;
4340 for (i = 0; i < X86_ARCH_LAST; ++i)
4341 ix86_arch_features[i]
4342 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4343 }
4344
4345 /* Recreate the tune optimization tests */
4346 if (old_tune != ix86_tune)
4347 set_ix86_tune_features (ix86_tune, false);
4348 }
4349
4350 /* Print the current options */
4351
4352 static void
4353 ix86_function_specific_print (FILE *file, int indent,
4354 struct cl_target_option *ptr)
4355 {
4356 char *target_string
4357 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4358 NULL, NULL, ptr->x_ix86_fpmath, false);
4359
4360 gcc_assert (ptr->arch < PROCESSOR_max);
4361 fprintf (file, "%*sarch = %d (%s)\n",
4362 indent, "",
4363 ptr->arch, processor_target_table[ptr->arch].name);
4364
4365 gcc_assert (ptr->tune < PROCESSOR_max);
4366 fprintf (file, "%*stune = %d (%s)\n",
4367 indent, "",
4368 ptr->tune, processor_target_table[ptr->tune].name);
4369
4370 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4371
4372 if (target_string)
4373 {
4374 fprintf (file, "%*s%s\n", indent, "", target_string);
4375 free (target_string);
4376 }
4377 }
4378
4379 \f
4380 /* Inner function to process the attribute((target(...))), taking an argument
4381 and setting the current options from that argument. If we have a list,
4382 recursively go over the list. */
4383
4384 static bool
4385 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4386 struct gcc_options *opts,
4387 struct gcc_options *opts_set,
4388 struct gcc_options *enum_opts_set)
4389 {
4390 char *next_optstr;
4391 bool ret = true;
4392
4393 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4394 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4395 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4396 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4397 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4398
4399 enum ix86_opt_type
4400 {
4401 ix86_opt_unknown,
4402 ix86_opt_yes,
4403 ix86_opt_no,
4404 ix86_opt_str,
4405 ix86_opt_enum,
4406 ix86_opt_isa
4407 };
4408
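  /* Table mapping the strings accepted by attribute((target("..."))) to the
     options and masks they control.  */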
4409 static const struct
4410 {
4411 const char *string;
4412 size_t len;
4413 enum ix86_opt_type type;
4414 int opt;
4415 int mask;
4416 } attrs[] = {
4417 /* isa options */
4418 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4419 IX86_ATTR_ISA ("abm", OPT_mabm),
4420 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4421 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4422 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4423 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4424 IX86_ATTR_ISA ("aes", OPT_maes),
4425 IX86_ATTR_ISA ("sha", OPT_msha),
4426 IX86_ATTR_ISA ("avx", OPT_mavx),
4427 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4428 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4429 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4430 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4431 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4432 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4433 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4434 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4435 IX86_ATTR_ISA ("sse", OPT_msse),
4436 IX86_ATTR_ISA ("sse2", OPT_msse2),
4437 IX86_ATTR_ISA ("sse3", OPT_msse3),
4438 IX86_ATTR_ISA ("sse4", OPT_msse4),
4439 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4440 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4441 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4442 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4443 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4444 IX86_ATTR_ISA ("fma", OPT_mfma),
4445 IX86_ATTR_ISA ("xop", OPT_mxop),
4446 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4447 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4448 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4449 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4450 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4451 IX86_ATTR_ISA ("hle", OPT_mhle),
4452 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4453 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4454 IX86_ATTR_ISA ("adx", OPT_madx),
4455 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4456 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4457 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4458
4459 /* enum options */
4460 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4461
4462 /* string options */
4463 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4464 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4465
4466 /* flag options */
4467 IX86_ATTR_YES ("cld",
4468 OPT_mcld,
4469 MASK_CLD),
4470
4471 IX86_ATTR_NO ("fancy-math-387",
4472 OPT_mfancy_math_387,
4473 MASK_NO_FANCY_MATH_387),
4474
4475 IX86_ATTR_YES ("ieee-fp",
4476 OPT_mieee_fp,
4477 MASK_IEEE_FP),
4478
4479 IX86_ATTR_YES ("inline-all-stringops",
4480 OPT_minline_all_stringops,
4481 MASK_INLINE_ALL_STRINGOPS),
4482
4483 IX86_ATTR_YES ("inline-stringops-dynamically",
4484 OPT_minline_stringops_dynamically,
4485 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4486
4487 IX86_ATTR_NO ("align-stringops",
4488 OPT_mno_align_stringops,
4489 MASK_NO_ALIGN_STRINGOPS),
4490
4491 IX86_ATTR_YES ("recip",
4492 OPT_mrecip,
4493 MASK_RECIP),
4494
4495 };
4496
4497 /* If this is a list, recurse to get the options. */
4498 if (TREE_CODE (args) == TREE_LIST)
4499 {
4500 bool ret = true;
4501
4502 for (; args; args = TREE_CHAIN (args))
4503 if (TREE_VALUE (args)
4504 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4505 p_strings, opts, opts_set,
4506 enum_opts_set))
4507 ret = false;
4508
4509 return ret;
4510 }
4511
4512 else if (TREE_CODE (args) != STRING_CST)
4513 {
4514 error ("attribute %<target%> argument not a string");
4515 return false;
4516 }
4517
4518 /* Handle multiple arguments separated by commas. */
4519 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4520
4521 while (next_optstr && *next_optstr != '\0')
4522 {
4523 char *p = next_optstr;
4524 char *orig_p = p;
4525 char *comma = strchr (next_optstr, ',');
4526 const char *opt_string;
4527 size_t len, opt_len;
4528 int opt;
4529 bool opt_set_p;
4530 char ch;
4531 unsigned i;
4532 enum ix86_opt_type type = ix86_opt_unknown;
4533 int mask = 0;
4534
4535 if (comma)
4536 {
4537 *comma = '\0';
4538 len = comma - next_optstr;
4539 next_optstr = comma + 1;
4540 }
4541 else
4542 {
4543 len = strlen (p);
4544 next_optstr = NULL;
4545 }
4546
4547 /* Recognize no-xxx. */
4548 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4549 {
4550 opt_set_p = false;
4551 p += 3;
4552 len -= 3;
4553 }
4554 else
4555 opt_set_p = true;
4556
4557 /* Find the option. */
4558 ch = *p;
4559 opt = N_OPTS;
4560 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4561 {
4562 type = attrs[i].type;
4563 opt_len = attrs[i].len;
4564 if (ch == attrs[i].string[0]
4565 && ((type != ix86_opt_str && type != ix86_opt_enum)
4566 ? len == opt_len
4567 : len > opt_len)
4568 && memcmp (p, attrs[i].string, opt_len) == 0)
4569 {
4570 opt = attrs[i].opt;
4571 mask = attrs[i].mask;
4572 opt_string = attrs[i].string;
4573 break;
4574 }
4575 }
4576
4577 /* Process the option. */
4578 if (opt == N_OPTS)
4579 {
4580 error ("attribute(target(\"%s\")) is unknown", orig_p);
4581 ret = false;
4582 }
4583
4584 else if (type == ix86_opt_isa)
4585 {
4586 struct cl_decoded_option decoded;
4587
4588 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4589 ix86_handle_option (opts, opts_set,
4590 &decoded, input_location);
4591 }
4592
4593 else if (type == ix86_opt_yes || type == ix86_opt_no)
4594 {
4595 if (type == ix86_opt_no)
4596 opt_set_p = !opt_set_p;
4597
4598 if (opt_set_p)
4599 opts->x_target_flags |= mask;
4600 else
4601 opts->x_target_flags &= ~mask;
4602 }
4603
4604 else if (type == ix86_opt_str)
4605 {
4606 if (p_strings[opt])
4607 {
4608 error ("option(\"%s\") was already specified", opt_string);
4609 ret = false;
4610 }
4611 else
4612 p_strings[opt] = xstrdup (p + opt_len);
4613 }
4614
4615 else if (type == ix86_opt_enum)
4616 {
4617 bool arg_ok;
4618 int value;
4619
4620 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4621 if (arg_ok)
4622 set_option (opts, enum_opts_set, opt, value,
4623 p + opt_len, DK_UNSPECIFIED, input_location,
4624 global_dc);
4625 else
4626 {
4627 error ("attribute(target(\"%s\")) is unknown", orig_p);
4628 ret = false;
4629 }
4630 }
4631
4632 else
4633 gcc_unreachable ();
4634 }
4635
4636 return ret;
4637 }
4638
4639 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4640
4641 tree
4642 ix86_valid_target_attribute_tree (tree args,
4643 struct gcc_options *opts,
4644 struct gcc_options *opts_set)
4645 {
4646 const char *orig_arch_string = opts->x_ix86_arch_string;
4647 const char *orig_tune_string = opts->x_ix86_tune_string;
4648 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4649 int orig_tune_defaulted = ix86_tune_defaulted;
4650 int orig_arch_specified = ix86_arch_specified;
4651 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4652 tree t = NULL_TREE;
4653 int i;
4654 struct cl_target_option *def
4655 = TREE_TARGET_OPTION (target_option_default_node);
4656 struct gcc_options enum_opts_set;
4657
4658 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4659
4660 /* Process each of the options on the chain. */
4661 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4662 opts_set, &enum_opts_set))
4663 return error_mark_node;
4664
4665 /* If the changed options are different from the default, rerun
4666 ix86_option_override_internal, and then save the options away.
4667 The string options are attribute options, and will be undone
4668 when we copy the save structure. */
4669 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4670 || opts->x_target_flags != def->x_target_flags
4671 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4672 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4673 || enum_opts_set.x_ix86_fpmath)
4674 {
4675 /* If we are using the default tune= or arch=, undo the string assigned,
4676 and use the default. */
4677 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4678 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4679 else if (!orig_arch_specified)
4680 opts->x_ix86_arch_string = NULL;
4681
4682 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4683 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4684 else if (orig_tune_defaulted)
4685 opts->x_ix86_tune_string = NULL;
4686
4687 /* If fpmath= is not set, and we now have SSE on 32-bit, use it. */
4688 if (enum_opts_set.x_ix86_fpmath)
4689 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4690 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4691 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4692 {
4693 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4694 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4695 }
4696
4697 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4698 ix86_option_override_internal (false, opts, opts_set);
4699
4700 /* Add any builtin functions with the new isa if any. */
4701 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4702
4703 /* Save the current options unless we are validating options for
4704 #pragma. */
4705 t = build_target_option_node (opts);
4706
4707 opts->x_ix86_arch_string = orig_arch_string;
4708 opts->x_ix86_tune_string = orig_tune_string;
4709 opts_set->x_ix86_fpmath = orig_fpmath_set;
4710
4711 /* Free up memory allocated to hold the strings */
4712 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4713 free (option_strings[i]);
4714 }
4715
4716 return t;
4717 }
4718
4719 /* Hook to validate attribute((target("string"))). */
4720
4721 static bool
4722 ix86_valid_target_attribute_p (tree fndecl,
4723 tree ARG_UNUSED (name),
4724 tree args,
4725 int ARG_UNUSED (flags))
4726 {
4727 struct gcc_options func_options;
4728 tree new_target, new_optimize;
4729 bool ret = true;
4730
4731 /* attribute((target("default"))) does nothing, beyond
4732 affecting multi-versioning. */
4733 if (TREE_VALUE (args)
4734 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4735 && TREE_CHAIN (args) == NULL_TREE
4736 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4737 return true;
4738
4739 tree old_optimize = build_optimization_node (&global_options);
4740
4741 /* Get the optimization options of the current function. */
4742 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4743
4744 if (!func_optimize)
4745 func_optimize = old_optimize;
4746
4747 /* Init func_options. */
4748 memset (&func_options, 0, sizeof (func_options));
4749 init_options_struct (&func_options, NULL);
4750 lang_hooks.init_options_struct (&func_options);
4751
4752 cl_optimization_restore (&func_options,
4753 TREE_OPTIMIZATION (func_optimize));
4754
4755 /* Initialize func_options to the default before its target options can
4756 be set. */
4757 cl_target_option_restore (&func_options,
4758 TREE_TARGET_OPTION (target_option_default_node));
4759
4760 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4761 &global_options_set);
4762
4763 new_optimize = build_optimization_node (&func_options);
4764
4765 if (new_target == error_mark_node)
4766 ret = false;
4767
4768 else if (fndecl && new_target)
4769 {
4770 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4771
4772 if (old_optimize != new_optimize)
4773 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4774 }
4775
4776 return ret;
4777 }
4778
4779 \f
4780 /* Hook to determine if one function can safely inline another. */
4781
4782 static bool
4783 ix86_can_inline_p (tree caller, tree callee)
4784 {
4785 bool ret = false;
4786 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4787 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4788
4789 /* If callee has no option attributes, then it is ok to inline. */
4790 if (!callee_tree)
4791 ret = true;
4792
4793 /* If caller has no option attributes, but callee does then it is not ok to
4794 inline. */
4795 else if (!caller_tree)
4796 ret = false;
4797
4798 else
4799 {
4800 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4801 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4802
4803 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4804 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4805 function. */
4806 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4807 != callee_opts->x_ix86_isa_flags)
4808 ret = false;
4809
4810 /* See if we have the same non-isa options. */
4811 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4812 ret = false;
4813
4814 /* See if arch, tune, etc. are the same. */
4815 else if (caller_opts->arch != callee_opts->arch)
4816 ret = false;
4817
4818 else if (caller_opts->tune != callee_opts->tune)
4819 ret = false;
4820
4821 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4822 ret = false;
4823
4824 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4825 ret = false;
4826
4827 else
4828 ret = true;
4829 }
4830
4831 return ret;
4832 }
4833
4834 \f
4835 /* Remember the last target of ix86_set_current_function. */
4836 static GTY(()) tree ix86_previous_fndecl;
4837
4838 /* Invalidate ix86_previous_fndecl cache. */
4839 void
4840 ix86_reset_previous_fndecl (void)
4841 {
4842 ix86_previous_fndecl = NULL_TREE;
4843 }
4844
4845 /* Establish appropriate back-end context for processing the function
4846 FNDECL. The argument might be NULL to indicate processing at top
4847 level, outside of any function scope. */
4848 static void
4849 ix86_set_current_function (tree fndecl)
4850 {
4851 /* Only change the context if the function changes. This hook is called
4852 several times in the course of compiling a function, and we don't want to
4853 slow things down too much or call target_reinit when it isn't safe. */
4854 if (fndecl && fndecl != ix86_previous_fndecl)
4855 {
4856 tree old_tree = (ix86_previous_fndecl
4857 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4858 : NULL_TREE);
4859
4860 tree new_tree = (fndecl
4861 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4862 : NULL_TREE);
4863
4864 ix86_previous_fndecl = fndecl;
4865 if (old_tree == new_tree)
4866 ;
4867
4868 else if (new_tree)
4869 {
4870 cl_target_option_restore (&global_options,
4871 TREE_TARGET_OPTION (new_tree));
4872 if (TREE_TARGET_GLOBALS (new_tree))
4873 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4874 else
4875 TREE_TARGET_GLOBALS (new_tree)
4876 = save_target_globals_default_opts ();
4877 }
4878
4879 else if (old_tree)
4880 {
4881 new_tree = target_option_current_node;
4882 cl_target_option_restore (&global_options,
4883 TREE_TARGET_OPTION (new_tree));
4884 if (TREE_TARGET_GLOBALS (new_tree))
4885 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4886 else if (new_tree == target_option_default_node)
4887 restore_target_globals (&default_target_globals);
4888 else
4889 TREE_TARGET_GLOBALS (new_tree)
4890 = save_target_globals_default_opts ();
4891 }
4892 }
4893 }
4894
4895 \f
4896 /* Return true if this goes in large data/bss. */
4897
4898 static bool
4899 ix86_in_large_data_p (tree exp)
4900 {
4901 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4902 return false;
4903
4904 /* Functions are never large data. */
4905 if (TREE_CODE (exp) == FUNCTION_DECL)
4906 return false;
4907
4908 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4909 {
4910 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4911 if (strcmp (section, ".ldata") == 0
4912 || strcmp (section, ".lbss") == 0)
4913 return true;
4914 return false;
4915 }
4916 else
4917 {
4918 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4919
4920 /* If this is an incomplete type with size 0, then we can't put it
4921 in data because it might be too big when completed. */
4922 if (!size || size > ix86_section_threshold)
4923 return true;
4924 }
4925
4926 return false;
4927 }
4928
4929 /* Switch to the appropriate section for output of DECL.
4930 DECL is either a `VAR_DECL' node or a constant of some sort.
4931 RELOC indicates whether forming the initial value of DECL requires
4932 link-time relocations. */
4933
4934 ATTRIBUTE_UNUSED static section *
4935 x86_64_elf_select_section (tree decl, int reloc,
4936 unsigned HOST_WIDE_INT align)
4937 {
4938 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4939 && ix86_in_large_data_p (decl))
4940 {
4941 const char *sname = NULL;
4942 unsigned int flags = SECTION_WRITE;
4943 switch (categorize_decl_for_section (decl, reloc))
4944 {
4945 case SECCAT_DATA:
4946 sname = ".ldata";
4947 break;
4948 case SECCAT_DATA_REL:
4949 sname = ".ldata.rel";
4950 break;
4951 case SECCAT_DATA_REL_LOCAL:
4952 sname = ".ldata.rel.local";
4953 break;
4954 case SECCAT_DATA_REL_RO:
4955 sname = ".ldata.rel.ro";
4956 break;
4957 case SECCAT_DATA_REL_RO_LOCAL:
4958 sname = ".ldata.rel.ro.local";
4959 break;
4960 case SECCAT_BSS:
4961 sname = ".lbss";
4962 flags |= SECTION_BSS;
4963 break;
4964 case SECCAT_RODATA:
4965 case SECCAT_RODATA_MERGE_STR:
4966 case SECCAT_RODATA_MERGE_STR_INIT:
4967 case SECCAT_RODATA_MERGE_CONST:
4968 sname = ".lrodata";
4969 flags = 0;
4970 break;
4971 case SECCAT_SRODATA:
4972 case SECCAT_SDATA:
4973 case SECCAT_SBSS:
4974 gcc_unreachable ();
4975 case SECCAT_TEXT:
4976 case SECCAT_TDATA:
4977 case SECCAT_TBSS:
4978 /* We don't split these for the medium model. Place them into
4979 default sections and hope for the best. */
4980 break;
4981 }
4982 if (sname)
4983 {
4984 /* We might get called with string constants, but get_named_section
4985 doesn't like them as they are not DECLs. Also, we need to set
4986 flags in that case. */
4987 if (!DECL_P (decl))
4988 return get_section (sname, flags, NULL);
4989 return get_named_section (decl, sname, reloc);
4990 }
4991 }
4992 return default_elf_select_section (decl, reloc, align);
4993 }
4994
4995 /* Select a set of attributes for section NAME based on the properties
4996 of DECL and whether or not RELOC indicates that DECL's initializer
4997 might contain runtime relocations. */
4998
4999 static unsigned int ATTRIBUTE_UNUSED
5000 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5001 {
5002 unsigned int flags = default_section_type_flags (decl, name, reloc);
5003
5004 if (decl == NULL_TREE
5005 && (strcmp (name, ".ldata.rel.ro") == 0
5006 || strcmp (name, ".ldata.rel.ro.local") == 0))
5007 flags |= SECTION_RELRO;
5008
5009 if (strcmp (name, ".lbss") == 0
5010 || strncmp (name, ".lbss.", 6) == 0
5011 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5012 flags |= SECTION_BSS;
5013
5014 return flags;
5015 }
5016
5017 /* Build up a unique section name, expressed as a
5018 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5019 RELOC indicates whether the initial value of EXP requires
5020 link-time relocations. */
5021
5022 static void ATTRIBUTE_UNUSED
5023 x86_64_elf_unique_section (tree decl, int reloc)
5024 {
5025 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5026 && ix86_in_large_data_p (decl))
5027 {
5028 const char *prefix = NULL;
5029 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5030 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5031
5032 switch (categorize_decl_for_section (decl, reloc))
5033 {
5034 case SECCAT_DATA:
5035 case SECCAT_DATA_REL:
5036 case SECCAT_DATA_REL_LOCAL:
5037 case SECCAT_DATA_REL_RO:
5038 case SECCAT_DATA_REL_RO_LOCAL:
5039 prefix = one_only ? ".ld" : ".ldata";
5040 break;
5041 case SECCAT_BSS:
5042 prefix = one_only ? ".lb" : ".lbss";
5043 break;
5044 case SECCAT_RODATA:
5045 case SECCAT_RODATA_MERGE_STR:
5046 case SECCAT_RODATA_MERGE_STR_INIT:
5047 case SECCAT_RODATA_MERGE_CONST:
5048 prefix = one_only ? ".lr" : ".lrodata";
5049 break;
5050 case SECCAT_SRODATA:
5051 case SECCAT_SDATA:
5052 case SECCAT_SBSS:
5053 gcc_unreachable ();
5054 case SECCAT_TEXT:
5055 case SECCAT_TDATA:
5056 case SECCAT_TBSS:
5057 /* We don't split these for the medium model. Place them into
5058 default sections and hope for the best. */
5059 break;
5060 }
5061 if (prefix)
5062 {
5063 const char *name, *linkonce;
5064 char *string;
5065
5066 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5067 name = targetm.strip_name_encoding (name);
5068
5069 /* If we're using one_only, then there needs to be a .gnu.linkonce
5070 prefix to the section name. */
5071 linkonce = one_only ? ".gnu.linkonce" : "";
5072
5073 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5074
5075 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5076 return;
5077 }
5078 }
5079 default_unique_section (decl, reloc);
5080 }
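/* Example (informal sketch): with -mcmodel=medium and a variable large
   enough for the large data sections, a one-only decl named "foo"
   categorized as SECCAT_DATA is given the section ".gnu.linkonce.ld.foo",
   while the non-one-only case simply uses ".ldata.foo".  */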
5081
5082 #ifdef COMMON_ASM_OP
5083 /* This says how to output assembler code to declare an
5084 uninitialized external linkage data object.
5085
5086 For medium-model x86-64 we need to use the .largecomm directive for
5087 large objects. */
5088 void
5089 x86_elf_aligned_common (FILE *file,
5090 const char *name, unsigned HOST_WIDE_INT size,
5091 int align)
5092 {
5093 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5094 && size > (unsigned int)ix86_section_threshold)
5095 fputs (".largecomm\t", file);
5096 else
5097 fputs (COMMON_ASM_OP, file);
5098 assemble_name (file, name);
5099 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5100 size, align / BITS_PER_UNIT);
5101 }
5102 #endif
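/* Assembly sketch (illustrative; the exact numbers depend on the object):
   for a sufficiently large object "buf" under -mcmodel=medium the routine
   above emits something like

     .largecomm buf,1048576,32

   i.e. name, size in bytes and alignment in bytes, instead of the usual
   COMMON_ASM_OP (".comm") directive.  */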
5103
5104 /* Utility function for targets to use in implementing
5105 ASM_OUTPUT_ALIGNED_BSS. */
5106
5107 void
5108 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5109 const char *name, unsigned HOST_WIDE_INT size,
5110 int align)
5111 {
5112 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5113 && size > (unsigned int)ix86_section_threshold)
5114 switch_to_section (get_named_section (decl, ".lbss", 0));
5115 else
5116 switch_to_section (bss_section);
5117 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5118 #ifdef ASM_DECLARE_OBJECT_NAME
5119 last_assemble_variable_decl = decl;
5120 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5121 #else
5122 /* Standard thing is just output label for the object. */
5123 ASM_OUTPUT_LABEL (file, name);
5124 #endif /* ASM_DECLARE_OBJECT_NAME */
5125 ASM_OUTPUT_SKIP (file, size ? size : 1);
5126 }
5127 \f
5128 /* Decide whether we must probe the stack before any space allocation
5129 on this target. It's essentially TARGET_STACK_PROBE except when
5130 -fstack-check causes the stack to be already probed differently. */
5131
5132 bool
5133 ix86_target_stack_probe (void)
5134 {
5135 /* Do not probe the stack twice if static stack checking is enabled. */
5136 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5137 return false;
5138
5139 return TARGET_STACK_PROBE;
5140 }
5141 \f
5142 /* Decide whether we can make a sibling call to a function. DECL is the
5143 declaration of the function being targeted by the call and EXP is the
5144 CALL_EXPR representing the call. */
5145
5146 static bool
5147 ix86_function_ok_for_sibcall (tree decl, tree exp)
5148 {
5149 tree type, decl_or_type;
5150 rtx a, b;
5151
5152 /* If we are generating position-independent code, we cannot sibcall
5153 optimize any indirect call, or a direct call to a global function,
5154 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5155 if (!TARGET_MACHO
5156 && !TARGET_64BIT
5157 && flag_pic
5158 && (!decl || !targetm.binds_local_p (decl)))
5159 return false;
5160
5161 /* If we need to align the outgoing stack, then sibcalling would
5162 unalign the stack, which may break the called function. */
5163 if (ix86_minimum_incoming_stack_boundary (true)
5164 < PREFERRED_STACK_BOUNDARY)
5165 return false;
5166
5167 if (decl)
5168 {
5169 decl_or_type = decl;
5170 type = TREE_TYPE (decl);
5171 }
5172 else
5173 {
5174 /* We're looking at the CALL_EXPR, we need the type of the function. */
5175 type = CALL_EXPR_FN (exp); /* pointer expression */
5176 type = TREE_TYPE (type); /* pointer type */
5177 type = TREE_TYPE (type); /* function type */
5178 decl_or_type = type;
5179 }
5180
5181 /* Check that the return value locations are the same. Like
5182 if we are returning floats on the 80387 register stack, we cannot
5183 make a sibcall from a function that doesn't return a float to a
5184 function that does or, conversely, from a function that does return
5185 a float to a function that doesn't; the necessary stack adjustment
5186 would not be executed. This is also the place we notice
5187 differences in the return value ABI. Note that it is ok for one
5188 of the functions to have void return type as long as the return
5189 value of the other is passed in a register. */
5190 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5191 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5192 cfun->decl, false);
5193 if (STACK_REG_P (a) || STACK_REG_P (b))
5194 {
5195 if (!rtx_equal_p (a, b))
5196 return false;
5197 }
5198 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5199 ;
5200 else if (!rtx_equal_p (a, b))
5201 return false;
5202
5203 if (TARGET_64BIT)
5204 {
5205 /* The SYSV ABI has more call-clobbered registers;
5206 disallow sibcalls from MS to SYSV. */
5207 if (cfun->machine->call_abi == MS_ABI
5208 && ix86_function_type_abi (type) == SYSV_ABI)
5209 return false;
5210 }
5211 else
5212 {
5213 /* If this call is indirect, we'll need to be able to use a
5214 call-clobbered register for the address of the target function.
5215 Make sure that all such registers are not used for passing
5216 parameters. Note that DLLIMPORT functions are indirect. */
5217 if (!decl
5218 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5219 {
5220 if (ix86_function_regparm (type, NULL) >= 3)
5221 {
5222 /* ??? Need to count the actual number of registers to be used,
5223 not the possible number of registers. Fix later. */
5224 return false;
5225 }
5226 }
5227 }
5228
5229 /* Otherwise okay. That also includes certain types of indirect calls. */
5230 return true;
5231 }
5232
5233 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5234 and "sseregparm" calling convention attributes;
5235 arguments as in struct attribute_spec.handler. */
5236
5237 static tree
5238 ix86_handle_cconv_attribute (tree *node, tree name,
5239 tree args,
5240 int flags ATTRIBUTE_UNUSED,
5241 bool *no_add_attrs)
5242 {
5243 if (TREE_CODE (*node) != FUNCTION_TYPE
5244 && TREE_CODE (*node) != METHOD_TYPE
5245 && TREE_CODE (*node) != FIELD_DECL
5246 && TREE_CODE (*node) != TYPE_DECL)
5247 {
5248 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5249 name);
5250 *no_add_attrs = true;
5251 return NULL_TREE;
5252 }
5253
5254 /* Can combine regparm with all attributes but fastcall and thiscall. */
5255 if (is_attribute_p ("regparm", name))
5256 {
5257 tree cst;
5258
5259 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5260 {
5261 error ("fastcall and regparm attributes are not compatible");
5262 }
5263
5264 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5265 {
5266 error ("regparm and thiscall attributes are not compatible");
5267 }
5268
5269 cst = TREE_VALUE (args);
5270 if (TREE_CODE (cst) != INTEGER_CST)
5271 {
5272 warning (OPT_Wattributes,
5273 "%qE attribute requires an integer constant argument",
5274 name);
5275 *no_add_attrs = true;
5276 }
5277 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5278 {
5279 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5280 name, REGPARM_MAX);
5281 *no_add_attrs = true;
5282 }
5283
5284 return NULL_TREE;
5285 }
5286
5287 if (TARGET_64BIT)
5288 {
5289 /* Do not warn when emulating the MS ABI. */
5290 if ((TREE_CODE (*node) != FUNCTION_TYPE
5291 && TREE_CODE (*node) != METHOD_TYPE)
5292 || ix86_function_type_abi (*node) != MS_ABI)
5293 warning (OPT_Wattributes, "%qE attribute ignored",
5294 name);
5295 *no_add_attrs = true;
5296 return NULL_TREE;
5297 }
5298
5299 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5300 if (is_attribute_p ("fastcall", name))
5301 {
5302 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5303 {
5304 error ("fastcall and cdecl attributes are not compatible");
5305 }
5306 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5307 {
5308 error ("fastcall and stdcall attributes are not compatible");
5309 }
5310 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5311 {
5312 error ("fastcall and regparm attributes are not compatible");
5313 }
5314 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5315 {
5316 error ("fastcall and thiscall attributes are not compatible");
5317 }
5318 }
5319
5320 /* Can combine stdcall with fastcall (redundant), regparm and
5321 sseregparm. */
5322 else if (is_attribute_p ("stdcall", name))
5323 {
5324 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5325 {
5326 error ("stdcall and cdecl attributes are not compatible");
5327 }
5328 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5329 {
5330 error ("stdcall and fastcall attributes are not compatible");
5331 }
5332 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5333 {
5334 error ("stdcall and thiscall attributes are not compatible");
5335 }
5336 }
5337
5338 /* Can combine cdecl with regparm and sseregparm. */
5339 else if (is_attribute_p ("cdecl", name))
5340 {
5341 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5342 {
5343 error ("stdcall and cdecl attributes are not compatible");
5344 }
5345 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5346 {
5347 error ("fastcall and cdecl attributes are not compatible");
5348 }
5349 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5350 {
5351 error ("cdecl and thiscall attributes are not compatible");
5352 }
5353 }
5354 else if (is_attribute_p ("thiscall", name))
5355 {
5356 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5357 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5358 name);
5359 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5360 {
5361 error ("stdcall and thiscall attributes are not compatible");
5362 }
5363 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5364 {
5365 error ("fastcall and thiscall attributes are not compatible");
5366 }
5367 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5368 {
5369 error ("cdecl and thiscall attributes are not compatible");
5370 }
5371 }
5372
5373 /* Can combine sseregparm with all attributes. */
5374
5375 return NULL_TREE;
5376 }
5377
5378 /* The transactional memory builtins are implicitly regparm or fastcall
5379 depending on the ABI. Override the generic do-nothing attribute that
5380 these builtins were declared with, and replace it with one of the two
5381 attributes that we expect elsewhere. */
5382
5383 static tree
5384 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5385 tree args ATTRIBUTE_UNUSED,
5386 int flags, bool *no_add_attrs)
5387 {
5388 tree alt;
5389
5390 /* In no case do we want to add the placeholder attribute. */
5391 *no_add_attrs = true;
5392
5393 /* The 64-bit ABI is unchanged for transactional memory. */
5394 if (TARGET_64BIT)
5395 return NULL_TREE;
5396
5397 /* ??? Is there a better way to validate 32-bit windows? We have
5398 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5399 if (CHECK_STACK_LIMIT > 0)
5400 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5401 else
5402 {
5403 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5404 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5405 }
5406 decl_attributes (node, alt, flags);
5407
5408 return NULL_TREE;
5409 }
5410
5411 /* This function determines from TYPE the calling-convention. */
5412
5413 unsigned int
5414 ix86_get_callcvt (const_tree type)
5415 {
5416 unsigned int ret = 0;
5417 bool is_stdarg;
5418 tree attrs;
5419
5420 if (TARGET_64BIT)
5421 return IX86_CALLCVT_CDECL;
5422
5423 attrs = TYPE_ATTRIBUTES (type);
5424 if (attrs != NULL_TREE)
5425 {
5426 if (lookup_attribute ("cdecl", attrs))
5427 ret |= IX86_CALLCVT_CDECL;
5428 else if (lookup_attribute ("stdcall", attrs))
5429 ret |= IX86_CALLCVT_STDCALL;
5430 else if (lookup_attribute ("fastcall", attrs))
5431 ret |= IX86_CALLCVT_FASTCALL;
5432 else if (lookup_attribute ("thiscall", attrs))
5433 ret |= IX86_CALLCVT_THISCALL;
5434
5435 /* Regparm isn't allowed for thiscall and fastcall. */
5436 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5437 {
5438 if (lookup_attribute ("regparm", attrs))
5439 ret |= IX86_CALLCVT_REGPARM;
5440 if (lookup_attribute ("sseregparm", attrs))
5441 ret |= IX86_CALLCVT_SSEREGPARM;
5442 }
5443
5444 if (IX86_BASE_CALLCVT(ret) != 0)
5445 return ret;
5446 }
5447
5448 is_stdarg = stdarg_p (type);
5449 if (TARGET_RTD && !is_stdarg)
5450 return IX86_CALLCVT_STDCALL | ret;
5451
5452 if (ret != 0
5453 || is_stdarg
5454 || TREE_CODE (type) != METHOD_TYPE
5455 || ix86_function_type_abi (type) != MS_ABI)
5456 return IX86_CALLCVT_CDECL | ret;
5457
5458 return IX86_CALLCVT_THISCALL;
5459 }
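/* Informal examples of the mapping above on a 32-bit target (hedged, not
   taken from the original sources):

     void __attribute__((fastcall)) f (int, int);
       -> IX86_CALLCVT_FASTCALL
     void __attribute__((stdcall, regparm (2))) g (int);
       -> IX86_CALLCVT_STDCALL | IX86_CALLCVT_REGPARM
     void h (int, ...);
       -> IX86_CALLCVT_CDECL (no attributes, variadic)

   With TARGET_64BIT everything is reported as IX86_CALLCVT_CDECL.  */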
5460
5461 /* Return 0 if the attributes for two types are incompatible, 1 if they
5462 are compatible, and 2 if they are nearly compatible (which causes a
5463 warning to be generated). */
5464
5465 static int
5466 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5467 {
5468 unsigned int ccvt1, ccvt2;
5469
5470 if (TREE_CODE (type1) != FUNCTION_TYPE
5471 && TREE_CODE (type1) != METHOD_TYPE)
5472 return 1;
5473
5474 ccvt1 = ix86_get_callcvt (type1);
5475 ccvt2 = ix86_get_callcvt (type2);
5476 if (ccvt1 != ccvt2)
5477 return 0;
5478 if (ix86_function_regparm (type1, NULL)
5479 != ix86_function_regparm (type2, NULL))
5480 return 0;
5481
5482 return 1;
5483 }
5484 \f
5485 /* Return the regparm value for a function with the indicated TYPE and DECL.
5486 DECL may be NULL when calling function indirectly
5487 or considering a libcall. */
5488
5489 static int
5490 ix86_function_regparm (const_tree type, const_tree decl)
5491 {
5492 tree attr;
5493 int regparm;
5494 unsigned int ccvt;
5495
5496 if (TARGET_64BIT)
5497 return (ix86_function_type_abi (type) == SYSV_ABI
5498 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5499 ccvt = ix86_get_callcvt (type);
5500 regparm = ix86_regparm;
5501
5502 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5503 {
5504 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5505 if (attr)
5506 {
5507 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5508 return regparm;
5509 }
5510 }
5511 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5512 return 2;
5513 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5514 return 1;
5515
5516 /* Use register calling convention for local functions when possible. */
5517 if (decl
5518 && TREE_CODE (decl) == FUNCTION_DECL
5519 && optimize
5520 && !(profile_flag && !flag_fentry))
5521 {
5522 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5523 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5524 if (i && i->local && i->can_change_signature)
5525 {
5526 int local_regparm, globals = 0, regno;
5527
5528 /* Make sure no regparm register is taken by a
5529 fixed register variable. */
5530 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5531 if (fixed_regs[local_regparm])
5532 break;
5533
5534 /* We don't want to use regparm(3) for nested functions as
5535 these use a static chain pointer in the third argument. */
5536 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5537 local_regparm = 2;
5538
5539 /* In 32-bit mode save a register for the split stack. */
5540 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5541 local_regparm = 2;
5542
5543 /* Each fixed register usage increases register pressure,
5544 so fewer registers should be used for argument passing.
5545 This functionality can be overridden by an explicit
5546 regparm value. */
5547 for (regno = AX_REG; regno <= DI_REG; regno++)
5548 if (fixed_regs[regno])
5549 globals++;
5550
5551 local_regparm
5552 = globals < local_regparm ? local_regparm - globals : 0;
5553
5554 if (local_regparm > regparm)
5555 regparm = local_regparm;
5556 }
5557 }
5558
5559 return regparm;
5560 }
5561
5562 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5563 DFmode (2) arguments in SSE registers for a function with the
5564 indicated TYPE and DECL. DECL may be NULL when calling function
5565 indirectly or considering a libcall. Otherwise return 0. */
5566
5567 static int
5568 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5569 {
5570 gcc_assert (!TARGET_64BIT);
5571
5572 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5573 by the sseregparm attribute. */
5574 if (TARGET_SSEREGPARM
5575 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5576 {
5577 if (!TARGET_SSE)
5578 {
5579 if (warn)
5580 {
5581 if (decl)
5582 error ("calling %qD with attribute sseregparm without "
5583 "SSE/SSE2 enabled", decl);
5584 else
5585 error ("calling %qT with attribute sseregparm without "
5586 "SSE/SSE2 enabled", type);
5587 }
5588 return 0;
5589 }
5590
5591 return 2;
5592 }
5593
5594 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5595 (and DFmode for SSE2) arguments in SSE registers. */
5596 if (decl && TARGET_SSE_MATH && optimize
5597 && !(profile_flag && !flag_fentry))
5598 {
5599 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5600 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5601 if (i && i->local && i->can_change_signature)
5602 return TARGET_SSE2 ? 2 : 1;
5603 }
5604
5605 return 0;
5606 }
5607
5608 /* Return true if EAX is live at the start of the function. Used by
5609 ix86_expand_prologue to determine if we need special help before
5610 calling allocate_stack_worker. */
5611
5612 static bool
5613 ix86_eax_live_at_start_p (void)
5614 {
5615 /* Cheat. Don't bother working forward from ix86_function_regparm
5616 to the function type to whether an actual argument is located in
5617 eax. Instead just look at cfg info, which is still close enough
5618 to correct at this point. This gives false positives for broken
5619 functions that might use uninitialized data that happens to be
5620 allocated in eax, but who cares? */
5621 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5622 }
5623
5624 static bool
5625 ix86_keep_aggregate_return_pointer (tree fntype)
5626 {
5627 tree attr;
5628
5629 if (!TARGET_64BIT)
5630 {
5631 attr = lookup_attribute ("callee_pop_aggregate_return",
5632 TYPE_ATTRIBUTES (fntype));
5633 if (attr)
5634 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5635
5636 /* For 32-bit MS-ABI the default is to keep aggregate
5637 return pointer. */
5638 if (ix86_function_type_abi (fntype) == MS_ABI)
5639 return true;
5640 }
5641 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5642 }
5643
5644 /* Value is the number of bytes of arguments automatically
5645 popped when returning from a subroutine call.
5646 FUNDECL is the declaration node of the function (as a tree),
5647 FUNTYPE is the data type of the function (as a tree),
5648 or for a library call it is an identifier node for the subroutine name.
5649 SIZE is the number of bytes of arguments passed on the stack.
5650
5651 On the 80386, the RTD insn may be used to pop them if the number
5652 of args is fixed, but if the number is variable then the caller
5653 must pop them all. RTD can't be used for library calls now
5654 because the library is compiled with the Unix compiler.
5655 Use of RTD is a selectable option, since it is incompatible with
5656 standard Unix calling sequences. If the option is not selected,
5657 the caller must always pop the args.
5658
5659 The attribute stdcall is equivalent to RTD on a per module basis. */
5660
5661 static int
5662 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5663 {
5664 unsigned int ccvt;
5665
5666 /* None of the 64-bit ABIs pop arguments. */
5667 if (TARGET_64BIT)
5668 return 0;
5669
5670 ccvt = ix86_get_callcvt (funtype);
5671
5672 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5673 | IX86_CALLCVT_THISCALL)) != 0
5674 && ! stdarg_p (funtype))
5675 return size;
5676
5677 /* Lose any fake structure return argument if it is passed on the stack. */
5678 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5679 && !ix86_keep_aggregate_return_pointer (funtype))
5680 {
5681 int nregs = ix86_function_regparm (funtype, fundecl);
5682 if (nregs == 0)
5683 return GET_MODE_SIZE (Pmode);
5684 }
5685
5686 return 0;
5687 }
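/* Illustrative only: for

     void __attribute__((stdcall)) f (int a, int b);

   SIZE is 8 on ia32 and the callee pops its own arguments (ret $8),
   whereas a plain cdecl function returns 0 here and the caller is
   responsible for popping the arguments.  */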
5688
5689 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5690
5691 static bool
5692 ix86_legitimate_combined_insn (rtx insn)
5693 {
5694 /* Check operand constraints in case hard registers were propagated
5695 into insn pattern. This check prevents combine pass from
5696 generating insn patterns with invalid hard register operands.
5697 These invalid insns can eventually confuse reload to error out
5698 with a spill failure. See also PRs 46829 and 46843. */
5699 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5700 {
5701 int i;
5702
5703 extract_insn (insn);
5704 preprocess_constraints ();
5705
5706 for (i = 0; i < recog_data.n_operands; i++)
5707 {
5708 rtx op = recog_data.operand[i];
5709 enum machine_mode mode = GET_MODE (op);
5710 struct operand_alternative *op_alt;
5711 int offset = 0;
5712 bool win;
5713 int j;
5714
5715 /* For pre-AVX disallow unaligned loads/stores where the
5716 instructions don't support it. */
5717 if (!TARGET_AVX
5718 && VECTOR_MODE_P (GET_MODE (op))
5719 && misaligned_operand (op, GET_MODE (op)))
5720 {
5721 int min_align = get_attr_ssememalign (insn);
5722 if (min_align == 0)
5723 return false;
5724 }
5725
5726 /* A unary operator may be accepted by the predicate, but it
5727 is irrelevant for matching constraints. */
5728 if (UNARY_P (op))
5729 op = XEXP (op, 0);
5730
5731 if (GET_CODE (op) == SUBREG)
5732 {
5733 if (REG_P (SUBREG_REG (op))
5734 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5735 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5736 GET_MODE (SUBREG_REG (op)),
5737 SUBREG_BYTE (op),
5738 GET_MODE (op));
5739 op = SUBREG_REG (op);
5740 }
5741
5742 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5743 continue;
5744
5745 op_alt = recog_op_alt[i];
5746
5747 /* Operand has no constraints, anything is OK. */
5748 win = !recog_data.n_alternatives;
5749
5750 for (j = 0; j < recog_data.n_alternatives; j++)
5751 {
5752 if (op_alt[j].anything_ok
5753 || (op_alt[j].matches != -1
5754 && operands_match_p
5755 (recog_data.operand[i],
5756 recog_data.operand[op_alt[j].matches]))
5757 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5758 {
5759 win = true;
5760 break;
5761 }
5762 }
5763
5764 if (!win)
5765 return false;
5766 }
5767 }
5768
5769 return true;
5770 }
5771 \f
5772 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5773
5774 static unsigned HOST_WIDE_INT
5775 ix86_asan_shadow_offset (void)
5776 {
5777 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5778 : HOST_WIDE_INT_C (0x7fff8000))
5779 : (HOST_WIDE_INT_1 << 29);
5780 }
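/* Background note (hedged, not from the original sources): AddressSanitizer
   maps an application address A to shadow memory at (A >> 3) + offset, so
   the constants returned above select a shadow base that keeps the shadow
   region clear of the normal address space for each of the LP64, Mach-O
   and 32-bit cases.  */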
5781 \f
5782 /* Argument support functions. */
5783
5784 /* Return true when register may be used to pass function parameters. */
5785 bool
5786 ix86_function_arg_regno_p (int regno)
5787 {
5788 int i;
5789 const int *parm_regs;
5790
5791 if (!TARGET_64BIT)
5792 {
5793 if (TARGET_MACHO)
5794 return (regno < REGPARM_MAX
5795 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5796 else
5797 return (regno < REGPARM_MAX
5798 || (TARGET_MMX && MMX_REGNO_P (regno)
5799 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5800 || (TARGET_SSE && SSE_REGNO_P (regno)
5801 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5802 }
5803
5804 if (TARGET_SSE && SSE_REGNO_P (regno)
5805 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5806 return true;
5807
5808 /* TODO: The function should depend on current function ABI but
5809 builtins.c would need updating then. Therefore we use the
5810 default ABI. */
5811
5812 /* RAX is used as hidden argument to va_arg functions. */
5813 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5814 return true;
5815
5816 if (ix86_abi == MS_ABI)
5817 parm_regs = x86_64_ms_abi_int_parameter_registers;
5818 else
5819 parm_regs = x86_64_int_parameter_registers;
5820 for (i = 0; i < (ix86_abi == MS_ABI
5821 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5822 if (regno == parm_regs[i])
5823 return true;
5824 return false;
5825 }
5826
5827 /* Return if we do not know how to pass TYPE solely in registers. */
5828
5829 static bool
5830 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5831 {
5832 if (must_pass_in_stack_var_size_or_pad (mode, type))
5833 return true;
5834
5835 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5836 The layout_type routine is crafty and tries to trick us into passing
5837 currently unsupported vector types on the stack by using TImode. */
5838 return (!TARGET_64BIT && mode == TImode
5839 && type && TREE_CODE (type) != VECTOR_TYPE);
5840 }
5841
5842 /* Return the size, in bytes, of the area reserved for arguments passed
5843 in registers for the function represented by FNDECL, depending on the
5844 ABI format used. */
5845 int
5846 ix86_reg_parm_stack_space (const_tree fndecl)
5847 {
5848 enum calling_abi call_abi = SYSV_ABI;
5849 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5850 call_abi = ix86_function_abi (fndecl);
5851 else
5852 call_abi = ix86_function_type_abi (fndecl);
5853 if (TARGET_64BIT && call_abi == MS_ABI)
5854 return 32;
5855 return 0;
5856 }
5857
5858 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5859 call ABI used. */
5860 enum calling_abi
5861 ix86_function_type_abi (const_tree fntype)
5862 {
5863 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5864 {
5865 enum calling_abi abi = ix86_abi;
5866 if (abi == SYSV_ABI)
5867 {
5868 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5869 abi = MS_ABI;
5870 }
5871 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5872 abi = SYSV_ABI;
5873 return abi;
5874 }
5875 return ix86_abi;
5876 }
5877
5878 /* We add this as a workaround in order to use libc_has_function
5879 hook in i386.md. */
5880 bool
5881 ix86_libc_has_function (enum function_class fn_class)
5882 {
5883 return targetm.libc_has_function (fn_class);
5884 }
5885
5886 static bool
5887 ix86_function_ms_hook_prologue (const_tree fn)
5888 {
5889 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5890 {
5891 if (decl_function_context (fn) != NULL_TREE)
5892 error_at (DECL_SOURCE_LOCATION (fn),
5893 "ms_hook_prologue is not compatible with nested function");
5894 else
5895 return true;
5896 }
5897 return false;
5898 }
5899
5900 static enum calling_abi
5901 ix86_function_abi (const_tree fndecl)
5902 {
5903 if (! fndecl)
5904 return ix86_abi;
5905 return ix86_function_type_abi (TREE_TYPE (fndecl));
5906 }
5907
5908 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5909 call ABI used. */
5910 enum calling_abi
5911 ix86_cfun_abi (void)
5912 {
5913 if (! cfun)
5914 return ix86_abi;
5915 return cfun->machine->call_abi;
5916 }
5917
5918 /* Write the extra assembler code needed to declare a function properly. */
5919
5920 void
5921 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5922 tree decl)
5923 {
5924 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5925
5926 if (is_ms_hook)
5927 {
5928 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5929 unsigned int filler_cc = 0xcccccccc;
5930
5931 for (i = 0; i < filler_count; i += 4)
5932 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5933 }
5934
5935 #ifdef SUBTARGET_ASM_UNWIND_INIT
5936 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5937 #endif
5938
5939 ASM_OUTPUT_LABEL (asm_out_file, fname);
5940
5941 /* Output magic byte marker, if hot-patch attribute is set. */
5942 if (is_ms_hook)
5943 {
5944 if (TARGET_64BIT)
5945 {
5946 /* leaq [%rsp + 0], %rsp */
5947 asm_fprintf (asm_out_file, ASM_BYTE
5948 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5949 }
5950 else
5951 {
5952 /* movl.s %edi, %edi
5953 push %ebp
5954 movl.s %esp, %ebp */
5955 asm_fprintf (asm_out_file, ASM_BYTE
5956 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5957 }
5958 }
5959 }
5960
5961 /* regclass.c */
5962 extern void init_regs (void);
5963
5964 /* Implementation of the call ABI switching target hook. The call
5965 register sets specific to FNDECL are set up. See also
5966 ix86_conditional_register_usage for more details. */
5967 void
5968 ix86_call_abi_override (const_tree fndecl)
5969 {
5970 if (fndecl == NULL_TREE)
5971 cfun->machine->call_abi = ix86_abi;
5972 else
5973 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5974 }
5975
5976 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5977 expensive re-initialization of init_regs each time we switch function context
5978 since this is needed only during RTL expansion. */
5979 static void
5980 ix86_maybe_switch_abi (void)
5981 {
5982 if (TARGET_64BIT &&
5983 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5984 reinit_regs ();
5985 }
5986
5987 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5988 for a call to a function whose data type is FNTYPE.
5989 For a library call, FNTYPE is 0. */
5990
5991 void
5992 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5993 tree fntype, /* tree ptr for function decl */
5994 rtx libname, /* SYMBOL_REF of library name or 0 */
5995 tree fndecl,
5996 int caller)
5997 {
5998 struct cgraph_local_info *i;
5999
6000 memset (cum, 0, sizeof (*cum));
6001
6002 if (fndecl)
6003 {
6004 i = cgraph_local_info (fndecl);
6005 cum->call_abi = ix86_function_abi (fndecl);
6006 }
6007 else
6008 {
6009 i = NULL;
6010 cum->call_abi = ix86_function_type_abi (fntype);
6011 }
6012
6013 cum->caller = caller;
6014
6015 /* Set up the number of registers to use for passing arguments. */
6016
6017 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
6018 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
6019 "or subtarget optimization implying it");
6020 cum->nregs = ix86_regparm;
6021 if (TARGET_64BIT)
6022 {
6023 cum->nregs = (cum->call_abi == SYSV_ABI
6024 ? X86_64_REGPARM_MAX
6025 : X86_64_MS_REGPARM_MAX);
6026 }
6027 if (TARGET_SSE)
6028 {
6029 cum->sse_nregs = SSE_REGPARM_MAX;
6030 if (TARGET_64BIT)
6031 {
6032 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6033 ? X86_64_SSE_REGPARM_MAX
6034 : X86_64_MS_SSE_REGPARM_MAX);
6035 }
6036 }
6037 if (TARGET_MMX)
6038 cum->mmx_nregs = MMX_REGPARM_MAX;
6039 cum->warn_avx = true;
6040 cum->warn_sse = true;
6041 cum->warn_mmx = true;
6042
6043 /* Because the type might mismatch between caller and callee, we need to
6044 use the actual type of the function for local calls.
6045 FIXME: cgraph_analyze can be told to actually record if function uses
6046 va_start so for local functions maybe_vaarg can be made aggressive
6047 helping K&R code.
6048 FIXME: once the type system is fixed, we won't need this code anymore. */
6049 if (i && i->local && i->can_change_signature)
6050 fntype = TREE_TYPE (fndecl);
6051 cum->maybe_vaarg = (fntype
6052 ? (!prototype_p (fntype) || stdarg_p (fntype))
6053 : !libname);
6054
6055 if (!TARGET_64BIT)
6056 {
6057 /* If there are variable arguments, then we won't pass anything
6058 in registers in 32-bit mode. */
6059 if (stdarg_p (fntype))
6060 {
6061 cum->nregs = 0;
6062 cum->sse_nregs = 0;
6063 cum->mmx_nregs = 0;
6064 cum->warn_avx = 0;
6065 cum->warn_sse = 0;
6066 cum->warn_mmx = 0;
6067 return;
6068 }
6069
6070 /* Use ecx and edx registers if function has fastcall attribute,
6071 else look for regparm information. */
6072 if (fntype)
6073 {
6074 unsigned int ccvt = ix86_get_callcvt (fntype);
6075 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6076 {
6077 cum->nregs = 1;
6078 cum->fastcall = 1; /* Same first register as in fastcall. */
6079 }
6080 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6081 {
6082 cum->nregs = 2;
6083 cum->fastcall = 1;
6084 }
6085 else
6086 cum->nregs = ix86_function_regparm (fntype, fndecl);
6087 }
6088
6089 /* Set up the number of SSE registers used for passing SFmode
6090 and DFmode arguments. Warn for mismatching ABI. */
6091 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6092 }
6093 }
6094
6095 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6096 But in the case of vector types, it is some vector mode.
6097
6098 When we have only some of our vector isa extensions enabled, then there
6099 are some modes for which vector_mode_supported_p is false. For these
6100 modes, the generic vector support in gcc will choose some non-vector mode
6101 in order to implement the type. By computing the natural mode, we'll
6102 select the proper ABI location for the operand and not depend on whatever
6103 the middle-end decides to do with these vector types.
6104
6105 The middle-end can't deal with vector types > 16 bytes. In this
6106 case, we return the original mode and warn about the ABI change if CUM
6107 isn't NULL.
6108
6109 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6110 available for the function return value. */
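/* For instance (illustrative): for

     typedef int v4si __attribute__ ((vector_size (16)));

   the natural mode is V4SImode. If SSE is disabled the middle-end may
   have picked a non-vector mode for the type, and the search below
   recovers V4SImode so that the ABI classification stays consistent and
   the ABI-change warnings described above can be issued.  */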
6111
6112 static enum machine_mode
6113 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6114 bool in_return)
6115 {
6116 enum machine_mode mode = TYPE_MODE (type);
6117
6118 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6119 {
6120 HOST_WIDE_INT size = int_size_in_bytes (type);
6121 if ((size == 8 || size == 16 || size == 32)
6122 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6123 && TYPE_VECTOR_SUBPARTS (type) > 1)
6124 {
6125 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6126
6127 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6128 mode = MIN_MODE_VECTOR_FLOAT;
6129 else
6130 mode = MIN_MODE_VECTOR_INT;
6131
6132 /* Get the mode which has this inner mode and number of units. */
6133 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6134 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6135 && GET_MODE_INNER (mode) == innermode)
6136 {
6137 if (size == 32 && !TARGET_AVX)
6138 {
6139 static bool warnedavx;
6140 static bool warnedavx_ret;
6141
6142 if (cum
6143 && !warnedavx
6144 && cum->warn_avx)
6145 {
6146 warnedavx = true;
6147 warning (0, "AVX vector argument without AVX "
6148 "enabled changes the ABI");
6149 }
6150 else if (in_return && !warnedavx_ret)
6151 {
6152 warnedavx_ret = true;
6153 warning (0, "AVX vector return without AVX "
6154 "enabled changes the ABI");
6155 }
6156
6157 return TYPE_MODE (type);
6158 }
6159 else if (((size == 8 && TARGET_64BIT) || size == 16)
6160 && !TARGET_SSE)
6161 {
6162 static bool warnedsse;
6163 static bool warnedsse_ret;
6164
6165 if (cum
6166 && !warnedsse
6167 && cum->warn_sse)
6168 {
6169 warnedsse = true;
6170 warning (0, "SSE vector argument without SSE "
6171 "enabled changes the ABI");
6172 }
6173 else if (!TARGET_64BIT
6174 && in_return
6175 && !warnedsse_ret)
6176 {
6177 warnedsse_ret = true;
6178 warning (0, "SSE vector return without SSE "
6179 "enabled changes the ABI");
6180 }
6181 }
6182 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6183 {
6184 static bool warnedmmx;
6185 static bool warnedmmx_ret;
6186
6187 if (cum
6188 && !warnedmmx
6189 && cum->warn_mmx)
6190 {
6191 warnedmmx = true;
6192 warning (0, "MMX vector argument without MMX "
6193 "enabled changes the ABI");
6194 }
6195 else if (in_return && !warnedmmx_ret)
6196 {
6197 warnedmmx_ret = true;
6198 warning (0, "MMX vector return without MMX "
6199 "enabled changes the ABI");
6200 }
6201 }
6202 return mode;
6203 }
6204
6205 gcc_unreachable ();
6206 }
6207 }
6208
6209 return mode;
6210 }
6211
6212 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6213 this may not agree with the mode that the type system has chosen for the
6214 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6215 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6216
6217 static rtx
6218 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6219 unsigned int regno)
6220 {
6221 rtx tmp;
6222
6223 if (orig_mode != BLKmode)
6224 tmp = gen_rtx_REG (orig_mode, regno);
6225 else
6226 {
6227 tmp = gen_rtx_REG (mode, regno);
6228 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6229 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6230 }
6231
6232 return tmp;
6233 }
6234
6235 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6236 of this code is to classify each 8 bytes of an incoming argument by the register
6237 class and assign registers accordingly. */
6238
6239 /* Return the union class of CLASS1 and CLASS2.
6240 See the x86-64 PS ABI for details. */
6241
6242 static enum x86_64_reg_class
6243 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6244 {
6245 /* Rule #1: If both classes are equal, this is the resulting class. */
6246 if (class1 == class2)
6247 return class1;
6248
6249 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6250 the other class. */
6251 if (class1 == X86_64_NO_CLASS)
6252 return class2;
6253 if (class2 == X86_64_NO_CLASS)
6254 return class1;
6255
6256 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6257 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6258 return X86_64_MEMORY_CLASS;
6259
6260 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6261 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6262 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6263 return X86_64_INTEGERSI_CLASS;
6264 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6265 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6266 return X86_64_INTEGER_CLASS;
6267
6268 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6269 MEMORY is used. */
6270 if (class1 == X86_64_X87_CLASS
6271 || class1 == X86_64_X87UP_CLASS
6272 || class1 == X86_64_COMPLEX_X87_CLASS
6273 || class2 == X86_64_X87_CLASS
6274 || class2 == X86_64_X87UP_CLASS
6275 || class2 == X86_64_COMPLEX_X87_CLASS)
6276 return X86_64_MEMORY_CLASS;
6277
6278 /* Rule #6: Otherwise class SSE is used. */
6279 return X86_64_SSE_CLASS;
6280 }
6281
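/* A few concrete merges, as a reading aid (editorial examples): by rule #2,
   X86_64_SSESF_CLASS merged with X86_64_NO_CLASS stays X86_64_SSESF_CLASS;
   by rule #4, X86_64_SSE_CLASS merged with X86_64_INTEGER_CLASS becomes
   X86_64_INTEGER_CLASS; by rule #5, X86_64_X87_CLASS merged with
   X86_64_SSE_CLASS forces X86_64_MEMORY_CLASS.  */
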
6282 /* Classify the argument of type TYPE and mode MODE.
6283 CLASSES will be filled by the register class used to pass each word
6284 of the operand. The number of words is returned. In case the parameter
6285 should be passed in memory, 0 is returned. As a special case for zero
6286 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6287
6288    BIT_OFFSET is used internally for handling records; it specifies the
6289    offset within the record, in bits modulo 512, to avoid overflow cases.
6290
6291 See the x86-64 PS ABI for details.
6292 */
6293
6294 static int
6295 classify_argument (enum machine_mode mode, const_tree type,
6296 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6297 {
6298 HOST_WIDE_INT bytes =
6299 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6300 int words
6301 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6302
6303 /* Variable sized entities are always passed/returned in memory. */
6304 if (bytes < 0)
6305 return 0;
6306
6307 if (mode != VOIDmode
6308 && targetm.calls.must_pass_in_stack (mode, type))
6309 return 0;
6310
6311 if (type && AGGREGATE_TYPE_P (type))
6312 {
6313 int i;
6314 tree field;
6315 enum x86_64_reg_class subclasses[MAX_CLASSES];
6316
6317 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6318 if (bytes > 32)
6319 return 0;
6320
6321 for (i = 0; i < words; i++)
6322 classes[i] = X86_64_NO_CLASS;
6323
6324 	  /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
6325 	     signal the memory class, so handle them as a special case.  */
6326 if (!words)
6327 {
6328 classes[0] = X86_64_NO_CLASS;
6329 return 1;
6330 }
6331
6332 /* Classify each field of record and merge classes. */
6333 switch (TREE_CODE (type))
6334 {
6335 case RECORD_TYPE:
6336 /* And now merge the fields of structure. */
6337 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6338 {
6339 if (TREE_CODE (field) == FIELD_DECL)
6340 {
6341 int num;
6342
6343 if (TREE_TYPE (field) == error_mark_node)
6344 continue;
6345
6346 /* Bitfields are always classified as integer. Handle them
6347 early, since later code would consider them to be
6348 misaligned integers. */
6349 if (DECL_BIT_FIELD (field))
6350 {
6351 for (i = (int_bit_position (field)
6352 + (bit_offset % 64)) / 8 / 8;
6353 i < ((int_bit_position (field) + (bit_offset % 64))
6354 + tree_to_shwi (DECL_SIZE (field))
6355 + 63) / 8 / 8; i++)
6356 classes[i] =
6357 merge_classes (X86_64_INTEGER_CLASS,
6358 classes[i]);
6359 }
6360 else
6361 {
6362 int pos;
6363
6364 type = TREE_TYPE (field);
6365
6366 /* Flexible array member is ignored. */
6367 if (TYPE_MODE (type) == BLKmode
6368 && TREE_CODE (type) == ARRAY_TYPE
6369 && TYPE_SIZE (type) == NULL_TREE
6370 && TYPE_DOMAIN (type) != NULL_TREE
6371 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6372 == NULL_TREE))
6373 {
6374 static bool warned;
6375
6376 if (!warned && warn_psabi)
6377 {
6378 warned = true;
6379 inform (input_location,
6380 "the ABI of passing struct with"
6381 " a flexible array member has"
6382 " changed in GCC 4.4");
6383 }
6384 continue;
6385 }
6386 num = classify_argument (TYPE_MODE (type), type,
6387 subclasses,
6388 (int_bit_position (field)
6389 + bit_offset) % 512);
6390 if (!num)
6391 return 0;
6392 pos = (int_bit_position (field)
6393 + (bit_offset % 64)) / 8 / 8;
6394 for (i = 0; i < num && (i + pos) < words; i++)
6395 classes[i + pos] =
6396 merge_classes (subclasses[i], classes[i + pos]);
6397 }
6398 }
6399 }
6400 break;
6401
6402 case ARRAY_TYPE:
6403 /* Arrays are handled as small records. */
6404 {
6405 int num;
6406 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6407 TREE_TYPE (type), subclasses, bit_offset);
6408 if (!num)
6409 return 0;
6410
6411 /* The partial classes are now full classes. */
6412 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6413 subclasses[0] = X86_64_SSE_CLASS;
6414 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6415 && !((bit_offset % 64) == 0 && bytes == 4))
6416 subclasses[0] = X86_64_INTEGER_CLASS;
6417
6418 for (i = 0; i < words; i++)
6419 classes[i] = subclasses[i % num];
6420
6421 break;
6422 }
6423 case UNION_TYPE:
6424 case QUAL_UNION_TYPE:
6425 	  /* Unions are similar to RECORD_TYPE but the offset is always
6426 	     zero.  */
6427 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6428 {
6429 if (TREE_CODE (field) == FIELD_DECL)
6430 {
6431 int num;
6432
6433 if (TREE_TYPE (field) == error_mark_node)
6434 continue;
6435
6436 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6437 TREE_TYPE (field), subclasses,
6438 bit_offset);
6439 if (!num)
6440 return 0;
6441 for (i = 0; i < num; i++)
6442 classes[i] = merge_classes (subclasses[i], classes[i]);
6443 }
6444 }
6445 break;
6446
6447 default:
6448 gcc_unreachable ();
6449 }
6450
6451 if (words > 2)
6452 {
6453 	  /* When the size exceeds 16 bytes, everything is passed
6454 	     in memory unless the first eightbyte is
6455 	     X86_64_SSE_CLASS and all the remaining eightbytes are
6456 	     X86_64_SSEUP_CLASS.  */
6457 if (classes[0] != X86_64_SSE_CLASS)
6458 return 0;
6459
6460 for (i = 1; i < words; i++)
6461 if (classes[i] != X86_64_SSEUP_CLASS)
6462 return 0;
6463 }
6464
6465 /* Final merger cleanup. */
6466 for (i = 0; i < words; i++)
6467 {
6468 /* If one class is MEMORY, everything should be passed in
6469 memory. */
6470 if (classes[i] == X86_64_MEMORY_CLASS)
6471 return 0;
6472
6473 /* The X86_64_SSEUP_CLASS should be always preceded by
6474 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6475 if (classes[i] == X86_64_SSEUP_CLASS
6476 && classes[i - 1] != X86_64_SSE_CLASS
6477 && classes[i - 1] != X86_64_SSEUP_CLASS)
6478 {
6479 /* The first one should never be X86_64_SSEUP_CLASS. */
6480 gcc_assert (i != 0);
6481 classes[i] = X86_64_SSE_CLASS;
6482 }
6483
6484 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6485 everything should be passed in memory. */
6486 if (classes[i] == X86_64_X87UP_CLASS
6487 && (classes[i - 1] != X86_64_X87_CLASS))
6488 {
6489 static bool warned;
6490
6491 /* The first one should never be X86_64_X87UP_CLASS. */
6492 gcc_assert (i != 0);
6493 if (!warned && warn_psabi)
6494 {
6495 warned = true;
6496 inform (input_location,
6497 "the ABI of passing union with long double"
6498 " has changed in GCC 4.4");
6499 }
6500 return 0;
6501 }
6502 }
6503 return words;
6504 }
6505
6506 /* Compute the alignment needed.  We align all types to their natural
6507    boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
6508 if (mode != VOIDmode && mode != BLKmode)
6509 {
6510 int mode_alignment = GET_MODE_BITSIZE (mode);
6511
6512 if (mode == XFmode)
6513 mode_alignment = 128;
6514 else if (mode == XCmode)
6515 mode_alignment = 256;
6516 if (COMPLEX_MODE_P (mode))
6517 mode_alignment /= 2;
6518 /* Misaligned fields are always returned in memory. */
6519 if (bit_offset % mode_alignment)
6520 return 0;
6521 }
6522
6523 /* For V1xx modes, just use the base mode.  */
6524 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6525 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6526 mode = GET_MODE_INNER (mode);
6527
6528 /* Classification of atomic types. */
6529 switch (mode)
6530 {
6531 case SDmode:
6532 case DDmode:
6533 classes[0] = X86_64_SSE_CLASS;
6534 return 1;
6535 case TDmode:
6536 classes[0] = X86_64_SSE_CLASS;
6537 classes[1] = X86_64_SSEUP_CLASS;
6538 return 2;
6539 case DImode:
6540 case SImode:
6541 case HImode:
6542 case QImode:
6543 case CSImode:
6544 case CHImode:
6545 case CQImode:
6546 {
6547 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
6548
6549 if (size <= 32)
6550 {
6551 classes[0] = X86_64_INTEGERSI_CLASS;
6552 return 1;
6553 }
6554 else if (size <= 64)
6555 {
6556 classes[0] = X86_64_INTEGER_CLASS;
6557 return 1;
6558 }
6559 else if (size <= 64+32)
6560 {
6561 classes[0] = X86_64_INTEGER_CLASS;
6562 classes[1] = X86_64_INTEGERSI_CLASS;
6563 return 2;
6564 }
6565 else if (size <= 64+64)
6566 {
6567 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6568 return 2;
6569 }
6570 else
6571 gcc_unreachable ();
6572 }
6573 case CDImode:
6574 case TImode:
6575 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6576 return 2;
6577 case COImode:
6578 case OImode:
6579 /* OImode shouldn't be used directly. */
6580 gcc_unreachable ();
6581 case CTImode:
6582 return 0;
6583 case SFmode:
6584 if (!(bit_offset % 64))
6585 classes[0] = X86_64_SSESF_CLASS;
6586 else
6587 classes[0] = X86_64_SSE_CLASS;
6588 return 1;
6589 case DFmode:
6590 classes[0] = X86_64_SSEDF_CLASS;
6591 return 1;
6592 case XFmode:
6593 classes[0] = X86_64_X87_CLASS;
6594 classes[1] = X86_64_X87UP_CLASS;
6595 return 2;
6596 case TFmode:
6597 classes[0] = X86_64_SSE_CLASS;
6598 classes[1] = X86_64_SSEUP_CLASS;
6599 return 2;
6600 case SCmode:
6601 classes[0] = X86_64_SSE_CLASS;
6602 if (!(bit_offset % 64))
6603 return 1;
6604 else
6605 {
6606 static bool warned;
6607
6608 if (!warned && warn_psabi)
6609 {
6610 warned = true;
6611 inform (input_location,
6612 "the ABI of passing structure with complex float"
6613 " member has changed in GCC 4.4");
6614 }
6615 classes[1] = X86_64_SSESF_CLASS;
6616 return 2;
6617 }
6618 case DCmode:
6619 classes[0] = X86_64_SSEDF_CLASS;
6620 classes[1] = X86_64_SSEDF_CLASS;
6621 return 2;
6622 case XCmode:
6623 classes[0] = X86_64_COMPLEX_X87_CLASS;
6624 return 1;
6625 case TCmode:
6626 /* This mode is larger than 16 bytes.  */
6627 return 0;
6628 case V8SFmode:
6629 case V8SImode:
6630 case V32QImode:
6631 case V16HImode:
6632 case V4DFmode:
6633 case V4DImode:
6634 classes[0] = X86_64_SSE_CLASS;
6635 classes[1] = X86_64_SSEUP_CLASS;
6636 classes[2] = X86_64_SSEUP_CLASS;
6637 classes[3] = X86_64_SSEUP_CLASS;
6638 return 4;
6639 case V8DFmode:
6640 case V16SFmode:
6641 case V8DImode:
6642 case V16SImode:
6643 case V32HImode:
6644 case V64QImode:
6645 classes[0] = X86_64_SSE_CLASS;
6646 classes[1] = X86_64_SSEUP_CLASS;
6647 classes[2] = X86_64_SSEUP_CLASS;
6648 classes[3] = X86_64_SSEUP_CLASS;
6649 classes[4] = X86_64_SSEUP_CLASS;
6650 classes[5] = X86_64_SSEUP_CLASS;
6651 classes[6] = X86_64_SSEUP_CLASS;
6652 classes[7] = X86_64_SSEUP_CLASS;
6653 return 8;
6654 case V4SFmode:
6655 case V4SImode:
6656 case V16QImode:
6657 case V8HImode:
6658 case V2DFmode:
6659 case V2DImode:
6660 classes[0] = X86_64_SSE_CLASS;
6661 classes[1] = X86_64_SSEUP_CLASS;
6662 return 2;
6663 case V1TImode:
6664 case V1DImode:
6665 case V2SFmode:
6666 case V2SImode:
6667 case V4HImode:
6668 case V8QImode:
6669 classes[0] = X86_64_SSE_CLASS;
6670 return 1;
6671 case BLKmode:
6672 case VOIDmode:
6673 return 0;
6674 default:
6675 gcc_assert (VECTOR_MODE_P (mode));
6676
6677 if (bytes > 16)
6678 return 0;
6679
6680 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6681
6682 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6683 classes[0] = X86_64_INTEGERSI_CLASS;
6684 else
6685 classes[0] = X86_64_INTEGER_CLASS;
6686 classes[1] = X86_64_INTEGER_CLASS;
6687 return 1 + (bytes > 8);
6688 }
6689 }
6690
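/* Worked example (editorial, assuming the SysV x86-64 ABI): for

       struct s { double d; long l; };     (16 bytes, two eightbytes)

   classify_argument returns 2 with classes[0] = X86_64_SSEDF_CLASS for the
   double and classes[1] = X86_64_INTEGER_CLASS for the long, so the struct
   travels in one SSE register and one integer register rather than in
   memory.  */
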
6691 /* Examine the argument and set the number of registers required in each
6692    class.  Return 0 iff the parameter should be passed in memory.  */
6693 static int
6694 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6695 int *int_nregs, int *sse_nregs)
6696 {
6697 enum x86_64_reg_class regclass[MAX_CLASSES];
6698 int n = classify_argument (mode, type, regclass, 0);
6699
6700 *int_nregs = 0;
6701 *sse_nregs = 0;
6702 if (!n)
6703 return 0;
6704 for (n--; n >= 0; n--)
6705 switch (regclass[n])
6706 {
6707 case X86_64_INTEGER_CLASS:
6708 case X86_64_INTEGERSI_CLASS:
6709 (*int_nregs)++;
6710 break;
6711 case X86_64_SSE_CLASS:
6712 case X86_64_SSESF_CLASS:
6713 case X86_64_SSEDF_CLASS:
6714 (*sse_nregs)++;
6715 break;
6716 case X86_64_NO_CLASS:
6717 case X86_64_SSEUP_CLASS:
6718 break;
6719 case X86_64_X87_CLASS:
6720 case X86_64_X87UP_CLASS:
6721 if (!in_return)
6722 return 0;
6723 break;
6724 case X86_64_COMPLEX_X87_CLASS:
6725 return in_return ? 2 : 0;
6726 case X86_64_MEMORY_CLASS:
6727 gcc_unreachable ();
6728 }
6729 return 1;
6730 }
6731
6732 /* Construct container for the argument used by GCC interface. See
6733 FUNCTION_ARG for the detailed description. */
6734
6735 static rtx
6736 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6737 const_tree type, int in_return, int nintregs, int nsseregs,
6738 const int *intreg, int sse_regno)
6739 {
6740 /* The following variables hold the static issued_error state. */
6741 static bool issued_sse_arg_error;
6742 static bool issued_sse_ret_error;
6743 static bool issued_x87_ret_error;
6744
6745 enum machine_mode tmpmode;
6746 int bytes =
6747 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6748 enum x86_64_reg_class regclass[MAX_CLASSES];
6749 int n;
6750 int i;
6751 int nexps = 0;
6752 int needed_sseregs, needed_intregs;
6753 rtx exp[MAX_CLASSES];
6754 rtx ret;
6755
6756 n = classify_argument (mode, type, regclass, 0);
6757 if (!n)
6758 return NULL;
6759 if (!examine_argument (mode, type, in_return, &needed_intregs,
6760 &needed_sseregs))
6761 return NULL;
6762 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6763 return NULL;
6764
6765 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6766 some less clueful developer tries to use floating-point anyway. */
6767 if (needed_sseregs && !TARGET_SSE)
6768 {
6769 if (in_return)
6770 {
6771 if (!issued_sse_ret_error)
6772 {
6773 error ("SSE register return with SSE disabled");
6774 issued_sse_ret_error = true;
6775 }
6776 }
6777 else if (!issued_sse_arg_error)
6778 {
6779 error ("SSE register argument with SSE disabled");
6780 issued_sse_arg_error = true;
6781 }
6782 return NULL;
6783 }
6784
6785 /* Likewise, error if the ABI requires us to return values in the
6786 x87 registers and the user specified -mno-80387. */
6787 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6788 for (i = 0; i < n; i++)
6789 if (regclass[i] == X86_64_X87_CLASS
6790 || regclass[i] == X86_64_X87UP_CLASS
6791 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6792 {
6793 if (!issued_x87_ret_error)
6794 {
6795 error ("x87 register return with x87 disabled");
6796 issued_x87_ret_error = true;
6797 }
6798 return NULL;
6799 }
6800
6801 /* First construct simple cases.  Avoid SCmode, since we want to use a
6802    single register to pass this type.  */
6803 if (n == 1 && mode != SCmode)
6804 switch (regclass[0])
6805 {
6806 case X86_64_INTEGER_CLASS:
6807 case X86_64_INTEGERSI_CLASS:
6808 return gen_rtx_REG (mode, intreg[0]);
6809 case X86_64_SSE_CLASS:
6810 case X86_64_SSESF_CLASS:
6811 case X86_64_SSEDF_CLASS:
6812 if (mode != BLKmode)
6813 return gen_reg_or_parallel (mode, orig_mode,
6814 SSE_REGNO (sse_regno));
6815 break;
6816 case X86_64_X87_CLASS:
6817 case X86_64_COMPLEX_X87_CLASS:
6818 return gen_rtx_REG (mode, FIRST_STACK_REG);
6819 case X86_64_NO_CLASS:
6820 /* Zero sized array, struct or class. */
6821 return NULL;
6822 default:
6823 gcc_unreachable ();
6824 }
6825 if (n == 2
6826 && regclass[0] == X86_64_SSE_CLASS
6827 && regclass[1] == X86_64_SSEUP_CLASS
6828 && mode != BLKmode)
6829 return gen_reg_or_parallel (mode, orig_mode,
6830 SSE_REGNO (sse_regno));
6831 if (n == 4
6832 && regclass[0] == X86_64_SSE_CLASS
6833 && regclass[1] == X86_64_SSEUP_CLASS
6834 && regclass[2] == X86_64_SSEUP_CLASS
6835 && regclass[3] == X86_64_SSEUP_CLASS
6836 && mode != BLKmode)
6837 return gen_reg_or_parallel (mode, orig_mode,
6838 SSE_REGNO (sse_regno));
6839 if (n == 8
6840 && regclass[0] == X86_64_SSE_CLASS
6841 && regclass[1] == X86_64_SSEUP_CLASS
6842 && regclass[2] == X86_64_SSEUP_CLASS
6843 && regclass[3] == X86_64_SSEUP_CLASS
6844 && regclass[4] == X86_64_SSEUP_CLASS
6845 && regclass[5] == X86_64_SSEUP_CLASS
6846 && regclass[6] == X86_64_SSEUP_CLASS
6847 && regclass[7] == X86_64_SSEUP_CLASS
6848 && mode != BLKmode)
6849 return gen_reg_or_parallel (mode, orig_mode,
6850 SSE_REGNO (sse_regno));
6851 if (n == 2
6852 && regclass[0] == X86_64_X87_CLASS
6853 && regclass[1] == X86_64_X87UP_CLASS)
6854 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6855
6856 if (n == 2
6857 && regclass[0] == X86_64_INTEGER_CLASS
6858 && regclass[1] == X86_64_INTEGER_CLASS
6859 && (mode == CDImode || mode == TImode || mode == TFmode)
6860 && intreg[0] + 1 == intreg[1])
6861 return gen_rtx_REG (mode, intreg[0]);
6862
6863 /* Otherwise figure out the entries of the PARALLEL. */
6864 for (i = 0; i < n; i++)
6865 {
6866 int pos;
6867
6868 switch (regclass[i])
6869 {
6870 case X86_64_NO_CLASS:
6871 break;
6872 case X86_64_INTEGER_CLASS:
6873 case X86_64_INTEGERSI_CLASS:
6874 /* Merge TImodes on aligned occasions here too. */
6875 if (i * 8 + 8 > bytes)
6876 tmpmode
6877 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6878 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6879 tmpmode = SImode;
6880 else
6881 tmpmode = DImode;
6882 /* We've requested 24 bytes that we
6883    don't have a mode for.  Use DImode.  */
6884 if (tmpmode == BLKmode)
6885 tmpmode = DImode;
6886 exp [nexps++]
6887 = gen_rtx_EXPR_LIST (VOIDmode,
6888 gen_rtx_REG (tmpmode, *intreg),
6889 GEN_INT (i*8));
6890 intreg++;
6891 break;
6892 case X86_64_SSESF_CLASS:
6893 exp [nexps++]
6894 = gen_rtx_EXPR_LIST (VOIDmode,
6895 gen_rtx_REG (SFmode,
6896 SSE_REGNO (sse_regno)),
6897 GEN_INT (i*8));
6898 sse_regno++;
6899 break;
6900 case X86_64_SSEDF_CLASS:
6901 exp [nexps++]
6902 = gen_rtx_EXPR_LIST (VOIDmode,
6903 gen_rtx_REG (DFmode,
6904 SSE_REGNO (sse_regno)),
6905 GEN_INT (i*8));
6906 sse_regno++;
6907 break;
6908 case X86_64_SSE_CLASS:
6909 pos = i;
6910 switch (n)
6911 {
6912 case 1:
6913 tmpmode = DImode;
6914 break;
6915 case 2:
6916 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6917 {
6918 tmpmode = TImode;
6919 i++;
6920 }
6921 else
6922 tmpmode = DImode;
6923 break;
6924 case 4:
6925 gcc_assert (i == 0
6926 && regclass[1] == X86_64_SSEUP_CLASS
6927 && regclass[2] == X86_64_SSEUP_CLASS
6928 && regclass[3] == X86_64_SSEUP_CLASS);
6929 tmpmode = OImode;
6930 i += 3;
6931 break;
6932 case 8:
6933 gcc_assert (i == 0
6934 && regclass[1] == X86_64_SSEUP_CLASS
6935 && regclass[2] == X86_64_SSEUP_CLASS
6936 && regclass[3] == X86_64_SSEUP_CLASS
6937 && regclass[4] == X86_64_SSEUP_CLASS
6938 && regclass[5] == X86_64_SSEUP_CLASS
6939 && regclass[6] == X86_64_SSEUP_CLASS
6940 && regclass[7] == X86_64_SSEUP_CLASS);
6941 tmpmode = XImode;
6942 i += 7;
6943 break;
6944 default:
6945 gcc_unreachable ();
6946 }
6947 exp [nexps++]
6948 = gen_rtx_EXPR_LIST (VOIDmode,
6949 gen_rtx_REG (tmpmode,
6950 SSE_REGNO (sse_regno)),
6951 GEN_INT (pos*8));
6952 sse_regno++;
6953 break;
6954 default:
6955 gcc_unreachable ();
6956 }
6957 }
6958
6959 /* Empty aligned struct, union or class. */
6960 if (nexps == 0)
6961 return NULL;
6962
6963 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6964 for (i = 0; i < nexps; i++)
6965 XVECEXP (ret, 0, i) = exp [i];
6966 return ret;
6967 }
6968
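/* Continuing the struct { double d; long l; } example (editorial sketch):
   when it is the first named argument of a SysV call, the container built
   here is roughly

       (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
                      (expr_list (reg:DI di) (const_int 8))])

   placing the double at byte offset 0 in %xmm0 and the long at byte offset
   8 in %rdi; the exact registers depend on how many integer and SSE slots
   earlier arguments have already consumed.  */
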
6969 /* Update the data in CUM to advance over an argument of mode MODE
6970 and data type TYPE. (TYPE is null for libcalls where that information
6971 may not be available.) */
6972
6973 static void
6974 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6975 const_tree type, HOST_WIDE_INT bytes,
6976 HOST_WIDE_INT words)
6977 {
6978 switch (mode)
6979 {
6980 default:
6981 break;
6982
6983 case BLKmode:
6984 if (bytes < 0)
6985 break;
6986 /* FALLTHRU */
6987
6988 case DImode:
6989 case SImode:
6990 case HImode:
6991 case QImode:
6992 cum->words += words;
6993 cum->nregs -= words;
6994 cum->regno += words;
6995
6996 if (cum->nregs <= 0)
6997 {
6998 cum->nregs = 0;
6999 cum->regno = 0;
7000 }
7001 break;
7002
7003 case OImode:
7004 /* OImode shouldn't be used directly. */
7005 gcc_unreachable ();
7006
7007 case DFmode:
7008 if (cum->float_in_sse < 2)
7009 break;
7010 case SFmode:
7011 if (cum->float_in_sse < 1)
7012 break;
7013 /* FALLTHRU */
7014
7015 case V8SFmode:
7016 case V8SImode:
7017 case V64QImode:
7018 case V32HImode:
7019 case V16SImode:
7020 case V8DImode:
7021 case V16SFmode:
7022 case V8DFmode:
7023 case V32QImode:
7024 case V16HImode:
7025 case V4DFmode:
7026 case V4DImode:
7027 case TImode:
7028 case V16QImode:
7029 case V8HImode:
7030 case V4SImode:
7031 case V2DImode:
7032 case V4SFmode:
7033 case V2DFmode:
7034 if (!type || !AGGREGATE_TYPE_P (type))
7035 {
7036 cum->sse_words += words;
7037 cum->sse_nregs -= 1;
7038 cum->sse_regno += 1;
7039 if (cum->sse_nregs <= 0)
7040 {
7041 cum->sse_nregs = 0;
7042 cum->sse_regno = 0;
7043 }
7044 }
7045 break;
7046
7047 case V8QImode:
7048 case V4HImode:
7049 case V2SImode:
7050 case V2SFmode:
7051 case V1TImode:
7052 case V1DImode:
7053 if (!type || !AGGREGATE_TYPE_P (type))
7054 {
7055 cum->mmx_words += words;
7056 cum->mmx_nregs -= 1;
7057 cum->mmx_regno += 1;
7058 if (cum->mmx_nregs <= 0)
7059 {
7060 cum->mmx_nregs = 0;
7061 cum->mmx_regno = 0;
7062 }
7063 }
7064 break;
7065 }
7066 }
7067
7068 static void
7069 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7070 const_tree type, HOST_WIDE_INT words, bool named)
7071 {
7072 int int_nregs, sse_nregs;
7073
7074   /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack.  */
7075 if (!named && (VALID_AVX512F_REG_MODE (mode)
7076 || VALID_AVX256_REG_MODE (mode)))
7077 return;
7078
7079 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7080 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7081 {
7082 cum->nregs -= int_nregs;
7083 cum->sse_nregs -= sse_nregs;
7084 cum->regno += int_nregs;
7085 cum->sse_regno += sse_nregs;
7086 }
7087 else
7088 {
7089 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7090 cum->words = (cum->words + align - 1) & ~(align - 1);
7091 cum->words += words;
7092 }
7093 }
7094
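/* For example (editorial note), advancing over the two-eightbyte struct
   classified earlier consumes one integer and one SSE register, so
   cum->nregs and cum->sse_nregs each drop by one.  If either count would be
   exceeded, the argument is passed on the stack instead and only cum->words
   advances, after rounding up to the argument's alignment boundary.  */
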
7095 static void
7096 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7097 HOST_WIDE_INT words)
7098 {
7099   /* Anything else should have been passed indirectly (by reference).  */
7100 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7101
7102 cum->words += words;
7103 if (cum->nregs > 0)
7104 {
7105 cum->nregs -= 1;
7106 cum->regno += 1;
7107 }
7108 }
7109
7110 /* Update the data in CUM to advance over an argument of mode MODE and
7111 data type TYPE. (TYPE is null for libcalls where that information
7112 may not be available.) */
7113
7114 static void
7115 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7116 const_tree type, bool named)
7117 {
7118 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7119 HOST_WIDE_INT bytes, words;
7120
7121 if (mode == BLKmode)
7122 bytes = int_size_in_bytes (type);
7123 else
7124 bytes = GET_MODE_SIZE (mode);
7125 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7126
7127 if (type)
7128 mode = type_natural_mode (type, NULL, false);
7129
7130 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7131 function_arg_advance_ms_64 (cum, bytes, words);
7132 else if (TARGET_64BIT)
7133 function_arg_advance_64 (cum, mode, type, words, named);
7134 else
7135 function_arg_advance_32 (cum, mode, type, bytes, words);
7136 }
7137
7138 /* Define where to put the arguments to a function.
7139 Value is zero to push the argument on the stack,
7140 or a hard register in which to store the argument.
7141
7142 MODE is the argument's machine mode.
7143 TYPE is the data type of the argument (as a tree).
7144 This is null for libcalls where that information may
7145 not be available.
7146 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7147 the preceding args and about the function being called.
7148 NAMED is nonzero if this argument is a named parameter
7149 (otherwise it is an extra parameter matching an ellipsis). */
7150
7151 static rtx
7152 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7153 enum machine_mode orig_mode, const_tree type,
7154 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7155 {
7156 /* Avoid the AL settings for the Unix64 ABI. */
7157 if (mode == VOIDmode)
7158 return constm1_rtx;
7159
7160 switch (mode)
7161 {
7162 default:
7163 break;
7164
7165 case BLKmode:
7166 if (bytes < 0)
7167 break;
7168 /* FALLTHRU */
7169 case DImode:
7170 case SImode:
7171 case HImode:
7172 case QImode:
7173 if (words <= cum->nregs)
7174 {
7175 int regno = cum->regno;
7176
7177 	  /* Fastcall allocates the first two DWORD (SImode) or
7178 	     smaller arguments to ECX and EDX if they are not
7179 	     aggregate types.  */
7180 if (cum->fastcall)
7181 {
7182 if (mode == BLKmode
7183 || mode == DImode
7184 || (type && AGGREGATE_TYPE_P (type)))
7185 break;
7186
7187 	      /* ECX, not EAX, is the first allocated register.  */
7188 if (regno == AX_REG)
7189 regno = CX_REG;
7190 }
7191 return gen_rtx_REG (mode, regno);
7192 }
7193 break;
7194
7195 case DFmode:
7196 if (cum->float_in_sse < 2)
7197 break;
7198 case SFmode:
7199 if (cum->float_in_sse < 1)
7200 break;
7201 /* FALLTHRU */
7202 case TImode:
7203 /* In 32bit, we pass TImode in xmm registers. */
7204 case V16QImode:
7205 case V8HImode:
7206 case V4SImode:
7207 case V2DImode:
7208 case V4SFmode:
7209 case V2DFmode:
7210 if (!type || !AGGREGATE_TYPE_P (type))
7211 {
7212 if (cum->sse_nregs)
7213 return gen_reg_or_parallel (mode, orig_mode,
7214 cum->sse_regno + FIRST_SSE_REG);
7215 }
7216 break;
7217
7218 case OImode:
7219 case XImode:
7220 /* OImode and XImode shouldn't be used directly. */
7221 gcc_unreachable ();
7222
7223 case V64QImode:
7224 case V32HImode:
7225 case V16SImode:
7226 case V8DImode:
7227 case V16SFmode:
7228 case V8DFmode:
7229 case V8SFmode:
7230 case V8SImode:
7231 case V32QImode:
7232 case V16HImode:
7233 case V4DFmode:
7234 case V4DImode:
7235 if (!type || !AGGREGATE_TYPE_P (type))
7236 {
7237 if (cum->sse_nregs)
7238 return gen_reg_or_parallel (mode, orig_mode,
7239 cum->sse_regno + FIRST_SSE_REG);
7240 }
7241 break;
7242
7243 case V8QImode:
7244 case V4HImode:
7245 case V2SImode:
7246 case V2SFmode:
7247 case V1TImode:
7248 case V1DImode:
7249 if (!type || !AGGREGATE_TYPE_P (type))
7250 {
7251 if (cum->mmx_nregs)
7252 return gen_reg_or_parallel (mode, orig_mode,
7253 cum->mmx_regno + FIRST_MMX_REG);
7254 }
7255 break;
7256 }
7257
7258 return NULL_RTX;
7259 }
7260
7261 static rtx
7262 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7263 enum machine_mode orig_mode, const_tree type, bool named)
7264 {
7265 /* Handle a hidden AL argument containing number of registers
7266 for varargs x86-64 functions. */
7267 if (mode == VOIDmode)
7268 return GEN_INT (cum->maybe_vaarg
7269 ? (cum->sse_nregs < 0
7270 ? X86_64_SSE_REGPARM_MAX
7271 : cum->sse_regno)
7272 : -1);
7273
7274 switch (mode)
7275 {
7276 default:
7277 break;
7278
7279 case V8SFmode:
7280 case V8SImode:
7281 case V32QImode:
7282 case V16HImode:
7283 case V4DFmode:
7284 case V4DImode:
7285 case V16SFmode:
7286 case V16SImode:
7287 case V64QImode:
7288 case V32HImode:
7289 case V8DFmode:
7290 case V8DImode:
7291     /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack.  */
7292 if (!named)
7293 return NULL;
7294 break;
7295 }
7296
7297 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7298 cum->sse_nregs,
7299 &x86_64_int_parameter_registers [cum->regno],
7300 cum->sse_regno);
7301 }
7302
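/* Illustration of the VOIDmode case above (editorial note): for a variadic
   SysV call such as printf ("%f", 1.0), the value returned here is used by
   the call expander to load %al with an upper bound on the number of vector
   registers carrying arguments (1 in this example); the callee's prologue
   tests that value before spilling the XMM part of the register save
   area.  */
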
7303 static rtx
7304 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7305 enum machine_mode orig_mode, bool named,
7306 HOST_WIDE_INT bytes)
7307 {
7308 unsigned int regno;
7309
7310   /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7311      We use the value -2 to specify that the current function call uses the MS ABI.  */
7312 if (mode == VOIDmode)
7313 return GEN_INT (-2);
7314
7315 /* If we've run out of registers, it goes on the stack. */
7316 if (cum->nregs == 0)
7317 return NULL_RTX;
7318
7319 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7320
7321 /* Only floating point modes are passed in anything but integer regs. */
7322 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7323 {
7324 if (named)
7325 regno = cum->regno + FIRST_SSE_REG;
7326 else
7327 {
7328 rtx t1, t2;
7329
7330 /* Unnamed floating parameters are passed in both the
7331 SSE and integer registers. */
7332 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7333 t2 = gen_rtx_REG (mode, regno);
7334 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7335 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7336 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7337 }
7338 }
7339   /* Handle aggregate types passed in registers.  */
7340 if (orig_mode == BLKmode)
7341 {
7342 if (bytes > 0 && bytes <= 8)
7343 mode = (bytes > 4 ? DImode : SImode);
7344 if (mode == BLKmode)
7345 mode = DImode;
7346 }
7347
7348 return gen_reg_or_parallel (mode, orig_mode, regno);
7349 }
7350
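/* Editorial example for the unnamed-float case above: a double passed as
   the second, unnamed argument of a variadic MS-ABI call occupies the
   second argument slot, so the PARALLEL pairs %xmm1 with %rdx and the
   caller stores the value in both registers, as the Windows x64 convention
   requires for varargs.  */
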
7351 /* Return where to put the arguments to a function.
7352 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7353
7354 MODE is the argument's machine mode. TYPE is the data type of the
7355 argument. It is null for libcalls where that information may not be
7356 available. CUM gives information about the preceding args and about
7357 the function being called. NAMED is nonzero if this argument is a
7358 named parameter (otherwise it is an extra parameter matching an
7359 ellipsis). */
7360
7361 static rtx
7362 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7363 const_tree type, bool named)
7364 {
7365 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7366 enum machine_mode mode = omode;
7367 HOST_WIDE_INT bytes, words;
7368 rtx arg;
7369
7370 if (mode == BLKmode)
7371 bytes = int_size_in_bytes (type);
7372 else
7373 bytes = GET_MODE_SIZE (mode);
7374 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7375
7376 /* To simplify the code below, represent vector types with a vector mode
7377 even if MMX/SSE are not active. */
7378 if (type && TREE_CODE (type) == VECTOR_TYPE)
7379 mode = type_natural_mode (type, cum, false);
7380
7381 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7382 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7383 else if (TARGET_64BIT)
7384 arg = function_arg_64 (cum, mode, omode, type, named);
7385 else
7386 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7387
7388 return arg;
7389 }
7390
7391 /* A C expression that indicates when an argument must be passed by
7392 reference. If nonzero for an argument, a copy of that argument is
7393 made in memory and a pointer to the argument is passed instead of
7394 the argument itself. The pointer is passed in whatever way is
7395 appropriate for passing a pointer to that type. */
7396
7397 static bool
7398 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7399 const_tree type, bool named ATTRIBUTE_UNUSED)
7400 {
7401 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7402
7403 /* See Windows x64 Software Convention. */
7404 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7405 {
7406 int msize = (int) GET_MODE_SIZE (mode);
7407 if (type)
7408 {
7409 /* Arrays are passed by reference. */
7410 if (TREE_CODE (type) == ARRAY_TYPE)
7411 return true;
7412
7413 if (AGGREGATE_TYPE_P (type))
7414 {
7415 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7416 are passed by reference. */
7417 msize = int_size_in_bytes (type);
7418 }
7419 }
7420
7421       /* __m128 and any other type whose size is not 1, 2, 4 or 8 bytes
             is passed by reference.  */
7422 switch (msize) {
7423 case 1: case 2: case 4: case 8:
7424 break;
7425 default:
7426 return true;
7427 }
7428 }
7429 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7430     return true;
7431
7432   return false;
7433 }
7434
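/* A few concrete outcomes (editorial note): under the MS ABI a 16-byte
   __m128 or a 12-byte struct is passed by reference, while a 1, 2, 4 or
   8 byte aggregate travels directly in a register or stack slot.  Under the
   SysV x86-64 ABI only variable-sized types take the by-reference path
   here; large fixed-size aggregates are instead forced into memory by the
   classification code above.  */
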
7435 /* Return true when TYPE should be 128bit aligned for 32bit argument
7436 passing ABI. XXX: This function is obsolete and is only used for
7437 checking psABI compatibility with previous versions of GCC. */
7438
7439 static bool
7440 ix86_compat_aligned_value_p (const_tree type)
7441 {
7442 enum machine_mode mode = TYPE_MODE (type);
7443 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7444 || mode == TDmode
7445 || mode == TFmode
7446 || mode == TCmode)
7447 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7448 return true;
7449 if (TYPE_ALIGN (type) < 128)
7450 return false;
7451
7452 if (AGGREGATE_TYPE_P (type))
7453 {
7454 /* Walk the aggregates recursively. */
7455 switch (TREE_CODE (type))
7456 {
7457 case RECORD_TYPE:
7458 case UNION_TYPE:
7459 case QUAL_UNION_TYPE:
7460 {
7461 tree field;
7462
7463 /* Walk all the structure fields. */
7464 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7465 {
7466 if (TREE_CODE (field) == FIELD_DECL
7467 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7468 return true;
7469 }
7470 break;
7471 }
7472
7473 case ARRAY_TYPE:
7474 	case ARRAY_TYPE:
	  /* Just in case some languages pass arrays by value.  */
7475 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7476 return true;
7477 break;
7478
7479 default:
7480 gcc_unreachable ();
7481 }
7482 }
7483 return false;
7484 }
7485
7486 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7487 XXX: This function is obsolete and is only used for checking psABI
7488 compatibility with previous versions of GCC. */
7489
7490 static unsigned int
7491 ix86_compat_function_arg_boundary (enum machine_mode mode,
7492 const_tree type, unsigned int align)
7493 {
7494 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7495 natural boundaries. */
7496 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7497 {
7498 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7499 make an exception for SSE modes since these require 128bit
7500 alignment.
7501
7502 The handling here differs from field_alignment. ICC aligns MMX
7503 arguments to 4 byte boundaries, while structure fields are aligned
7504 to 8 byte boundaries. */
7505 if (!type)
7506 {
7507 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7508 align = PARM_BOUNDARY;
7509 }
7510 else
7511 {
7512 if (!ix86_compat_aligned_value_p (type))
7513 align = PARM_BOUNDARY;
7514 }
7515 }
7516 if (align > BIGGEST_ALIGNMENT)
7517 align = BIGGEST_ALIGNMENT;
7518 return align;
7519 }
7520
7521 /* Return true when TYPE should be 128bit aligned for 32bit argument
7522 passing ABI. */
7523
7524 static bool
7525 ix86_contains_aligned_value_p (const_tree type)
7526 {
7527 enum machine_mode mode = TYPE_MODE (type);
7528
7529 if (mode == XFmode || mode == XCmode)
7530 return false;
7531
7532 if (TYPE_ALIGN (type) < 128)
7533 return false;
7534
7535 if (AGGREGATE_TYPE_P (type))
7536 {
7537 /* Walk the aggregates recursively. */
7538 switch (TREE_CODE (type))
7539 {
7540 case RECORD_TYPE:
7541 case UNION_TYPE:
7542 case QUAL_UNION_TYPE:
7543 {
7544 tree field;
7545
7546 /* Walk all the structure fields. */
7547 for (field = TYPE_FIELDS (type);
7548 field;
7549 field = DECL_CHAIN (field))
7550 {
7551 if (TREE_CODE (field) == FIELD_DECL
7552 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7553 return true;
7554 }
7555 break;
7556 }
7557
7558 case ARRAY_TYPE:
7559 	  /* Just in case some languages pass arrays by value.  */
7560 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7561 return true;
7562 break;
7563
7564 default:
7565 gcc_unreachable ();
7566 }
7567 }
7568 else
7569 return TYPE_ALIGN (type) >= 128;
7570
7571 return false;
7572 }
7573
7574 /* Gives the alignment boundary, in bits, of an argument with the
7575 specified mode and type. */
7576
7577 static unsigned int
7578 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7579 {
7580 unsigned int align;
7581 if (type)
7582 {
7583       /* Since the main variant type is used for the call, convert the
7584 	 type to its main variant.  */
7585 type = TYPE_MAIN_VARIANT (type);
7586 align = TYPE_ALIGN (type);
7587 }
7588 else
7589 align = GET_MODE_ALIGNMENT (mode);
7590 if (align < PARM_BOUNDARY)
7591 align = PARM_BOUNDARY;
7592 else
7593 {
7594 static bool warned;
7595 unsigned int saved_align = align;
7596
7597 if (!TARGET_64BIT)
7598 {
7599 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7600 if (!type)
7601 {
7602 if (mode == XFmode || mode == XCmode)
7603 align = PARM_BOUNDARY;
7604 }
7605 else if (!ix86_contains_aligned_value_p (type))
7606 align = PARM_BOUNDARY;
7607
7608 if (align < 128)
7609 align = PARM_BOUNDARY;
7610 }
7611
7612 if (warn_psabi
7613 && !warned
7614 && align != ix86_compat_function_arg_boundary (mode, type,
7615 saved_align))
7616 {
7617 warned = true;
7618 inform (input_location,
7619 "The ABI for passing parameters with %d-byte"
7620 " alignment has changed in GCC 4.6",
7621 align / BITS_PER_UNIT);
7622 }
7623 }
7624
7625 return align;
7626 }
7627
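/* Editorial examples of the 32-bit rules above: an __m128 argument keeps
   its 128-bit boundary, whereas a struct declared with
   __attribute__ ((aligned (32))) whose members are plain ints contains no
   128-bit aligned value and therefore drops back to PARM_BOUNDARY; whenever
   that differs from the pre-GCC 4.6 behavior, the note above is emitted
   once per compilation under -Wpsabi.  */
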
7628 /* Return true if N is a possible register number of function value. */
7629
7630 static bool
7631 ix86_function_value_regno_p (const unsigned int regno)
7632 {
7633 switch (regno)
7634 {
7635 case AX_REG:
7636 case DX_REG:
7637 return true;
7638 case DI_REG:
7639 case SI_REG:
7640 return TARGET_64BIT && ix86_abi != MS_ABI;
7641
7642 /* Complex values are returned in %st(0)/%st(1) pair. */
7643 case ST0_REG:
7644 case ST1_REG:
7645 /* TODO: The function should depend on current function ABI but
7646 builtins.c would need updating then. Therefore we use the
7647 default ABI. */
7648 if (TARGET_64BIT && ix86_abi == MS_ABI)
7649 return false;
7650 return TARGET_FLOAT_RETURNS_IN_80387;
7651
7652 /* Complex values are returned in %xmm0/%xmm1 pair. */
7653 case XMM0_REG:
7654 case XMM1_REG:
7655 return TARGET_SSE;
7656
7657 case MM0_REG:
7658 if (TARGET_MACHO || TARGET_64BIT)
7659 return false;
7660 return TARGET_MMX;
7661 }
7662
7663 return false;
7664 }
7665
7666 /* Define how to find the value returned by a function.
7667 VALTYPE is the data type of the value (as a tree).
7668 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7669 otherwise, FUNC is 0. */
7670
7671 static rtx
7672 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7673 const_tree fntype, const_tree fn)
7674 {
7675 unsigned int regno;
7676
7677 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7678 we normally prevent this case when mmx is not available. However
7679 some ABIs may require the result to be returned like DImode. */
7680 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7681 regno = FIRST_MMX_REG;
7682
7683 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7684 we prevent this case when sse is not available. However some ABIs
7685 may require the result to be returned like integer TImode. */
7686 else if (mode == TImode
7687 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7688 regno = FIRST_SSE_REG;
7689
7690 /* 32-byte vector modes in %ymm0. */
7691 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7692 regno = FIRST_SSE_REG;
7693
7694 /* 64-byte vector modes in %zmm0. */
7695 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7696 regno = FIRST_SSE_REG;
7697
7698 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7699 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7700 regno = FIRST_FLOAT_REG;
7701 else
7702 /* Most things go in %eax. */
7703 regno = AX_REG;
7704
7705 /* Override FP return register with %xmm0 for local functions when
7706 SSE math is enabled or for functions with sseregparm attribute. */
7707 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7708 {
7709 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7710 if ((sse_level >= 1 && mode == SFmode)
7711 || (sse_level == 2 && mode == DFmode))
7712 regno = FIRST_SSE_REG;
7713 }
7714
7715 /* OImode shouldn't be used directly. */
7716 gcc_assert (mode != OImode);
7717
7718 return gen_rtx_REG (orig_mode, regno);
7719 }
7720
7721 static rtx
7722 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7723 const_tree valtype)
7724 {
7725 rtx ret;
7726
7727 /* Handle libcalls, which don't provide a type node. */
7728 if (valtype == NULL)
7729 {
7730 unsigned int regno;
7731
7732 switch (mode)
7733 {
7734 case SFmode:
7735 case SCmode:
7736 case DFmode:
7737 case DCmode:
7738 case TFmode:
7739 case SDmode:
7740 case DDmode:
7741 case TDmode:
7742 regno = FIRST_SSE_REG;
7743 break;
7744 case XFmode:
7745 case XCmode:
7746 regno = FIRST_FLOAT_REG;
7747 break;
7748 case TCmode:
7749 return NULL;
7750 default:
7751 regno = AX_REG;
7752 }
7753
7754 return gen_rtx_REG (mode, regno);
7755 }
7756 else if (POINTER_TYPE_P (valtype))
7757 {
7758 /* Pointers are always returned in word_mode. */
7759 mode = word_mode;
7760 }
7761
7762 ret = construct_container (mode, orig_mode, valtype, 1,
7763 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7764 x86_64_int_return_registers, 0);
7765
7766   /* For zero sized structures, construct_container returns NULL, but we
7767      need to keep the rest of the compiler happy by returning a meaningful value.  */
7768 if (!ret)
7769 ret = gen_rtx_REG (orig_mode, AX_REG);
7770
7771 return ret;
7772 }
7773
7774 static rtx
7775 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7776 const_tree valtype)
7777 {
7778 unsigned int regno = AX_REG;
7779
7780 if (TARGET_SSE)
7781 {
7782 switch (GET_MODE_SIZE (mode))
7783 {
7784 case 16:
7785 if (valtype != NULL_TREE
7786 	      && !VECTOR_INTEGER_TYPE_P (valtype)
7788 && !INTEGRAL_TYPE_P (valtype)
7789 && !VECTOR_FLOAT_TYPE_P (valtype))
7790 break;
7791 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7792 && !COMPLEX_MODE_P (mode))
7793 regno = FIRST_SSE_REG;
7794 break;
7795 case 8:
7796 case 4:
7797 if (mode == SFmode || mode == DFmode)
7798 regno = FIRST_SSE_REG;
7799 break;
7800 default:
7801 break;
7802 }
7803 }
7804 return gen_rtx_REG (orig_mode, regno);
7805 }
7806
7807 static rtx
7808 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7809 enum machine_mode orig_mode, enum machine_mode mode)
7810 {
7811 const_tree fn, fntype;
7812
7813 fn = NULL_TREE;
7814 if (fntype_or_decl && DECL_P (fntype_or_decl))
7815 fn = fntype_or_decl;
7816 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7817
7818 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7819 return function_value_ms_64 (orig_mode, mode, valtype);
7820 else if (TARGET_64BIT)
7821 return function_value_64 (orig_mode, mode, valtype);
7822 else
7823 return function_value_32 (orig_mode, mode, fntype, fn);
7824 }
7825
7826 static rtx
7827 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7828 bool outgoing ATTRIBUTE_UNUSED)
7829 {
7830 enum machine_mode mode, orig_mode;
7831
7832 orig_mode = TYPE_MODE (valtype);
7833 mode = type_natural_mode (valtype, NULL, true);
7834 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7835 }
7836
7837 /* Pointer function arguments and return values are promoted to
7838 word_mode. */
7839
7840 static enum machine_mode
7841 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7842 int *punsignedp, const_tree fntype,
7843 int for_return)
7844 {
7845 if (type != NULL_TREE && POINTER_TYPE_P (type))
7846 {
7847 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7848 return word_mode;
7849 }
7850 return default_promote_function_mode (type, mode, punsignedp, fntype,
7851 for_return);
7852 }
7853
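/* Editorial note: this matters mainly for -mx32, where pointers are 32 bits
   (SImode) while word_mode is DImode; a pointer argument or return value is
   then promoted to DImode and, since POINTERS_EXTEND_UNSIGNED is set for
   this port, zero-extended rather than sign-extended.  */
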
7854 /* Return true if a structure, union or array with MODE containing FIELD
7855 should be accessed using BLKmode. */
7856
7857 static bool
7858 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7859 {
7860 /* Union with XFmode must be in BLKmode. */
7861 return (mode == XFmode
7862 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
7863 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
7864 }
7865
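/* Define how to find the value returned by a library function, assuming
   the value has mode MODE.  */
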
7866 rtx
7867 ix86_libcall_value (enum machine_mode mode)
7868 {
7869 return ix86_function_value_1 (NULL, NULL, mode, mode);
7870 }
7871
7872 /* Return true iff type is returned in memory. */
7873
7874 static bool ATTRIBUTE_UNUSED
7875 return_in_memory_32 (const_tree type, enum machine_mode mode)
7876 {
7877 HOST_WIDE_INT size;
7878
7879 if (mode == BLKmode)
7880 return true;
7881
7882 size = int_size_in_bytes (type);
7883
7884 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7885 return false;
7886
7887 if (VECTOR_MODE_P (mode) || mode == TImode)
7888 {
7889 /* User-created vectors small enough to fit in EAX. */
7890 if (size < 8)
7891 return false;
7892
7893       /* MMX/3dNow values are returned in MM0,
7894 	 except when it doesn't exist or the ABI prescribes otherwise.  */
7895 if (size == 8)
7896 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7897
7898 /* SSE values are returned in XMM0, except when it doesn't exist. */
7899 if (size == 16)
7900 return !TARGET_SSE;
7901
7902 /* AVX values are returned in YMM0, except when it doesn't exist. */
7903 if (size == 32)
7904 return !TARGET_AVX;
7905
7906 /* AVX512F values are returned in ZMM0, except when it doesn't exist. */
7907 if (size == 64)
7908 return !TARGET_AVX512F;
7909 }
7910
7911 if (mode == XFmode)
7912 return false;
7913
7914 if (size > 12)
7915 return true;
7916
7917 /* OImode shouldn't be used directly. */
7918 gcc_assert (mode != OImode);
7919
7920 return false;
7921 }
7922
7923 static bool ATTRIBUTE_UNUSED
7924 return_in_memory_64 (const_tree type, enum machine_mode mode)
7925 {
7926 int needed_intregs, needed_sseregs;
7927 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7928 }
7929
7930 static bool ATTRIBUTE_UNUSED
7931 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7932 {
7933 HOST_WIDE_INT size = int_size_in_bytes (type);
7934
7935 /* __m128 is returned in xmm0. */
7936 if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
7937 || VECTOR_FLOAT_TYPE_P (type))
7938 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7939 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7940 return false;
7941
7942   /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes to be
       returned in a register.  */
7943 return size != 1 && size != 2 && size != 4 && size != 8;
7944 }
7945
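/* Editorial examples for the MS ABI check above: a 24-byte struct is
   returned through a hidden pointer supplied by the caller, while an 8-byte
   struct or a 16-byte __m128 value comes back in a register (%rax and
   %xmm0 respectively; see function_value_ms_64 above).  */
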
7946 static bool
7947 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7948 {
7949 #ifdef SUBTARGET_RETURN_IN_MEMORY
7950 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7951 #else
7952 const enum machine_mode mode = type_natural_mode (type, NULL, true);
7953
7954 if (TARGET_64BIT)
7955 {
7956 if (ix86_function_type_abi (fntype) == MS_ABI)
7957 return return_in_memory_ms_64 (type, mode);
7958 else
7959 return return_in_memory_64 (type, mode);
7960 }
7961 else
7962 return return_in_memory_32 (type, mode);
7963 #endif
7964 }
7965
7966 \f
7967 /* Create the va_list data type. */
7968
7969 /* Returns the calling convention specific va_list data type.
7970    The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
7971
7972 static tree
7973 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7974 {
7975 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7976
7977   /* For i386 we use a plain pointer to the argument area.  */
7978 if (!TARGET_64BIT || abi == MS_ABI)
7979 return build_pointer_type (char_type_node);
7980
7981 record = lang_hooks.types.make_type (RECORD_TYPE);
7982 type_decl = build_decl (BUILTINS_LOCATION,
7983 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7984
7985 f_gpr = build_decl (BUILTINS_LOCATION,
7986 FIELD_DECL, get_identifier ("gp_offset"),
7987 unsigned_type_node);
7988 f_fpr = build_decl (BUILTINS_LOCATION,
7989 FIELD_DECL, get_identifier ("fp_offset"),
7990 unsigned_type_node);
7991 f_ovf = build_decl (BUILTINS_LOCATION,
7992 FIELD_DECL, get_identifier ("overflow_arg_area"),
7993 ptr_type_node);
7994 f_sav = build_decl (BUILTINS_LOCATION,
7995 FIELD_DECL, get_identifier ("reg_save_area"),
7996 ptr_type_node);
7997
7998 va_list_gpr_counter_field = f_gpr;
7999 va_list_fpr_counter_field = f_fpr;
8000
8001 DECL_FIELD_CONTEXT (f_gpr) = record;
8002 DECL_FIELD_CONTEXT (f_fpr) = record;
8003 DECL_FIELD_CONTEXT (f_ovf) = record;
8004 DECL_FIELD_CONTEXT (f_sav) = record;
8005
8006 TYPE_STUB_DECL (record) = type_decl;
8007 TYPE_NAME (record) = type_decl;
8008 TYPE_FIELDS (record) = f_gpr;
8009 DECL_CHAIN (f_gpr) = f_fpr;
8010 DECL_CHAIN (f_fpr) = f_ovf;
8011 DECL_CHAIN (f_ovf) = f_sav;
8012
8013 layout_type (record);
8014
8015 /* The correct type is an array type of one element. */
8016 return build_array_type (record, build_index_type (size_zero_node));
8017 }
8018
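/* For reference (editorial note), the record built above matches the
   va_list layout mandated by the SysV x86-64 psABI:

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];

   which is also why an array type of one element is returned.  */
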
8019 /* Set up the builtin va_list data type and, for 64-bit, the additional
8020    calling convention specific va_list data types.  */
8021
8022 static tree
8023 ix86_build_builtin_va_list (void)
8024 {
8025 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8026
8027 /* Initialize abi specific va_list builtin types. */
8028 if (TARGET_64BIT)
8029 {
8030 tree t;
8031 if (ix86_abi == MS_ABI)
8032 {
8033 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8034 if (TREE_CODE (t) != RECORD_TYPE)
8035 t = build_variant_type_copy (t);
8036 sysv_va_list_type_node = t;
8037 }
8038 else
8039 {
8040 t = ret;
8041 if (TREE_CODE (t) != RECORD_TYPE)
8042 t = build_variant_type_copy (t);
8043 sysv_va_list_type_node = t;
8044 }
8045 if (ix86_abi != MS_ABI)
8046 {
8047 t = ix86_build_builtin_va_list_abi (MS_ABI);
8048 if (TREE_CODE (t) != RECORD_TYPE)
8049 t = build_variant_type_copy (t);
8050 ms_va_list_type_node = t;
8051 }
8052 else
8053 {
8054 t = ret;
8055 if (TREE_CODE (t) != RECORD_TYPE)
8056 t = build_variant_type_copy (t);
8057 ms_va_list_type_node = t;
8058 }
8059 }
8060
8061 return ret;
8062 }
8063
8064 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8065
8066 static void
8067 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8068 {
8069 rtx save_area, mem;
8070 alias_set_type set;
8071 int i, max;
8072
8073 /* GPR size of varargs save area. */
8074 if (cfun->va_list_gpr_size)
8075 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8076 else
8077 ix86_varargs_gpr_size = 0;
8078
8079 /* FPR size of varargs save area. We don't need it if we don't pass
8080 anything in SSE registers. */
8081 if (TARGET_SSE && cfun->va_list_fpr_size)
8082 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8083 else
8084 ix86_varargs_fpr_size = 0;
8085
8086 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8087 return;
8088
8089 save_area = frame_pointer_rtx;
8090 set = get_varargs_alias_set ();
8091
8092 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8093 if (max > X86_64_REGPARM_MAX)
8094 max = X86_64_REGPARM_MAX;
8095
8096 for (i = cum->regno; i < max; i++)
8097 {
8098 mem = gen_rtx_MEM (word_mode,
8099 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8100 MEM_NOTRAP_P (mem) = 1;
8101 set_mem_alias_set (mem, set);
8102 emit_move_insn (mem,
8103 gen_rtx_REG (word_mode,
8104 x86_64_int_parameter_registers[i]));
8105 }
8106
8107 if (ix86_varargs_fpr_size)
8108 {
8109 enum machine_mode smode;
8110 rtx label, test;
8111
8112       /* Now emit code to save SSE registers.  The AX parameter contains the
8113 	 number of SSE parameter registers used to call this function, though
8114 	 all we actually check here is the zero/non-zero status.  */
8115
8116 label = gen_label_rtx ();
8117 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8118 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8119 label));
8120
8121 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8122 we used movdqa (i.e. TImode) instead? Perhaps even better would
8123 be if we could determine the real mode of the data, via a hook
8124 into pass_stdarg. Ignore all that for now. */
8125 smode = V4SFmode;
8126 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8127 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8128
8129 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8130 if (max > X86_64_SSE_REGPARM_MAX)
8131 max = X86_64_SSE_REGPARM_MAX;
8132
8133 for (i = cum->sse_regno; i < max; ++i)
8134 {
8135 mem = plus_constant (Pmode, save_area,
8136 i * 16 + ix86_varargs_gpr_size);
8137 mem = gen_rtx_MEM (smode, mem);
8138 MEM_NOTRAP_P (mem) = 1;
8139 set_mem_alias_set (mem, set);
8140 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8141
8142 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8143 }
8144
8145 emit_label (label);
8146 }
8147 }
8148
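/* Editorial summary of the layout produced above: with the default psABI
   limits the save area holds 6 * 8 = 48 bytes of general registers
   (%rdi, %rsi, %rdx, %rcx, %r8, %r9) followed by 8 * 16 = 128 bytes of XMM
   registers, 176 bytes in total; the branch on %al skips the XMM stores
   when the caller passed no floating-point arguments in registers.  */
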
8149 static void
8150 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8151 {
8152 alias_set_type set = get_varargs_alias_set ();
8153 int i;
8154
8155   /* Reset to zero, as a SysV va_arg may have been used
8156      before.  */
8157 ix86_varargs_gpr_size = 0;
8158 ix86_varargs_fpr_size = 0;
8159
8160 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8161 {
8162 rtx reg, mem;
8163
8164 mem = gen_rtx_MEM (Pmode,
8165 plus_constant (Pmode, virtual_incoming_args_rtx,
8166 i * UNITS_PER_WORD));
8167 MEM_NOTRAP_P (mem) = 1;
8168 set_mem_alias_set (mem, set);
8169
8170 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8171 emit_move_insn (mem, reg);
8172 }
8173 }
8174
8175 static void
8176 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8177 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8178 int no_rtl)
8179 {
8180 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8181 CUMULATIVE_ARGS next_cum;
8182 tree fntype;
8183
8184   /* This argument doesn't appear to be used anymore, which is good,
8185      because the old code here didn't suppress rtl generation.  */
8186 gcc_assert (!no_rtl);
8187
8188 if (!TARGET_64BIT)
8189 return;
8190
8191 fntype = TREE_TYPE (current_function_decl);
8192
8193 /* For varargs, we do not want to skip the dummy va_dcl argument.
8194 For stdargs, we do want to skip the last named argument. */
8195 next_cum = *cum;
8196 if (stdarg_p (fntype))
8197 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8198 true);
8199
8200 if (cum->call_abi == MS_ABI)
8201 setup_incoming_varargs_ms_64 (&next_cum);
8202 else
8203 setup_incoming_varargs_64 (&next_cum);
8204 }
8205
8206 /* Check whether TYPE is a va_list implemented as a plain char pointer.  */
8207
8208 static bool
8209 is_va_list_char_pointer (tree type)
8210 {
8211 tree canonic;
8212
8213 /* For 32-bit it is always true. */
8214 if (!TARGET_64BIT)
8215 return true;
8216 canonic = ix86_canonical_va_list_type (type);
8217 return (canonic == ms_va_list_type_node
8218 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8219 }
8220
8221 /* Implement va_start. */
8222
8223 static void
8224 ix86_va_start (tree valist, rtx nextarg)
8225 {
8226 HOST_WIDE_INT words, n_gpr, n_fpr;
8227 tree f_gpr, f_fpr, f_ovf, f_sav;
8228 tree gpr, fpr, ovf, sav, t;
8229 tree type;
8230 rtx ovf_rtx;
8231
8232 if (flag_split_stack
8233 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8234 {
8235 unsigned int scratch_regno;
8236
8237 /* When we are splitting the stack, we can't refer to the stack
8238 arguments using internal_arg_pointer, because they may be on
8239 the old stack. The split stack prologue will arrange to
8240 leave a pointer to the old stack arguments in a scratch
8241 register, which we here copy to a pseudo-register. The split
8242 stack prologue can't set the pseudo-register directly because
8243 it (the prologue) runs before any registers have been saved. */
8244
8245 scratch_regno = split_stack_prologue_scratch_regno ();
8246 if (scratch_regno != INVALID_REGNUM)
8247 {
8248 rtx reg, seq;
8249
8250 reg = gen_reg_rtx (Pmode);
8251 cfun->machine->split_stack_varargs_pointer = reg;
8252
8253 start_sequence ();
8254 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8255 seq = get_insns ();
8256 end_sequence ();
8257
8258 push_topmost_sequence ();
8259 emit_insn_after (seq, entry_of_function ());
8260 pop_topmost_sequence ();
8261 }
8262 }
8263
8264 /* Only 64bit target needs something special. */
8265 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8266 {
8267 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8268 std_expand_builtin_va_start (valist, nextarg);
8269 else
8270 {
8271 rtx va_r, next;
8272
8273 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8274 next = expand_binop (ptr_mode, add_optab,
8275 cfun->machine->split_stack_varargs_pointer,
8276 crtl->args.arg_offset_rtx,
8277 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8278 convert_move (va_r, next, 0);
8279 }
8280 return;
8281 }
8282
8283 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8284 f_fpr = DECL_CHAIN (f_gpr);
8285 f_ovf = DECL_CHAIN (f_fpr);
8286 f_sav = DECL_CHAIN (f_ovf);
8287
8288 valist = build_simple_mem_ref (valist);
8289 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8290 /* The following should be folded into the MEM_REF offset. */
8291 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8292 f_gpr, NULL_TREE);
8293 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8294 f_fpr, NULL_TREE);
8295 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8296 f_ovf, NULL_TREE);
8297 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8298 f_sav, NULL_TREE);
8299
8300 /* Count number of gp and fp argument registers used. */
8301 words = crtl->args.info.words;
8302 n_gpr = crtl->args.info.regno;
8303 n_fpr = crtl->args.info.sse_regno;
8304
8305 if (cfun->va_list_gpr_size)
8306 {
8307 type = TREE_TYPE (gpr);
8308 t = build2 (MODIFY_EXPR, type,
8309 gpr, build_int_cst (type, n_gpr * 8));
8310 TREE_SIDE_EFFECTS (t) = 1;
8311 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8312 }
8313
8314 if (TARGET_SSE && cfun->va_list_fpr_size)
8315 {
8316 type = TREE_TYPE (fpr);
8317 t = build2 (MODIFY_EXPR, type, fpr,
8318 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8319 TREE_SIDE_EFFECTS (t) = 1;
8320 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8321 }
8322
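/* A worked example (illustrative only): with X86_64_REGPARM_MAX == 6,
   a function that consumed two named GP arguments and one named SSE
   argument before the ellipsis initializes gp_offset to 2 * 8 = 16 and
   fp_offset to 1 * 16 + 8 * 6 = 64, i.e. byte offsets into a register
   save area laid out as six 8-byte GP slots followed by 16-byte SSE
   slots.  */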
8323 /* Find the overflow area. */
8324 type = TREE_TYPE (ovf);
8325 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8326 ovf_rtx = crtl->args.internal_arg_pointer;
8327 else
8328 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8329 t = make_tree (type, ovf_rtx);
8330 if (words != 0)
8331 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8332 t = build2 (MODIFY_EXPR, type, ovf, t);
8333 TREE_SIDE_EFFECTS (t) = 1;
8334 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8335
8336 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8337 {
8338 /* Find the register save area.
8339 The function prologue saves it right above the stack frame. */
8340 type = TREE_TYPE (sav);
8341 t = make_tree (type, frame_pointer_rtx);
8342 if (!ix86_varargs_gpr_size)
8343 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8344 t = build2 (MODIFY_EXPR, type, sav, t);
8345 TREE_SIDE_EFFECTS (t) = 1;
8346 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8347 }
8348 }
8349
8350 /* Implement va_arg. */
8351
8352 static tree
8353 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8354 gimple_seq *post_p)
8355 {
8356 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8357 tree f_gpr, f_fpr, f_ovf, f_sav;
8358 tree gpr, fpr, ovf, sav, t;
8359 int size, rsize;
8360 tree lab_false, lab_over = NULL_TREE;
8361 tree addr, t2;
8362 rtx container;
8363 int indirect_p = 0;
8364 tree ptrtype;
8365 enum machine_mode nat_mode;
8366 unsigned int arg_boundary;
8367
8368 /* Only 64-bit targets need something special. */
8369 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8370 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8371
8372 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8373 f_fpr = DECL_CHAIN (f_gpr);
8374 f_ovf = DECL_CHAIN (f_fpr);
8375 f_sav = DECL_CHAIN (f_ovf);
8376
8377 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8378 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8379 valist = build_va_arg_indirect_ref (valist);
8380 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8381 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8382 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8383
8384 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8385 if (indirect_p)
8386 type = build_pointer_type (type);
8387 size = int_size_in_bytes (type);
8388 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8389
8390 nat_mode = type_natural_mode (type, NULL, false);
8391 switch (nat_mode)
8392 {
8393 case V8SFmode:
8394 case V8SImode:
8395 case V32QImode:
8396 case V16HImode:
8397 case V4DFmode:
8398 case V4DImode:
8399 case V16SFmode:
8400 case V16SImode:
8401 case V64QImode:
8402 case V32HImode:
8403 case V8DFmode:
8404 case V8DImode:
8405 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8406 if (!TARGET_64BIT_MS_ABI)
8407 {
8408 container = NULL;
8409 break;
8410 }
8411
8412 default:
8413 container = construct_container (nat_mode, TYPE_MODE (type),
8414 type, 0, X86_64_REGPARM_MAX,
8415 X86_64_SSE_REGPARM_MAX, intreg,
8416 0);
8417 break;
8418 }
8419
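/* A note on CONTAINER (an informal sketch): construct_container returns
   either a bare hard register or a PARALLEL whose elements pair a
   register with the byte offset it covers within the argument; e.g. a
   16-byte struct classified as INTEGER,SSE would yield a GP register at
   offset 0 and an SSE register at offset 8.  The consecutiveness checks
   below rely on exactly those offsets.  */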
8420 /* Pull the value out of the saved registers. */
8421
8422 addr = create_tmp_var (ptr_type_node, "addr");
8423
8424 if (container)
8425 {
8426 int needed_intregs, needed_sseregs;
8427 bool need_temp;
8428 tree int_addr, sse_addr;
8429
8430 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8431 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8432
8433 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8434
8435 need_temp = (!REG_P (container)
8436 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8437 || TYPE_ALIGN (type) > 128));
8438
8439 /* In case we are passing a structure, verify that it occupies a consecutive
8440 block in the register save area. If not, we need to do moves. */
8441 if (!need_temp && !REG_P (container))
8442 {
8443 /* Verify that all registers are strictly consecutive. */
8444 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8445 {
8446 int i;
8447
8448 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8449 {
8450 rtx slot = XVECEXP (container, 0, i);
8451 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8452 || INTVAL (XEXP (slot, 1)) != i * 16)
8453 need_temp = 1;
8454 }
8455 }
8456 else
8457 {
8458 int i;
8459
8460 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8461 {
8462 rtx slot = XVECEXP (container, 0, i);
8463 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8464 || INTVAL (XEXP (slot, 1)) != i * 8)
8465 need_temp = 1;
8466 }
8467 }
8468 }
8469 if (!need_temp)
8470 {
8471 int_addr = addr;
8472 sse_addr = addr;
8473 }
8474 else
8475 {
8476 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8477 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8478 }
8479
8480 /* First ensure that we fit completely in registers. */
8481 if (needed_intregs)
8482 {
8483 t = build_int_cst (TREE_TYPE (gpr),
8484 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8485 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8486 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8487 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8488 gimplify_and_add (t, pre_p);
8489 }
8490 if (needed_sseregs)
8491 {
8492 t = build_int_cst (TREE_TYPE (fpr),
8493 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8494 + X86_64_REGPARM_MAX * 8);
8495 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8496 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8497 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8498 gimplify_and_add (t, pre_p);
8499 }
8500
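/* Illustrative example of the bound checks above (not authoritative):
   for an argument needing two GP registers the test is
   gp_offset >= (6 - 2 + 1) * 8 == 40, i.e. if fewer than two 8-byte GP
   slots remain in the save area we branch to lab_false and fetch the
   value from the overflow (stack) area instead.  */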
8501 /* Compute index to start of area used for integer regs. */
8502 if (needed_intregs)
8503 {
8504 /* int_addr = gpr + sav; */
8505 t = fold_build_pointer_plus (sav, gpr);
8506 gimplify_assign (int_addr, t, pre_p);
8507 }
8508 if (needed_sseregs)
8509 {
8510 /* sse_addr = fpr + sav; */
8511 t = fold_build_pointer_plus (sav, fpr);
8512 gimplify_assign (sse_addr, t, pre_p);
8513 }
8514 if (need_temp)
8515 {
8516 int i, prev_size = 0;
8517 tree temp = create_tmp_var (type, "va_arg_tmp");
8518
8519 /* addr = &temp; */
8520 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8521 gimplify_assign (addr, t, pre_p);
8522
8523 for (i = 0; i < XVECLEN (container, 0); i++)
8524 {
8525 rtx slot = XVECEXP (container, 0, i);
8526 rtx reg = XEXP (slot, 0);
8527 enum machine_mode mode = GET_MODE (reg);
8528 tree piece_type;
8529 tree addr_type;
8530 tree daddr_type;
8531 tree src_addr, src;
8532 int src_offset;
8533 tree dest_addr, dest;
8534 int cur_size = GET_MODE_SIZE (mode);
8535
8536 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8537 prev_size = INTVAL (XEXP (slot, 1));
8538 if (prev_size + cur_size > size)
8539 {
8540 cur_size = size - prev_size;
8541 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8542 if (mode == BLKmode)
8543 mode = QImode;
8544 }
8545 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8546 if (mode == GET_MODE (reg))
8547 addr_type = build_pointer_type (piece_type);
8548 else
8549 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8550 true);
8551 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8552 true);
8553
8554 if (SSE_REGNO_P (REGNO (reg)))
8555 {
8556 src_addr = sse_addr;
8557 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8558 }
8559 else
8560 {
8561 src_addr = int_addr;
8562 src_offset = REGNO (reg) * 8;
8563 }
8564 src_addr = fold_convert (addr_type, src_addr);
8565 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8566
8567 dest_addr = fold_convert (daddr_type, addr);
8568 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8569 if (cur_size == GET_MODE_SIZE (mode))
8570 {
8571 src = build_va_arg_indirect_ref (src_addr);
8572 dest = build_va_arg_indirect_ref (dest_addr);
8573
8574 gimplify_assign (dest, src, pre_p);
8575 }
8576 else
8577 {
8578 tree copy
8579 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8580 3, dest_addr, src_addr,
8581 size_int (cur_size));
8582 gimplify_and_add (copy, pre_p);
8583 }
8584 prev_size += cur_size;
8585 }
8586 }
8587
8588 if (needed_intregs)
8589 {
8590 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8591 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8592 gimplify_assign (gpr, t, pre_p);
8593 }
8594
8595 if (needed_sseregs)
8596 {
8597 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8598 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8599 gimplify_assign (fpr, t, pre_p);
8600 }
8601
8602 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8603
8604 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8605 }
8606
8607 /* ... otherwise out of the overflow area. */
8608
8609 /* When the caller aligns a parameter on the stack, a parameter whose
8610 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT will only be
8611 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
8612 with the caller. */
8613 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8614 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8615 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8616
8617 /* Care for on-stack alignment if needed. */
8618 if (arg_boundary <= 64 || size == 0)
8619 t = ovf;
8620 else
8621 {
8622 HOST_WIDE_INT align = arg_boundary / 8;
8623 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8624 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8625 build_int_cst (TREE_TYPE (t), -align));
8626 }
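/* For illustration (a sketch): with a 32-byte aligned argument type,
   align == 32 and the expression above computes (ovf + 31) & -32,
   rounding the overflow pointer up to the next 32-byte boundary before
   the argument is read from the stack.  */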
8627
8628 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8629 gimplify_assign (addr, t, pre_p);
8630
8631 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8632 gimplify_assign (unshare_expr (ovf), t, pre_p);
8633
8634 if (container)
8635 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8636
8637 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8638 addr = fold_convert (ptrtype, addr);
8639
8640 if (indirect_p)
8641 addr = build_va_arg_indirect_ref (addr);
8642 return build_va_arg_indirect_ref (addr);
8643 }
8644 \f
8645 /* Return true if OPNUM's MEM should be matched
8646 in movabs* patterns. */
8647
8648 bool
8649 ix86_check_movabs (rtx insn, int opnum)
8650 {
8651 rtx set, mem;
8652
8653 set = PATTERN (insn);
8654 if (GET_CODE (set) == PARALLEL)
8655 set = XVECEXP (set, 0, 0);
8656 gcc_assert (GET_CODE (set) == SET);
8657 mem = XEXP (set, opnum);
8658 while (GET_CODE (mem) == SUBREG)
8659 mem = SUBREG_REG (mem);
8660 gcc_assert (MEM_P (mem));
8661 return volatile_ok || !MEM_VOLATILE_P (mem);
8662 }
8663 \f
8664 /* Initialize the table of extra 80387 mathematical constants. */
8665
8666 static void
8667 init_ext_80387_constants (void)
8668 {
8669 static const char * cst[5] =
8670 {
8671 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8672 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8673 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8674 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8675 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8676 };
8677 int i;
8678
8679 for (i = 0; i < 5; i++)
8680 {
8681 real_from_string (&ext_80387_constants_table[i], cst[i]);
8682 /* Ensure each constant is rounded to XFmode precision. */
8683 real_convert (&ext_80387_constants_table[i],
8684 XFmode, &ext_80387_constants_table[i]);
8685 }
8686
8687 ext_80387_constants_init = 1;
8688 }
8689
8690 /* Return non-zero if the constant is something that
8691 can be loaded with a special instruction. */
8692
8693 int
8694 standard_80387_constant_p (rtx x)
8695 {
8696 enum machine_mode mode = GET_MODE (x);
8697
8698 REAL_VALUE_TYPE r;
8699
8700 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8701 return -1;
8702
8703 if (x == CONST0_RTX (mode))
8704 return 1;
8705 if (x == CONST1_RTX (mode))
8706 return 2;
8707
8708 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8709
8710 /* For XFmode constants, try to find a special 80387 instruction when
8711 optimizing for size or on those CPUs that benefit from them. */
8712 if (mode == XFmode
8713 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8714 {
8715 int i;
8716
8717 if (! ext_80387_constants_init)
8718 init_ext_80387_constants ();
8719
8720 for (i = 0; i < 5; i++)
8721 if (real_identical (&r, &ext_80387_constants_table[i]))
8722 return i + 3;
8723 }
8724
8725 /* A load of the constant -0.0 or -1.0 will be split into a
8726 fldz;fchs or fld1;fchs sequence. */
8727 if (real_isnegzero (&r))
8728 return 8;
8729 if (real_identical (&r, &dconstm1))
8730 return 9;
8731
8732 return 0;
8733 }
8734
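/* Quick reference derived from the code above and below:
   standard_80387_constant_p returns
     1 -> fldz (0.0)      2 -> fld1 (1.0)
     3 -> fldlg2          4 -> fldln2
     5 -> fldl2e          6 -> fldl2t
     7 -> fldpi
     8 -> -0.0 and 9 -> -1.0, both split into fldz/fld1 followed by fchs,
   and 0 (or -1 for a non-x87 operand) when no special load exists.  */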
8735 /* Return the opcode of the special instruction to be used to load
8736 the constant X. */
8737
8738 const char *
8739 standard_80387_constant_opcode (rtx x)
8740 {
8741 switch (standard_80387_constant_p (x))
8742 {
8743 case 1:
8744 return "fldz";
8745 case 2:
8746 return "fld1";
8747 case 3:
8748 return "fldlg2";
8749 case 4:
8750 return "fldln2";
8751 case 5:
8752 return "fldl2e";
8753 case 6:
8754 return "fldl2t";
8755 case 7:
8756 return "fldpi";
8757 case 8:
8758 case 9:
8759 return "#";
8760 default:
8761 gcc_unreachable ();
8762 }
8763 }
8764
8765 /* Return the CONST_DOUBLE representing the 80387 constant that is
8766 loaded by the specified special instruction. The argument IDX
8767 matches the return value from standard_80387_constant_p. */
8768
8769 rtx
8770 standard_80387_constant_rtx (int idx)
8771 {
8772 int i;
8773
8774 if (! ext_80387_constants_init)
8775 init_ext_80387_constants ();
8776
8777 switch (idx)
8778 {
8779 case 3:
8780 case 4:
8781 case 5:
8782 case 6:
8783 case 7:
8784 i = idx - 3;
8785 break;
8786
8787 default:
8788 gcc_unreachable ();
8789 }
8790
8791 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8792 XFmode);
8793 }
8794
8795 /* Return 1 if X is all 0s and 2 if X is all 1s
8796 in a supported SSE/AVX vector mode. */
8797
8798 int
8799 standard_sse_constant_p (rtx x)
8800 {
8801 enum machine_mode mode = GET_MODE (x);
8802
8803 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8804 return 1;
8805 if (vector_all_ones_operand (x, mode))
8806 switch (mode)
8807 {
8808 case V16QImode:
8809 case V8HImode:
8810 case V4SImode:
8811 case V2DImode:
8812 if (TARGET_SSE2)
8813 return 2;
8814 case V32QImode:
8815 case V16HImode:
8816 case V8SImode:
8817 case V4DImode:
8818 if (TARGET_AVX2)
8819 return 2;
8820 case V64QImode:
8821 case V32HImode:
8822 case V16SImode:
8823 case V8DImode:
8824 if (TARGET_AVX512F)
8825 return 2;
8826 default:
8827 break;
8828 }
8829
8830 return 0;
8831 }
8832
8833 /* Return the opcode of the special instruction to be used to load
8834 the constant X. */
8835
8836 const char *
8837 standard_sse_constant_opcode (rtx insn, rtx x)
8838 {
8839 switch (standard_sse_constant_p (x))
8840 {
8841 case 1:
8842 switch (get_attr_mode (insn))
8843 {
8844 case MODE_XI:
8845 case MODE_V16SF:
8846 return "vpxord\t%g0, %g0, %g0";
8847 case MODE_V8DF:
8848 return "vpxorq\t%g0, %g0, %g0";
8849 case MODE_TI:
8850 return "%vpxor\t%0, %d0";
8851 case MODE_V2DF:
8852 return "%vxorpd\t%0, %d0";
8853 case MODE_V4SF:
8854 return "%vxorps\t%0, %d0";
8855
8856 case MODE_OI:
8857 return "vpxor\t%x0, %x0, %x0";
8858 case MODE_V4DF:
8859 return "vxorpd\t%x0, %x0, %x0";
8860 case MODE_V8SF:
8861 return "vxorps\t%x0, %x0, %x0";
8862
8863 default:
8864 break;
8865 }
8866
8867 case 2:
8868 if (get_attr_mode (insn) == MODE_XI
8869 || get_attr_mode (insn) == MODE_V8DF
8870 || get_attr_mode (insn) == MODE_V16SF)
8871 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8872 if (TARGET_AVX)
8873 return "vpcmpeqd\t%0, %0, %0";
8874 else
8875 return "pcmpeqd\t%0, %0";
8876
8877 default:
8878 break;
8879 }
8880 gcc_unreachable ();
8881 }
8882
8883 /* Return true if OP contains a symbol reference. */
8884
8885 bool
8886 symbolic_reference_mentioned_p (rtx op)
8887 {
8888 const char *fmt;
8889 int i;
8890
8891 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8892 return true;
8893
8894 fmt = GET_RTX_FORMAT (GET_CODE (op));
8895 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8896 {
8897 if (fmt[i] == 'E')
8898 {
8899 int j;
8900
8901 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8902 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8903 return true;
8904 }
8905
8906 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8907 return true;
8908 }
8909
8910 return false;
8911 }
8912
8913 /* Return true if it is appropriate to emit `ret' instructions in the
8914 body of a function. Do this only if the epilogue is simple, needing a
8915 couple of insns. Prior to reloading, we can't tell how many registers
8916 must be saved, so return false then. Return false if there is no frame
8917 marker to de-allocate. */
8918
8919 bool
8920 ix86_can_use_return_insn_p (void)
8921 {
8922 struct ix86_frame frame;
8923
8924 if (! reload_completed || frame_pointer_needed)
8925 return 0;
8926
8927 /* Don't allow more than 32k pop, since that's all we can do
8928 with one instruction. */
8929 if (crtl->args.pops_args && crtl->args.size >= 32768)
8930 return 0;
8931
8932 ix86_compute_frame_layout (&frame);
8933 return (frame.stack_pointer_offset == UNITS_PER_WORD
8934 && (frame.nregs + frame.nsseregs) == 0);
8935 }
8936 \f
8937 /* Value should be nonzero if functions must have frame pointers.
8938 Zero means the frame pointer need not be set up (and parms may
8939 be accessed via the stack pointer) in functions that seem suitable. */
8940
8941 static bool
8942 ix86_frame_pointer_required (void)
8943 {
8944 /* If we accessed previous frames, then the generated code expects
8945 to be able to access the saved ebp value in our frame. */
8946 if (cfun->machine->accesses_prev_frame)
8947 return true;
8948
8949 /* Several x86 OSes need a frame pointer for other reasons,
8950 usually pertaining to setjmp. */
8951 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8952 return true;
8953
8954 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
8955 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
8956 return true;
8957
8958 /* Under Win64 SEH, very large frames need a frame pointer, as the maximum
8959 stack allocation is 4GB. */
8960 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
8961 return true;
8962
8963 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8964 turns off the frame pointer by default. Turn it back on now if
8965 we've not got a leaf function. */
8966 if (TARGET_OMIT_LEAF_FRAME_POINTER
8967 && (!crtl->is_leaf
8968 || ix86_current_function_calls_tls_descriptor))
8969 return true;
8970
8971 if (crtl->profile && !flag_fentry)
8972 return true;
8973
8974 return false;
8975 }
8976
8977 /* Record that the current function accesses previous call frames. */
8978
8979 void
8980 ix86_setup_frame_addresses (void)
8981 {
8982 cfun->machine->accesses_prev_frame = 1;
8983 }
8984 \f
8985 #ifndef USE_HIDDEN_LINKONCE
8986 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8987 # define USE_HIDDEN_LINKONCE 1
8988 # else
8989 # define USE_HIDDEN_LINKONCE 0
8990 # endif
8991 #endif
8992
8993 static int pic_labels_used;
8994
8995 /* Fills in the label name that should be used for a pc thunk for
8996 the given register. */
8997
8998 static void
8999 get_pc_thunk_name (char name[32], unsigned int regno)
9000 {
9001 gcc_assert (!TARGET_64BIT);
9002
9003 if (USE_HIDDEN_LINKONCE)
9004 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9005 else
9006 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9007 }
9008
9009
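/* For reference (an informal sketch): the thunk bodies emitted by
   ix86_code_end below consist of nothing but

       mov<size>  (%esp), %<reg>
       ret

   i.e. they copy the return address (the caller's program counter)
   into REG and return, which is how 32-bit PIC code materializes its
   own address.  */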
9010 /* This function generates code for -fpic that loads %ebx with
9011 the return address of the caller and then returns. */
9012
9013 static void
9014 ix86_code_end (void)
9015 {
9016 rtx xops[2];
9017 int regno;
9018
9019 for (regno = AX_REG; regno <= SP_REG; regno++)
9020 {
9021 char name[32];
9022 tree decl;
9023
9024 if (!(pic_labels_used & (1 << regno)))
9025 continue;
9026
9027 get_pc_thunk_name (name, regno);
9028
9029 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9030 get_identifier (name),
9031 build_function_type_list (void_type_node, NULL_TREE));
9032 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9033 NULL_TREE, void_type_node);
9034 TREE_PUBLIC (decl) = 1;
9035 TREE_STATIC (decl) = 1;
9036 DECL_IGNORED_P (decl) = 1;
9037
9038 #if TARGET_MACHO
9039 if (TARGET_MACHO)
9040 {
9041 switch_to_section (darwin_sections[text_coal_section]);
9042 fputs ("\t.weak_definition\t", asm_out_file);
9043 assemble_name (asm_out_file, name);
9044 fputs ("\n\t.private_extern\t", asm_out_file);
9045 assemble_name (asm_out_file, name);
9046 putc ('\n', asm_out_file);
9047 ASM_OUTPUT_LABEL (asm_out_file, name);
9048 DECL_WEAK (decl) = 1;
9049 }
9050 else
9051 #endif
9052 if (USE_HIDDEN_LINKONCE)
9053 {
9054 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9055
9056 targetm.asm_out.unique_section (decl, 0);
9057 switch_to_section (get_named_section (decl, NULL, 0));
9058
9059 targetm.asm_out.globalize_label (asm_out_file, name);
9060 fputs ("\t.hidden\t", asm_out_file);
9061 assemble_name (asm_out_file, name);
9062 putc ('\n', asm_out_file);
9063 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9064 }
9065 else
9066 {
9067 switch_to_section (text_section);
9068 ASM_OUTPUT_LABEL (asm_out_file, name);
9069 }
9070
9071 DECL_INITIAL (decl) = make_node (BLOCK);
9072 current_function_decl = decl;
9073 init_function_start (decl);
9074 first_function_block_is_cold = false;
9075 /* Make sure unwind info is emitted for the thunk if needed. */
9076 final_start_function (emit_barrier (), asm_out_file, 1);
9077
9078 /* Pad stack IP move with 4 instructions (two NOPs count
9079 as one instruction). */
9080 if (TARGET_PAD_SHORT_FUNCTION)
9081 {
9082 int i = 8;
9083
9084 while (i--)
9085 fputs ("\tnop\n", asm_out_file);
9086 }
9087
9088 xops[0] = gen_rtx_REG (Pmode, regno);
9089 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9090 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9091 fputs ("\tret\n", asm_out_file);
9092 final_end_function ();
9093 init_insn_lengths ();
9094 free_after_compilation (cfun);
9095 set_cfun (NULL);
9096 current_function_decl = NULL;
9097 }
9098
9099 if (flag_split_stack)
9100 file_end_indicate_split_stack ();
9101 }
9102
9103 /* Emit code for the SET_GOT patterns. */
9104
9105 const char *
9106 output_set_got (rtx dest, rtx label)
9107 {
9108 rtx xops[3];
9109
9110 xops[0] = dest;
9111
9112 if (TARGET_VXWORKS_RTP && flag_pic)
9113 {
9114 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9115 xops[2] = gen_rtx_MEM (Pmode,
9116 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9117 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9118
9119 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9120 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9121 an unadorned address. */
9122 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9123 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9124 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9125 return "";
9126 }
9127
9128 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9129
9130 if (!flag_pic)
9131 {
9132 if (TARGET_MACHO)
9133 /* We don't need a pic base, we're not producing pic. */
9134 gcc_unreachable ();
9135
9136 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9137 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9138 targetm.asm_out.internal_label (asm_out_file, "L",
9139 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9140 }
9141 else
9142 {
9143 char name[32];
9144 get_pc_thunk_name (name, REGNO (dest));
9145 pic_labels_used |= 1 << REGNO (dest);
9146
9147 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9148 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9149 output_asm_insn ("call\t%X2", xops);
9150
9151 #if TARGET_MACHO
9152 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9153 This is what will be referenced by the Mach-O PIC subsystem. */
9154 if (machopic_should_output_picbase_label () || !label)
9155 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9156
9157 /* When we are restoring the pic base at the site of a nonlocal label,
9158 and we decided to emit the pic base above, we will still output a
9159 local label used for calculating the correction offset (even though
9160 the offset will be 0 in that case). */
9161 if (label)
9162 targetm.asm_out.internal_label (asm_out_file, "L",
9163 CODE_LABEL_NUMBER (label));
9164 #endif
9165 }
9166
9167 if (!TARGET_MACHO)
9168 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9169
9170 return "";
9171 }
9172
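/* For illustration (a sketch of the non-Darwin PIC case): with DEST
   being %ebx, the sequence printed by output_set_got is roughly

       call  __x86.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk (emitted by ix86_code_end above) loads the return
   address into %ebx, and the add of the magic _GLOBAL_OFFSET_TABLE_
   symbol is resolved by the assembler and linker to the displacement
   to the GOT.  */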
9173 /* Generate a "push" pattern for input ARG. */
9174
9175 static rtx
9176 gen_push (rtx arg)
9177 {
9178 struct machine_function *m = cfun->machine;
9179
9180 if (m->fs.cfa_reg == stack_pointer_rtx)
9181 m->fs.cfa_offset += UNITS_PER_WORD;
9182 m->fs.sp_offset += UNITS_PER_WORD;
9183
9184 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9185 arg = gen_rtx_REG (word_mode, REGNO (arg));
9186
9187 return gen_rtx_SET (VOIDmode,
9188 gen_rtx_MEM (word_mode,
9189 gen_rtx_PRE_DEC (Pmode,
9190 stack_pointer_rtx)),
9191 arg);
9192 }
9193
9194 /* Generate a "pop" pattern for input ARG. */
9195
9196 static rtx
9197 gen_pop (rtx arg)
9198 {
9199 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9200 arg = gen_rtx_REG (word_mode, REGNO (arg));
9201
9202 return gen_rtx_SET (VOIDmode,
9203 arg,
9204 gen_rtx_MEM (word_mode,
9205 gen_rtx_POST_INC (Pmode,
9206 stack_pointer_rtx)));
9207 }
9208
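/* For illustration (a sketch of the 64-bit case): gen_push and gen_pop
   above build patterns of the form

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI REGNO))
       (set (reg:DI REGNO) (mem:DI (post_inc:DI (reg:DI sp))))

   which the backend's push and pop insn patterns then match; on 32-bit
   targets word_mode is SImode instead of DImode.  */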
9209 /* Return the number of an unused call-clobbered register that is available
9210 for the entire function, or INVALID_REGNUM if there is none. */
9211
9212 static unsigned int
9213 ix86_select_alt_pic_regnum (void)
9214 {
9215 if (crtl->is_leaf
9216 && !crtl->profile
9217 && !ix86_current_function_calls_tls_descriptor)
9218 {
9219 int i, drap;
9220 /* Can't use the same register for both PIC and DRAP. */
9221 if (crtl->drap_reg)
9222 drap = REGNO (crtl->drap_reg);
9223 else
9224 drap = -1;
9225 for (i = 2; i >= 0; --i)
9226 if (i != drap && !df_regs_ever_live_p (i))
9227 return i;
9228 }
9229
9230 return INVALID_REGNUM;
9231 }
9232
9233 /* Return TRUE if we need to save REGNO. */
9234
9235 static bool
9236 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9237 {
9238 if (pic_offset_table_rtx
9239 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9240 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9241 || crtl->profile
9242 || crtl->calls_eh_return
9243 || crtl->uses_const_pool
9244 || cfun->has_nonlocal_label))
9245 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9246
9247 if (crtl->calls_eh_return && maybe_eh_return)
9248 {
9249 unsigned i;
9250 for (i = 0; ; i++)
9251 {
9252 unsigned test = EH_RETURN_DATA_REGNO (i);
9253 if (test == INVALID_REGNUM)
9254 break;
9255 if (test == regno)
9256 return true;
9257 }
9258 }
9259
9260 if (crtl->drap_reg
9261 && regno == REGNO (crtl->drap_reg)
9262 && !cfun->machine->no_drap_save_restore)
9263 return true;
9264
9265 return (df_regs_ever_live_p (regno)
9266 && !call_used_regs[regno]
9267 && !fixed_regs[regno]
9268 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9269 }
9270
9271 /* Return the number of saved general purpose registers. */
9272
9273 static int
9274 ix86_nsaved_regs (void)
9275 {
9276 int nregs = 0;
9277 int regno;
9278
9279 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9280 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9281 nregs ++;
9282 return nregs;
9283 }
9284
9285 /* Return the number of saved SSE registers. */
9286
9287 static int
9288 ix86_nsaved_sseregs (void)
9289 {
9290 int nregs = 0;
9291 int regno;
9292
9293 if (!TARGET_64BIT_MS_ABI)
9294 return 0;
9295 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9296 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9297 nregs ++;
9298 return nregs;
9299 }
9300
9301 /* Given FROM and TO register numbers, say whether this elimination is
9302 allowed. If stack alignment is needed, we can only replace argument
9303 pointer with hard frame pointer, or replace frame pointer with stack
9304 pointer. Otherwise, frame pointer elimination is automatically
9305 handled and all other eliminations are valid. */
9306
9307 static bool
9308 ix86_can_eliminate (const int from, const int to)
9309 {
9310 if (stack_realign_fp)
9311 return ((from == ARG_POINTER_REGNUM
9312 && to == HARD_FRAME_POINTER_REGNUM)
9313 || (from == FRAME_POINTER_REGNUM
9314 && to == STACK_POINTER_REGNUM));
9315 else
9316 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9317 }
9318
9319 /* Return the offset between two registers, one to be eliminated, and the other
9320 its replacement, at the start of a routine. */
9321
9322 HOST_WIDE_INT
9323 ix86_initial_elimination_offset (int from, int to)
9324 {
9325 struct ix86_frame frame;
9326 ix86_compute_frame_layout (&frame);
9327
9328 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9329 return frame.hard_frame_pointer_offset;
9330 else if (from == FRAME_POINTER_REGNUM
9331 && to == HARD_FRAME_POINTER_REGNUM)
9332 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9333 else
9334 {
9335 gcc_assert (to == STACK_POINTER_REGNUM);
9336
9337 if (from == ARG_POINTER_REGNUM)
9338 return frame.stack_pointer_offset;
9339
9340 gcc_assert (from == FRAME_POINTER_REGNUM);
9341 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9342 }
9343 }
9344
9345 /* In a dynamically-aligned function, we can't know the offset from
9346 stack pointer to frame pointer, so we must ensure that setjmp
9347 eliminates fp against the hard fp (%ebp) rather than trying to
9348 index from %esp up to the top of the frame across a gap that is
9349 of unknown (at compile-time) size. */
9350 static rtx
9351 ix86_builtin_setjmp_frame_value (void)
9352 {
9353 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9354 }
9355
9356 /* When using -fsplit-stack, the allocation routines set a field in
9357 the TCB to the bottom of the stack plus this much space, measured
9358 in bytes. */
9359
9360 #define SPLIT_STACK_AVAILABLE 256
9361
9362 /* Fill in the ix86_frame structure describing the frame of the current function. */
9363
9364 static void
9365 ix86_compute_frame_layout (struct ix86_frame *frame)
9366 {
9367 unsigned HOST_WIDE_INT stack_alignment_needed;
9368 HOST_WIDE_INT offset;
9369 unsigned HOST_WIDE_INT preferred_alignment;
9370 HOST_WIDE_INT size = get_frame_size ();
9371 HOST_WIDE_INT to_allocate;
9372
9373 frame->nregs = ix86_nsaved_regs ();
9374 frame->nsseregs = ix86_nsaved_sseregs ();
9375
9376 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9377 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9378
9379 /* The 64-bit MS ABI seems to require the stack alignment to always be 16, except
9380 in function prologues and leaf functions. */
9381 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9382 && (!crtl->is_leaf || cfun->calls_alloca != 0
9383 || ix86_current_function_calls_tls_descriptor))
9384 {
9385 preferred_alignment = 16;
9386 stack_alignment_needed = 16;
9387 crtl->preferred_stack_boundary = 128;
9388 crtl->stack_alignment_needed = 128;
9389 }
9390
9391 gcc_assert (!size || stack_alignment_needed);
9392 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9393 gcc_assert (preferred_alignment <= stack_alignment_needed);
9394
9395 /* For SEH we have to limit the amount of code movement into the prologue.
9396 At present we do this via a BLOCKAGE, at which point there's very little
9397 scheduling that can be done, which means that there's very little point
9398 in doing anything except PUSHs. */
9399 if (TARGET_SEH)
9400 cfun->machine->use_fast_prologue_epilogue = false;
9401
9402 /* During a reload iteration the number of registers saved can change.
9403 Recompute the value as needed. Do not recompute when the number of registers
9404 didn't change, as reload makes multiple calls to this function and does not
9405 expect the decision to change within a single iteration. */
9406 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9407 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9408 {
9409 int count = frame->nregs;
9410 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9411
9412 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9413
9414 /* The fast prologue uses move instead of push to save registers. This
9415 is significantly longer, but also executes faster as modern hardware
9416 can execute the moves in parallel, but can't do that for push/pop.
9417
9418 Be careful about choosing which prologue to emit: when the function
9419 takes many instructions to execute, we may as well use the slow version,
9420 and likewise when the function is known to be outside a hot spot (this
9421 is known with feedback only). Weight the size of the function by the
9422 number of registers to save, as it is cheap to use one or two push
9423 instructions but very slow to use many of them. */
9424 if (count)
9425 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9426 if (node->frequency < NODE_FREQUENCY_NORMAL
9427 || (flag_branch_probabilities
9428 && node->frequency < NODE_FREQUENCY_HOT))
9429 cfun->machine->use_fast_prologue_epilogue = false;
9430 else
9431 cfun->machine->use_fast_prologue_epilogue
9432 = !expensive_function_p (count);
9433 }
9434
9435 frame->save_regs_using_mov
9436 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9437 /* If static stack checking is enabled and done with probes,
9438 the registers need to be saved before allocating the frame. */
9439 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9440
9441 /* Skip return address. */
9442 offset = UNITS_PER_WORD;
9443
9444 /* Skip pushed static chain. */
9445 if (ix86_static_chain_on_stack)
9446 offset += UNITS_PER_WORD;
9447
9448 /* Skip saved base pointer. */
9449 if (frame_pointer_needed)
9450 offset += UNITS_PER_WORD;
9451 frame->hfp_save_offset = offset;
9452
9453 /* The traditional frame pointer location is at the top of the frame. */
9454 frame->hard_frame_pointer_offset = offset;
9455
9456 /* Register save area */
9457 offset += frame->nregs * UNITS_PER_WORD;
9458 frame->reg_save_offset = offset;
9459
9460 /* On SEH target, registers are pushed just before the frame pointer
9461 location. */
9462 if (TARGET_SEH)
9463 frame->hard_frame_pointer_offset = offset;
9464
9465 /* Align and set SSE register save area. */
9466 if (frame->nsseregs)
9467 {
9468 /* The only ABI that has saved SSE registers (Win64) also has a
9469 16-byte aligned default stack, and thus we don't need to be
9470 within the re-aligned local stack frame to save them. */
9471 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9472 offset = (offset + 16 - 1) & -16;
9473 offset += frame->nsseregs * 16;
9474 }
9475 frame->sse_reg_save_offset = offset;
9476
9477 /* The re-aligned stack starts here. Values before this point are not
9478 directly comparable with values below this point. In order to make
9479 sure that no value happens to be the same before and after, force
9480 the alignment computation below to add a non-zero value. */
9481 if (stack_realign_fp)
9482 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9483
9484 /* Va-arg area */
9485 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9486 offset += frame->va_arg_size;
9487
9488 /* Align start of frame for local function. */
9489 if (stack_realign_fp
9490 || offset != frame->sse_reg_save_offset
9491 || size != 0
9492 || !crtl->is_leaf
9493 || cfun->calls_alloca
9494 || ix86_current_function_calls_tls_descriptor)
9495 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9496
9497 /* Frame pointer points here. */
9498 frame->frame_pointer_offset = offset;
9499
9500 offset += size;
9501
9502 /* Add the outgoing arguments area. It can be skipped if we eliminated
9503 all the function calls as dead code.
9504 Skipping is, however, impossible when the function calls alloca, as the
9505 alloca expander assumes that the last crtl->outgoing_args_size bytes
9506 of the stack frame are unused. */
9507 if (ACCUMULATE_OUTGOING_ARGS
9508 && (!crtl->is_leaf || cfun->calls_alloca
9509 || ix86_current_function_calls_tls_descriptor))
9510 {
9511 offset += crtl->outgoing_args_size;
9512 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9513 }
9514 else
9515 frame->outgoing_arguments_size = 0;
9516
9517 /* Align stack boundary. Only needed if we're calling another function
9518 or using alloca. */
9519 if (!crtl->is_leaf || cfun->calls_alloca
9520 || ix86_current_function_calls_tls_descriptor)
9521 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9522
9523 /* We've reached end of stack frame. */
9524 frame->stack_pointer_offset = offset;
9525
9526 /* Size prologue needs to allocate. */
9527 to_allocate = offset - frame->sse_reg_save_offset;
9528
9529 if ((!to_allocate && frame->nregs <= 1)
9530 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9531 frame->save_regs_using_mov = false;
9532
9533 if (ix86_using_red_zone ()
9534 && crtl->sp_is_unchanging
9535 && crtl->is_leaf
9536 && !ix86_current_function_calls_tls_descriptor)
9537 {
9538 frame->red_zone_size = to_allocate;
9539 if (frame->save_regs_using_mov)
9540 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9541 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9542 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9543 }
9544 else
9545 frame->red_zone_size = 0;
9546 frame->stack_pointer_offset -= frame->red_zone_size;
9547
9548 /* The SEH frame pointer location is near the bottom of the frame.
9549 This is enforced by the fact that the difference between the
9550 stack pointer and the frame pointer is limited to 240 bytes in
9551 the unwind data structure. */
9552 if (TARGET_SEH)
9553 {
9554 HOST_WIDE_INT diff;
9555
9556 /* If we can leave the frame pointer where it is, do so. This also returns
9557 the establisher frame for __builtin_frame_address (0). */
9558 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9559 if (diff <= SEH_MAX_FRAME_SIZE
9560 && (diff > 240 || (diff & 15) != 0)
9561 && !crtl->accesses_prior_frames)
9562 {
9563 /* Ideally we'd determine what portion of the local stack frame
9564 (within the constraint of the lowest 240) is most heavily used.
9565 But without that complication, simply bias the frame pointer
9566 by 128 bytes so as to maximize the amount of the local stack
9567 frame that is addressable with 8-bit offsets. */
9568 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9569 }
9570 }
9571 }
9572
9573 /* This is semi-inlined memory_address_length, but simplified
9574 since we know that we're always dealing with reg+offset, and
9575 to avoid having to create and discard all that rtl. */
9576
9577 static inline int
9578 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9579 {
9580 int len = 4;
9581
9582 if (offset == 0)
9583 {
9584 /* EBP and R13 cannot be encoded without an offset. */
9585 len = (regno == BP_REG || regno == R13_REG);
9586 }
9587 else if (IN_RANGE (offset, -128, 127))
9588 len = 1;
9589
9590 /* ESP and R12 must be encoded with a SIB byte. */
9591 if (regno == SP_REG || regno == R12_REG)
9592 len++;
9593
9594 return len;
9595 }
9596
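/* A worked example (illustrative): for %rsp + 8 the estimate above is
   1 (disp8) + 1 (the SIB byte required for ESP/R12) = 2 extra bytes,
   while %rbp + 0 costs 1 byte because EBP/R13 cannot be encoded without
   at least a zero displacement; a displacement outside [-128, 127]
   costs the full 4-byte form.  */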
9597 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9598 The valid base registers are taken from CFUN->MACHINE->FS. */
9599
9600 static rtx
9601 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9602 {
9603 const struct machine_function *m = cfun->machine;
9604 rtx base_reg = NULL;
9605 HOST_WIDE_INT base_offset = 0;
9606
9607 if (m->use_fast_prologue_epilogue)
9608 {
9609 /* Choose the base register most likely to allow the most scheduling
9610 opportunities. Generally FP is valid throughout the function,
9611 while DRAP must be reloaded within the epilogue. But choose either
9612 over the SP due to increased encoding size. */
9613
9614 if (m->fs.fp_valid)
9615 {
9616 base_reg = hard_frame_pointer_rtx;
9617 base_offset = m->fs.fp_offset - cfa_offset;
9618 }
9619 else if (m->fs.drap_valid)
9620 {
9621 base_reg = crtl->drap_reg;
9622 base_offset = 0 - cfa_offset;
9623 }
9624 else if (m->fs.sp_valid)
9625 {
9626 base_reg = stack_pointer_rtx;
9627 base_offset = m->fs.sp_offset - cfa_offset;
9628 }
9629 }
9630 else
9631 {
9632 HOST_WIDE_INT toffset;
9633 int len = 16, tlen;
9634
9635 /* Choose the base register with the smallest address encoding.
9636 With a tie, choose FP > DRAP > SP. */
9637 if (m->fs.sp_valid)
9638 {
9639 base_reg = stack_pointer_rtx;
9640 base_offset = m->fs.sp_offset - cfa_offset;
9641 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9642 }
9643 if (m->fs.drap_valid)
9644 {
9645 toffset = 0 - cfa_offset;
9646 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9647 if (tlen <= len)
9648 {
9649 base_reg = crtl->drap_reg;
9650 base_offset = toffset;
9651 len = tlen;
9652 }
9653 }
9654 if (m->fs.fp_valid)
9655 {
9656 toffset = m->fs.fp_offset - cfa_offset;
9657 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9658 if (tlen <= len)
9659 {
9660 base_reg = hard_frame_pointer_rtx;
9661 base_offset = toffset;
9662 len = tlen;
9663 }
9664 }
9665 }
9666 gcc_assert (base_reg != NULL);
9667
9668 return plus_constant (Pmode, base_reg, base_offset);
9669 }
9670
9671 /* Emit code to save registers in the prologue. */
9672
9673 static void
9674 ix86_emit_save_regs (void)
9675 {
9676 unsigned int regno;
9677 rtx insn;
9678
9679 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9680 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9681 {
9682 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9683 RTX_FRAME_RELATED_P (insn) = 1;
9684 }
9685 }
9686
9687 /* Emit a single register save at CFA - CFA_OFFSET. */
9688
9689 static void
9690 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9691 HOST_WIDE_INT cfa_offset)
9692 {
9693 struct machine_function *m = cfun->machine;
9694 rtx reg = gen_rtx_REG (mode, regno);
9695 rtx mem, addr, base, insn;
9696
9697 addr = choose_baseaddr (cfa_offset);
9698 mem = gen_frame_mem (mode, addr);
9699
9700 /* For SSE saves, we need to indicate the 128-bit alignment. */
9701 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9702
9703 insn = emit_move_insn (mem, reg);
9704 RTX_FRAME_RELATED_P (insn) = 1;
9705
9706 base = addr;
9707 if (GET_CODE (base) == PLUS)
9708 base = XEXP (base, 0);
9709 gcc_checking_assert (REG_P (base));
9710
9711 /* When saving registers into a re-aligned local stack frame, avoid
9712 any tricky guessing by dwarf2out. */
9713 if (m->fs.realigned)
9714 {
9715 gcc_checking_assert (stack_realign_drap);
9716
9717 if (regno == REGNO (crtl->drap_reg))
9718 {
9719 /* A bit of a hack. We force the DRAP register to be saved in
9720 the re-aligned stack frame, which provides us with a copy
9721 of the CFA that will last past the prologue. Install it. */
9722 gcc_checking_assert (cfun->machine->fs.fp_valid);
9723 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9724 cfun->machine->fs.fp_offset - cfa_offset);
9725 mem = gen_rtx_MEM (mode, addr);
9726 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9727 }
9728 else
9729 {
9730 /* The frame pointer is a stable reference within the
9731 aligned frame. Use it. */
9732 gcc_checking_assert (cfun->machine->fs.fp_valid);
9733 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9734 cfun->machine->fs.fp_offset - cfa_offset);
9735 mem = gen_rtx_MEM (mode, addr);
9736 add_reg_note (insn, REG_CFA_EXPRESSION,
9737 gen_rtx_SET (VOIDmode, mem, reg));
9738 }
9739 }
9740
9741 /* The memory may not be relative to the current CFA register,
9742 which means that we may need to generate a new pattern for
9743 use by the unwind info. */
9744 else if (base != m->fs.cfa_reg)
9745 {
9746 addr = plus_constant (Pmode, m->fs.cfa_reg,
9747 m->fs.cfa_offset - cfa_offset);
9748 mem = gen_rtx_MEM (mode, addr);
9749 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9750 }
9751 }
9752
9753 /* Emit code to save registers using MOV insns.
9754 First register is stored at CFA - CFA_OFFSET. */
9755 static void
9756 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9757 {
9758 unsigned int regno;
9759
9760 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9761 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9762 {
9763 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9764 cfa_offset -= UNITS_PER_WORD;
9765 }
9766 }
9767
9768 /* Emit code to save SSE registers using MOV insns.
9769 First register is stored at CFA - CFA_OFFSET. */
9770 static void
9771 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9772 {
9773 unsigned int regno;
9774
9775 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9776 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9777 {
9778 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9779 cfa_offset -= 16;
9780 }
9781 }
9782
9783 static GTY(()) rtx queued_cfa_restores;
9784
9785 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9786 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9787 Don't add the note if the previously saved value will be left untouched
9788 within the stack red zone till return, as unwinders can find the same value
9789 in the register and on the stack. */
9790
9791 static void
9792 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9793 {
9794 if (!crtl->shrink_wrapped
9795 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9796 return;
9797
9798 if (insn)
9799 {
9800 add_reg_note (insn, REG_CFA_RESTORE, reg);
9801 RTX_FRAME_RELATED_P (insn) = 1;
9802 }
9803 else
9804 queued_cfa_restores
9805 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9806 }
9807
9808 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9809
9810 static void
9811 ix86_add_queued_cfa_restore_notes (rtx insn)
9812 {
9813 rtx last;
9814 if (!queued_cfa_restores)
9815 return;
9816 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9817 ;
9818 XEXP (last, 1) = REG_NOTES (insn);
9819 REG_NOTES (insn) = queued_cfa_restores;
9820 queued_cfa_restores = NULL_RTX;
9821 RTX_FRAME_RELATED_P (insn) = 1;
9822 }
9823
9824 /* Expand prologue or epilogue stack adjustment.
9825 The pattern exists to put a dependency on all ebp-based memory accesses.
9826 STYLE should be negative if instructions should be marked as frame related,
9827 zero if the %r11 register is live and cannot be freely used, and positive
9828 otherwise. */
9829
9830 static void
9831 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9832 int style, bool set_cfa)
9833 {
9834 struct machine_function *m = cfun->machine;
9835 rtx insn;
9836 bool add_frame_related_expr = false;
9837
9838 if (Pmode == SImode)
9839 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9840 else if (x86_64_immediate_operand (offset, DImode))
9841 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9842 else
9843 {
9844 rtx tmp;
9845 /* r11 is used by indirect sibcall return as well, set before the
9846 epilogue and used after the epilogue. */
9847 if (style)
9848 tmp = gen_rtx_REG (DImode, R11_REG);
9849 else
9850 {
9851 gcc_assert (src != hard_frame_pointer_rtx
9852 && dest != hard_frame_pointer_rtx);
9853 tmp = hard_frame_pointer_rtx;
9854 }
9855 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9856 if (style < 0)
9857 add_frame_related_expr = true;
9858
9859 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9860 }
9861
9862 insn = emit_insn (insn);
9863 if (style >= 0)
9864 ix86_add_queued_cfa_restore_notes (insn);
9865
9866 if (set_cfa)
9867 {
9868 rtx r;
9869
9870 gcc_assert (m->fs.cfa_reg == src);
9871 m->fs.cfa_offset += INTVAL (offset);
9872 m->fs.cfa_reg = dest;
9873
9874 r = gen_rtx_PLUS (Pmode, src, offset);
9875 r = gen_rtx_SET (VOIDmode, dest, r);
9876 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9877 RTX_FRAME_RELATED_P (insn) = 1;
9878 }
9879 else if (style < 0)
9880 {
9881 RTX_FRAME_RELATED_P (insn) = 1;
9882 if (add_frame_related_expr)
9883 {
9884 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9885 r = gen_rtx_SET (VOIDmode, dest, r);
9886 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9887 }
9888 }
9889
9890 if (dest == stack_pointer_rtx)
9891 {
9892 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9893 bool valid = m->fs.sp_valid;
9894
9895 if (src == hard_frame_pointer_rtx)
9896 {
9897 valid = m->fs.fp_valid;
9898 ooffset = m->fs.fp_offset;
9899 }
9900 else if (src == crtl->drap_reg)
9901 {
9902 valid = m->fs.drap_valid;
9903 ooffset = 0;
9904 }
9905 else
9906 {
9907 /* Else there are two possibilities: SP itself, which we set
9908 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9909 taken care of by hand along the eh_return path. */
9910 gcc_checking_assert (src == stack_pointer_rtx
9911 || offset == const0_rtx);
9912 }
9913
9914 m->fs.sp_offset = ooffset - INTVAL (offset);
9915 m->fs.sp_valid = valid;
9916 }
9917 }
9918
9919 /* Find an available register to be used as the dynamic realign argument
9920 pointer register. Such a register will be written in the prologue and
9921 used at the beginning of the body, so it must not be
9922 1. parameter passing register.
9923 2. GOT pointer.
9924 We reuse static-chain register if it is available. Otherwise, we
9925 use DI for i386 and R13 for x86-64. We chose R13 since it has
9926 shorter encoding.
9927
9928 Return: the regno of chosen register. */
9929
9930 static unsigned int
9931 find_drap_reg (void)
9932 {
9933 tree decl = cfun->decl;
9934
9935 if (TARGET_64BIT)
9936 {
9937 /* Use R13 for a nested function or a function that needs a static chain.
9938 Since a function with a tail call may use any caller-saved
9939 register in the epilogue, DRAP must not use a caller-saved
9940 register in such a case. */
9941 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9942 return R13_REG;
9943
9944 return R10_REG;
9945 }
9946 else
9947 {
9948 /* Use DI for a nested function or a function that needs a static chain.
9949 Since a function with a tail call may use any caller-saved
9950 register in the epilogue, DRAP must not use a caller-saved
9951 register in such a case. */
9952 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9953 return DI_REG;
9954
9955 /* Reuse static chain register if it isn't used for parameter
9956 passing. */
9957 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9958 {
9959 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9960 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9961 return CX_REG;
9962 }
9963 return DI_REG;
9964 }
9965 }
9966
9967 /* Return minimum incoming stack alignment. */
9968
9969 static unsigned int
9970 ix86_minimum_incoming_stack_boundary (bool sibcall)
9971 {
9972 unsigned int incoming_stack_boundary;
9973
9974 /* Prefer the one specified at command line. */
9975 if (ix86_user_incoming_stack_boundary)
9976 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9977 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9978 when -mstackrealign is used, this isn't a sibcall check, and the
9979 estimated stack alignment is 128 bits. */
9980 else if (!sibcall
9981 && !TARGET_64BIT
9982 && ix86_force_align_arg_pointer
9983 && crtl->stack_alignment_estimated == 128)
9984 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9985 else
9986 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9987
9988 /* Incoming stack alignment can be changed on individual functions
9989 via force_align_arg_pointer attribute. We use the smallest
9990 incoming stack boundary. */
9991 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9992 && lookup_attribute (ix86_force_align_arg_pointer_string,
9993 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9994 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9995
9996 /* The incoming stack frame has to be aligned at least at
9997 parm_stack_boundary. */
9998 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9999 incoming_stack_boundary = crtl->parm_stack_boundary;
10000
10001 /* The stack at the entrance of main is aligned by the runtime. We use the
10002 smallest incoming stack boundary. */
10003 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10004 && DECL_NAME (current_function_decl)
10005 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10006 && DECL_FILE_SCOPE_P (current_function_decl))
10007 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10008
10009 return incoming_stack_boundary;
10010 }
10011
10012 /* Update incoming stack boundary and estimated stack alignment. */
10013
10014 static void
10015 ix86_update_stack_boundary (void)
10016 {
10017 ix86_incoming_stack_boundary
10018 = ix86_minimum_incoming_stack_boundary (false);
10019
10020 /* x86_64 varargs need 16-byte stack alignment for the register save
10021 area. */
10022 if (TARGET_64BIT
10023 && cfun->stdarg
10024 && crtl->stack_alignment_estimated < 128)
10025 crtl->stack_alignment_estimated = 128;
10026 }
10027
10028 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10029 needed or an rtx for DRAP otherwise. */
10030
10031 static rtx
10032 ix86_get_drap_rtx (void)
10033 {
10034 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10035 crtl->need_drap = true;
10036
10037 if (stack_realign_drap)
10038 {
10039 /* Assign DRAP to vDRAP and return vDRAP. */
10040 unsigned int regno = find_drap_reg ();
10041 rtx drap_vreg;
10042 rtx arg_ptr;
10043 rtx seq, insn;
10044
10045 arg_ptr = gen_rtx_REG (Pmode, regno);
10046 crtl->drap_reg = arg_ptr;
10047
10048 start_sequence ();
10049 drap_vreg = copy_to_reg (arg_ptr);
10050 seq = get_insns ();
10051 end_sequence ();
10052
10053 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10054 if (!optimize)
10055 {
10056 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10057 RTX_FRAME_RELATED_P (insn) = 1;
10058 }
10059 return drap_vreg;
10060 }
10061 else
10062 return NULL;
10063 }
10064
10065 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10066
10067 static rtx
10068 ix86_internal_arg_pointer (void)
10069 {
10070 return virtual_incoming_args_rtx;
10071 }
10072
10073 struct scratch_reg {
10074 rtx reg;
10075 bool saved;
10076 };
10077
10078 /* Return a short-lived scratch register for use on function entry.
10079 In 32-bit mode, it is valid only after the registers are saved
10080 in the prologue. This register must be released by means of
10081 release_scratch_register_on_entry once it is dead. */
10082
10083 static void
10084 get_scratch_register_on_entry (struct scratch_reg *sr)
10085 {
10086 int regno;
10087
10088 sr->saved = false;
10089
10090 if (TARGET_64BIT)
10091 {
10092 /* We always use R11 in 64-bit mode. */
10093 regno = R11_REG;
10094 }
10095 else
10096 {
10097 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10098 bool fastcall_p
10099 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10100 bool thiscall_p
10101 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10102 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10103 int regparm = ix86_function_regparm (fntype, decl);
10104 int drap_regno
10105 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10106
10107 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10108 for the static chain register. */
10109 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10110 && drap_regno != AX_REG)
10111 regno = AX_REG;
10112 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10113 for the static chain register. */
10114 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10115 regno = AX_REG;
10116 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10117 regno = DX_REG;
10118 /* ecx is the static chain register. */
10119 else if (regparm < 3 && !fastcall_p && !thiscall_p
10120 && !static_chain_p
10121 && drap_regno != CX_REG)
10122 regno = CX_REG;
10123 else if (ix86_save_reg (BX_REG, true))
10124 regno = BX_REG;
10125 /* esi is the static chain register. */
10126 else if (!(regparm == 3 && static_chain_p)
10127 && ix86_save_reg (SI_REG, true))
10128 regno = SI_REG;
10129 else if (ix86_save_reg (DI_REG, true))
10130 regno = DI_REG;
10131 else
10132 {
10133 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10134 sr->saved = true;
10135 }
10136 }
10137
10138 sr->reg = gen_rtx_REG (Pmode, regno);
10139 if (sr->saved)
10140 {
10141 rtx insn = emit_insn (gen_push (sr->reg));
10142 RTX_FRAME_RELATED_P (insn) = 1;
10143 }
10144 }
10145
10146 /* Release a scratch register obtained from the preceding function. */
10147
10148 static void
10149 release_scratch_register_on_entry (struct scratch_reg *sr)
10150 {
10151 if (sr->saved)
10152 {
10153 struct machine_function *m = cfun->machine;
10154 rtx x, insn = emit_insn (gen_pop (sr->reg));
10155
10156 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10157 RTX_FRAME_RELATED_P (insn) = 1;
10158 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10159 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10160 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10161 m->fs.sp_offset -= UNITS_PER_WORD;
10162 }
10163 }
10164
10165 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
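/* For illustration only: with the common STACK_CHECK_PROBE_INTERVAL_EXP
   value of 12 this is 4096 bytes, i.e. one probe per page; the exact
   value is target-configurable.  */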
10166
10167 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10168
10169 static void
10170 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10171 {
10172 /* We skip the probe for the first interval + a small dope of 4 words and
10173 probe that many bytes past the specified size to maintain a protection
10174 	     area at the bottom of the stack.  */
10175 const int dope = 4 * UNITS_PER_WORD;
10176 rtx size_rtx = GEN_INT (size), last;
10177
10178 /* See if we have a constant small number of probes to generate. If so,
10179 that's the easy case. The run-time loop is made up of 11 insns in the
10180 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10181 for n # of intervals. */
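  /* Worked example (illustrative): for a SIZE of five intervals the unrolled
     sequence costs 3 + 2*(5-1) = 11 insns, the same as the run-time loop,
     which is why the cutoff below is 5 * PROBE_INTERVAL.  */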
10182 if (size <= 5 * PROBE_INTERVAL)
10183 {
10184 HOST_WIDE_INT i, adjust;
10185 bool first_probe = true;
10186
10187 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10188 values of N from 1 until it exceeds SIZE. If only one probe is
10189 needed, this will not generate any code. Then adjust and probe
10190 to PROBE_INTERVAL + SIZE. */
10191 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10192 {
10193 if (first_probe)
10194 {
10195 adjust = 2 * PROBE_INTERVAL + dope;
10196 first_probe = false;
10197 }
10198 else
10199 adjust = PROBE_INTERVAL;
10200
10201 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10202 plus_constant (Pmode, stack_pointer_rtx,
10203 -adjust)));
10204 emit_stack_probe (stack_pointer_rtx);
10205 }
10206
10207 if (first_probe)
10208 adjust = size + PROBE_INTERVAL + dope;
10209 else
10210 adjust = size + PROBE_INTERVAL - i;
10211
10212 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10213 plus_constant (Pmode, stack_pointer_rtx,
10214 -adjust)));
10215 emit_stack_probe (stack_pointer_rtx);
10216
10217 /* Adjust back to account for the additional first interval. */
10218 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10219 plus_constant (Pmode, stack_pointer_rtx,
10220 PROBE_INTERVAL + dope)));
10221 }
10222
10223 /* Otherwise, do the same as above, but in a loop. Note that we must be
10224 extra careful with variables wrapping around because we might be at
10225 the very top (or the very bottom) of the address space and we have
10226 to be able to handle this case properly; in particular, we use an
10227 equality test for the loop condition. */
10228 else
10229 {
10230 HOST_WIDE_INT rounded_size;
10231 struct scratch_reg sr;
10232
10233 get_scratch_register_on_entry (&sr);
10234
10235
10236 /* Step 1: round SIZE to the previous multiple of the interval. */
10237
10238 rounded_size = size & -PROBE_INTERVAL;
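      /* Illustrative numbers, assuming a 4096-byte PROBE_INTERVAL: for
	 SIZE = 10000, ROUNDED_SIZE is 8192; the loop in step 3 then probes
	 every 4096 bytes up to ROUNDED_SIZE and step 4 covers the remaining
	 10000 - 8192 = 1808 bytes with a single adjust-and-probe.  */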
10239
10240
10241 /* Step 2: compute initial and final value of the loop counter. */
10242
10243 /* SP = SP_0 + PROBE_INTERVAL. */
10244 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10245 plus_constant (Pmode, stack_pointer_rtx,
10246 - (PROBE_INTERVAL + dope))));
10247
10248 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10249 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10250 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10251 gen_rtx_PLUS (Pmode, sr.reg,
10252 stack_pointer_rtx)));
10253
10254
10255 /* Step 3: the loop
10256
10257 while (SP != LAST_ADDR)
10258 {
10259 SP = SP + PROBE_INTERVAL
10260 probe at SP
10261 }
10262
10263 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10264 values of N from 1 until it is equal to ROUNDED_SIZE. */
10265
10266 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10267
10268
10269 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10270 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10271
10272 if (size != rounded_size)
10273 {
10274 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10275 plus_constant (Pmode, stack_pointer_rtx,
10276 rounded_size - size)));
10277 emit_stack_probe (stack_pointer_rtx);
10278 }
10279
10280 /* Adjust back to account for the additional first interval. */
10281 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10282 plus_constant (Pmode, stack_pointer_rtx,
10283 PROBE_INTERVAL + dope)));
10284
10285 release_scratch_register_on_entry (&sr);
10286 }
10287
10288 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10289
10290 /* Even if the stack pointer isn't the CFA register, we need to correctly
10291 describe the adjustments made to it, in particular differentiate the
10292 frame-related ones from the frame-unrelated ones. */
10293 if (size > 0)
10294 {
10295 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10296 XVECEXP (expr, 0, 0)
10297 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10298 plus_constant (Pmode, stack_pointer_rtx, -size));
10299 XVECEXP (expr, 0, 1)
10300 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10301 plus_constant (Pmode, stack_pointer_rtx,
10302 PROBE_INTERVAL + dope + size));
10303 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10304 RTX_FRAME_RELATED_P (last) = 1;
10305
10306 cfun->machine->fs.sp_offset += size;
10307 }
10308
10309 /* Make sure nothing is scheduled before we are done. */
10310 emit_insn (gen_blockage ());
10311 }
10312
10313 /* Adjust the stack pointer up to REG while probing it. */
10314
10315 const char *
10316 output_adjust_stack_and_probe (rtx reg)
10317 {
10318 static int labelno = 0;
10319 char loop_lab[32], end_lab[32];
10320 rtx xops[2];
10321
10322 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10323 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10324
10325 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10326
10327 /* Jump to END_LAB if SP == LAST_ADDR. */
10328 xops[0] = stack_pointer_rtx;
10329 xops[1] = reg;
10330 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10331 fputs ("\tje\t", asm_out_file);
10332 assemble_name_raw (asm_out_file, end_lab);
10333 fputc ('\n', asm_out_file);
10334
10335 /* SP = SP + PROBE_INTERVAL. */
10336 xops[1] = GEN_INT (PROBE_INTERVAL);
10337 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10338
10339 /* Probe at SP. */
10340 xops[1] = const0_rtx;
10341 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10342
10343 fprintf (asm_out_file, "\tjmp\t");
10344 assemble_name_raw (asm_out_file, loop_lab);
10345 fputc ('\n', asm_out_file);
10346
10347 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10348
10349 return "";
10350 }
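/* For illustration only (not emitted verbatim): on a 32-bit target with a
   4096-byte PROBE_INTERVAL, the loop written out above looks roughly like

       .LPSRL0:  cmpl    %ecx, %esp
                 je      .LPSRE0
                 subl    $4096, %esp
                 orl     $0, (%esp)
                 jmp     .LPSRL0
       .LPSRE0:

   where %ecx stands for whichever scratch register holds LAST_ADDR.  */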
10351
10352 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10353 inclusive. These are offsets from the current stack pointer. */
10354
10355 static void
10356 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10357 {
10358 /* See if we have a constant small number of probes to generate. If so,
10359 that's the easy case. The run-time loop is made up of 7 insns in the
10360 generic case while the compile-time loop is made up of n insns for n #
10361 of intervals. */
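  /* Worked example (illustrative): probing three intervals inline takes
     3 probe insns versus the 7-insn run-time loop; at seven intervals the
     two are equal, hence the 7 * PROBE_INTERVAL cutoff below.  */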
10362 if (size <= 7 * PROBE_INTERVAL)
10363 {
10364 HOST_WIDE_INT i;
10365
10366 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10367 it exceeds SIZE. If only one probe is needed, this will not
10368 generate any code. Then probe at FIRST + SIZE. */
10369 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10370 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10371 -(first + i)));
10372
10373 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10374 -(first + size)));
10375 }
10376
10377 /* Otherwise, do the same as above, but in a loop. Note that we must be
10378 extra careful with variables wrapping around because we might be at
10379 the very top (or the very bottom) of the address space and we have
10380 to be able to handle this case properly; in particular, we use an
10381 equality test for the loop condition. */
10382 else
10383 {
10384 HOST_WIDE_INT rounded_size, last;
10385 struct scratch_reg sr;
10386
10387 get_scratch_register_on_entry (&sr);
10388
10389
10390 /* Step 1: round SIZE to the previous multiple of the interval. */
10391
10392 rounded_size = size & -PROBE_INTERVAL;
10393
10394
10395 /* Step 2: compute initial and final value of the loop counter. */
10396
10397 /* TEST_OFFSET = FIRST. */
10398 emit_move_insn (sr.reg, GEN_INT (-first));
10399
10400 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10401 last = first + rounded_size;
10402
10403
10404 /* Step 3: the loop
10405
10406 while (TEST_ADDR != LAST_ADDR)
10407 {
10408 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10409 probe at TEST_ADDR
10410 }
10411
10412 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10413 until it is equal to ROUNDED_SIZE. */
10414
10415 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10416
10417
10418 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10419 that SIZE is equal to ROUNDED_SIZE. */
10420
10421 if (size != rounded_size)
10422 emit_stack_probe (plus_constant (Pmode,
10423 gen_rtx_PLUS (Pmode,
10424 stack_pointer_rtx,
10425 sr.reg),
10426 rounded_size - size));
10427
10428 release_scratch_register_on_entry (&sr);
10429 }
10430
10431 /* Make sure nothing is scheduled before we are done. */
10432 emit_insn (gen_blockage ());
10433 }
10434
10435 /* Probe a range of stack addresses from REG to END, inclusive. These are
10436 offsets from the current stack pointer. */
10437
10438 const char *
10439 output_probe_stack_range (rtx reg, rtx end)
10440 {
10441 static int labelno = 0;
10442 char loop_lab[32], end_lab[32];
10443 rtx xops[3];
10444
10445 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10446 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10447
10448 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10449
10450 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10451 xops[0] = reg;
10452 xops[1] = end;
10453 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10454 fputs ("\tje\t", asm_out_file);
10455 assemble_name_raw (asm_out_file, end_lab);
10456 fputc ('\n', asm_out_file);
10457
10458 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10459 xops[1] = GEN_INT (PROBE_INTERVAL);
10460 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10461
10462 /* Probe at TEST_ADDR. */
10463 xops[0] = stack_pointer_rtx;
10464 xops[1] = reg;
10465 xops[2] = const0_rtx;
10466 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10467
10468 fprintf (asm_out_file, "\tjmp\t");
10469 assemble_name_raw (asm_out_file, loop_lab);
10470 fputc ('\n', asm_out_file);
10471
10472 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10473
10474 return "";
10475 }
10476
10477 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10478 to be generated in correct form. */
10479 static void
10480 ix86_finalize_stack_realign_flags (void)
10481 {
10482 	  /* Check if stack realignment is really needed after reload, and
10483 	     store the result in cfun.  */
10484 unsigned int incoming_stack_boundary
10485 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10486 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10487 unsigned int stack_realign = (incoming_stack_boundary
10488 < (crtl->is_leaf
10489 ? crtl->max_used_stack_slot_alignment
10490 : crtl->stack_alignment_needed));
10491
10492 if (crtl->stack_realign_finalized)
10493 {
10494 	      /* After stack_realign_needed is finalized, we can no longer
10495 	         change it.  */
10496 gcc_assert (crtl->stack_realign_needed == stack_realign);
10497 return;
10498 }
10499
10500 /* If the only reason for frame_pointer_needed is that we conservatively
10501 assumed stack realignment might be needed, but in the end nothing that
10502 needed the stack alignment had been spilled, clear frame_pointer_needed
10503 and say we don't need stack realignment. */
10504 if (stack_realign
10505 && frame_pointer_needed
10506 && crtl->is_leaf
10507 && flag_omit_frame_pointer
10508 && crtl->sp_is_unchanging
10509 && !ix86_current_function_calls_tls_descriptor
10510 && !crtl->accesses_prior_frames
10511 && !cfun->calls_alloca
10512 && !crtl->calls_eh_return
10513 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10514 && !ix86_frame_pointer_required ()
10515 && get_frame_size () == 0
10516 && ix86_nsaved_sseregs () == 0
10517 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10518 {
10519 HARD_REG_SET set_up_by_prologue, prologue_used;
10520 basic_block bb;
10521
10522 CLEAR_HARD_REG_SET (prologue_used);
10523 CLEAR_HARD_REG_SET (set_up_by_prologue);
10524 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10525 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10526 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10527 HARD_FRAME_POINTER_REGNUM);
10528 FOR_EACH_BB_FN (bb, cfun)
10529 {
10530 rtx insn;
10531 FOR_BB_INSNS (bb, insn)
10532 if (NONDEBUG_INSN_P (insn)
10533 && requires_stack_frame_p (insn, prologue_used,
10534 set_up_by_prologue))
10535 {
10536 crtl->stack_realign_needed = stack_realign;
10537 crtl->stack_realign_finalized = true;
10538 return;
10539 }
10540 }
10541
10542 /* If drap has been set, but it actually isn't live at the start
10543 of the function, there is no reason to set it up. */
10544 if (crtl->drap_reg)
10545 {
10546 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10547 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10548 {
10549 crtl->drap_reg = NULL_RTX;
10550 crtl->need_drap = false;
10551 }
10552 }
10553 else
10554 cfun->machine->no_drap_save_restore = true;
10555
10556 frame_pointer_needed = false;
10557 stack_realign = false;
10558 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10559 crtl->stack_alignment_needed = incoming_stack_boundary;
10560 crtl->stack_alignment_estimated = incoming_stack_boundary;
10561 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10562 crtl->preferred_stack_boundary = incoming_stack_boundary;
10563 df_finish_pass (true);
10564 df_scan_alloc (NULL);
10565 df_scan_blocks ();
10566 df_compute_regs_ever_live (true);
10567 df_analyze ();
10568 }
10569
10570 crtl->stack_realign_needed = stack_realign;
10571 crtl->stack_realign_finalized = true;
10572 }
10573
10574 /* Expand the prologue into a bunch of separate insns. */
10575
10576 void
10577 ix86_expand_prologue (void)
10578 {
10579 struct machine_function *m = cfun->machine;
10580 rtx insn, t;
10581 bool pic_reg_used;
10582 struct ix86_frame frame;
10583 HOST_WIDE_INT allocate;
10584 bool int_registers_saved;
10585 bool sse_registers_saved;
10586
10587 ix86_finalize_stack_realign_flags ();
10588
10589 /* DRAP should not coexist with stack_realign_fp */
10590 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10591
10592 memset (&m->fs, 0, sizeof (m->fs));
10593
10594 /* Initialize CFA state for before the prologue. */
10595 m->fs.cfa_reg = stack_pointer_rtx;
10596 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10597
10598 /* Track SP offset to the CFA. We continue tracking this after we've
10599 swapped the CFA register away from SP. In the case of re-alignment
10600 	     this is fudged; we're interested in offsets within the local frame.  */
10601 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10602 m->fs.sp_valid = true;
10603
10604 ix86_compute_frame_layout (&frame);
10605
10606 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10607 {
10608 /* We should have already generated an error for any use of
10609 ms_hook on a nested function. */
10610 gcc_checking_assert (!ix86_static_chain_on_stack);
10611
10612 	      /* Check if profiling is active and we shall use the
10613 	         profiling-before-prologue variant.  If so, issue a sorry.  */
10614 if (crtl->profile && flag_fentry != 0)
10615 sorry ("ms_hook_prologue attribute isn%'t compatible "
10616 "with -mfentry for 32-bit");
10617
10618 /* In ix86_asm_output_function_label we emitted:
10619 8b ff movl.s %edi,%edi
10620 55 push %ebp
10621 8b ec movl.s %esp,%ebp
10622
10623 This matches the hookable function prologue in Win32 API
10624 functions in Microsoft Windows XP Service Pack 2 and newer.
10625 Wine uses this to enable Windows apps to hook the Win32 API
10626 functions provided by Wine.
10627
10628 What that means is that we've already set up the frame pointer. */
10629
10630 if (frame_pointer_needed
10631 && !(crtl->drap_reg && crtl->stack_realign_needed))
10632 {
10633 rtx push, mov;
10634
10635 /* We've decided to use the frame pointer already set up.
10636 Describe this to the unwinder by pretending that both
10637 push and mov insns happen right here.
10638
10639 Putting the unwind info here at the end of the ms_hook
10640 is done so that we can make absolutely certain we get
10641 the required byte sequence at the start of the function,
10642 rather than relying on an assembler that can produce
10643 the exact encoding required.
10644
10645 However it does mean (in the unpatched case) that we have
10646 a 1 insn window where the asynchronous unwind info is
10647 incorrect. However, if we placed the unwind info at
10648 its correct location we would have incorrect unwind info
10649 in the patched case. Which is probably all moot since
10650 I don't expect Wine generates dwarf2 unwind info for the
10651 system libraries that use this feature. */
10652
10653 insn = emit_insn (gen_blockage ());
10654
10655 push = gen_push (hard_frame_pointer_rtx);
10656 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10657 stack_pointer_rtx);
10658 RTX_FRAME_RELATED_P (push) = 1;
10659 RTX_FRAME_RELATED_P (mov) = 1;
10660
10661 RTX_FRAME_RELATED_P (insn) = 1;
10662 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10663 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10664
10665 /* Note that gen_push incremented m->fs.cfa_offset, even
10666 though we didn't emit the push insn here. */
10667 m->fs.cfa_reg = hard_frame_pointer_rtx;
10668 m->fs.fp_offset = m->fs.cfa_offset;
10669 m->fs.fp_valid = true;
10670 }
10671 else
10672 {
10673 /* The frame pointer is not needed so pop %ebp again.
10674 This leaves us with a pristine state. */
10675 emit_insn (gen_pop (hard_frame_pointer_rtx));
10676 }
10677 }
10678
10679 /* The first insn of a function that accepts its static chain on the
10680 stack is to push the register that would be filled in by a direct
10681 call. This insn will be skipped by the trampoline. */
10682 else if (ix86_static_chain_on_stack)
10683 {
10684 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10685 emit_insn (gen_blockage ());
10686
10687 /* We don't want to interpret this push insn as a register save,
10688 only as a stack adjustment. The real copy of the register as
10689 a save will be done later, if needed. */
10690 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10691 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10692 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10693 RTX_FRAME_RELATED_P (insn) = 1;
10694 }
10695
10696 	  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10697 	     DRAP is needed and stack realignment is really needed after reload.  */
10698 if (stack_realign_drap)
10699 {
10700 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10701
10702 /* Only need to push parameter pointer reg if it is caller saved. */
10703 if (!call_used_regs[REGNO (crtl->drap_reg)])
10704 {
10705 /* Push arg pointer reg */
10706 insn = emit_insn (gen_push (crtl->drap_reg));
10707 RTX_FRAME_RELATED_P (insn) = 1;
10708 }
10709
10710 /* Grab the argument pointer. */
10711 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10712 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10713 RTX_FRAME_RELATED_P (insn) = 1;
10714 m->fs.cfa_reg = crtl->drap_reg;
10715 m->fs.cfa_offset = 0;
10716
10717 /* Align the stack. */
10718 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10719 stack_pointer_rtx,
10720 GEN_INT (-align_bytes)));
10721 RTX_FRAME_RELATED_P (insn) = 1;
10722
10723 /* Replicate the return address on the stack so that return
10724 address can be reached via (argp - 1) slot. This is needed
10725 to implement macro RETURN_ADDR_RTX and intrinsic function
10726 expand_builtin_return_addr etc. */
10727 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10728 t = gen_frame_mem (word_mode, t);
10729 insn = emit_insn (gen_push (t));
10730 RTX_FRAME_RELATED_P (insn) = 1;
10731
10732 /* For the purposes of frame and register save area addressing,
10733 we've started over with a new frame. */
10734 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10735 m->fs.realigned = true;
10736 }
10737
10738 int_registers_saved = (frame.nregs == 0);
10739 sse_registers_saved = (frame.nsseregs == 0);
10740
10741 if (frame_pointer_needed && !m->fs.fp_valid)
10742 {
10743 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10744 slower on all targets. Also sdb doesn't like it. */
10745 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10746 RTX_FRAME_RELATED_P (insn) = 1;
10747
10748 /* Push registers now, before setting the frame pointer
10749 on SEH target. */
10750 if (!int_registers_saved
10751 && TARGET_SEH
10752 && !frame.save_regs_using_mov)
10753 {
10754 ix86_emit_save_regs ();
10755 int_registers_saved = true;
10756 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10757 }
10758
10759 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10760 {
10761 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10762 RTX_FRAME_RELATED_P (insn) = 1;
10763
10764 if (m->fs.cfa_reg == stack_pointer_rtx)
10765 m->fs.cfa_reg = hard_frame_pointer_rtx;
10766 m->fs.fp_offset = m->fs.sp_offset;
10767 m->fs.fp_valid = true;
10768 }
10769 }
10770
10771 if (!int_registers_saved)
10772 {
10773 /* If saving registers via PUSH, do so now. */
10774 if (!frame.save_regs_using_mov)
10775 {
10776 ix86_emit_save_regs ();
10777 int_registers_saved = true;
10778 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10779 }
10780
10781 	      /* When using the red zone we may start saving registers before
10782 	         allocating the stack frame, saving one cycle of the prologue.
10783 	         However, avoid doing this if we have to probe the stack; at least
10784 	         on x86_64 the stack probe can turn into a call that clobbers a
10785 	         red zone location.  */
10785 else if (ix86_using_red_zone ()
10786 && (! TARGET_STACK_PROBE
10787 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10788 {
10789 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10790 int_registers_saved = true;
10791 }
10792 }
10793
10794 if (stack_realign_fp)
10795 {
10796 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10797 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10798
10799 /* The computation of the size of the re-aligned stack frame means
10800 that we must allocate the size of the register save area before
10801 performing the actual alignment. Otherwise we cannot guarantee
10802 that there's enough storage above the realignment point. */
10803 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10804 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10805 GEN_INT (m->fs.sp_offset
10806 - frame.sse_reg_save_offset),
10807 -1, false);
10808
10809 /* Align the stack. */
10810 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10811 stack_pointer_rtx,
10812 GEN_INT (-align_bytes)));
10813
10814 /* For the purposes of register save area addressing, the stack
10815 pointer is no longer valid. As for the value of sp_offset,
10816 see ix86_compute_frame_layout, which we need to match in order
10817 to pass verification of stack_pointer_offset at the end. */
10818 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10819 m->fs.sp_valid = false;
10820 }
10821
10822 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10823
10824 if (flag_stack_usage_info)
10825 {
10826 /* We start to count from ARG_POINTER. */
10827 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10828
10829 /* If it was realigned, take into account the fake frame. */
10830 if (stack_realign_drap)
10831 {
10832 if (ix86_static_chain_on_stack)
10833 stack_size += UNITS_PER_WORD;
10834
10835 if (!call_used_regs[REGNO (crtl->drap_reg)])
10836 stack_size += UNITS_PER_WORD;
10837
10838 /* This over-estimates by 1 minimal-stack-alignment-unit but
10839 mitigates that by counting in the new return address slot. */
10840 current_function_dynamic_stack_size
10841 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10842 }
10843
10844 current_function_static_stack_size = stack_size;
10845 }
10846
10847 /* On SEH target with very large frame size, allocate an area to save
10848 SSE registers (as the very large allocation won't be described). */
10849 if (TARGET_SEH
10850 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10851 && !sse_registers_saved)
10852 {
10853 HOST_WIDE_INT sse_size =
10854 frame.sse_reg_save_offset - frame.reg_save_offset;
10855
10856 gcc_assert (int_registers_saved);
10857
10858 /* No need to do stack checking as the area will be immediately
10859 written. */
10860 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10861 GEN_INT (-sse_size), -1,
10862 m->fs.cfa_reg == stack_pointer_rtx);
10863 allocate -= sse_size;
10864 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10865 sse_registers_saved = true;
10866 }
10867
10868 /* The stack has already been decremented by the instruction calling us
10869 so probe if the size is non-negative to preserve the protection area. */
10870 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10871 {
10872 /* We expect the registers to be saved when probes are used. */
10873 gcc_assert (int_registers_saved);
10874
10875 if (STACK_CHECK_MOVING_SP)
10876 {
10877 if (!(crtl->is_leaf && !cfun->calls_alloca
10878 && allocate <= PROBE_INTERVAL))
10879 {
10880 ix86_adjust_stack_and_probe (allocate);
10881 allocate = 0;
10882 }
10883 }
10884 else
10885 {
10886 HOST_WIDE_INT size = allocate;
10887
10888 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10889 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10890
10891 if (TARGET_STACK_PROBE)
10892 {
10893 if (crtl->is_leaf && !cfun->calls_alloca)
10894 {
10895 if (size > PROBE_INTERVAL)
10896 ix86_emit_probe_stack_range (0, size);
10897 }
10898 else
10899 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10900 }
10901 else
10902 {
10903 if (crtl->is_leaf && !cfun->calls_alloca)
10904 {
10905 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
10906 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
10907 size - STACK_CHECK_PROTECT);
10908 }
10909 else
10910 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10911 }
10912 }
10913 }
10914
10915 if (allocate == 0)
10916 ;
10917 else if (!ix86_target_stack_probe ()
10918 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10919 {
10920 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10921 GEN_INT (-allocate), -1,
10922 m->fs.cfa_reg == stack_pointer_rtx);
10923 }
10924 else
10925 {
10926 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10927 rtx r10 = NULL;
10928 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10929 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
10930 bool eax_live = false;
10931 bool r10_live = false;
10932
10933 if (TARGET_64BIT)
10934 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10935 if (!TARGET_64BIT_MS_ABI)
10936 eax_live = ix86_eax_live_at_start_p ();
10937
10938 /* Note that SEH directives need to continue tracking the stack
10939 pointer even after the frame pointer has been set up. */
10940 if (eax_live)
10941 {
10942 insn = emit_insn (gen_push (eax));
10943 allocate -= UNITS_PER_WORD;
10944 if (sp_is_cfa_reg || TARGET_SEH)
10945 {
10946 if (sp_is_cfa_reg)
10947 m->fs.cfa_offset += UNITS_PER_WORD;
10948 RTX_FRAME_RELATED_P (insn) = 1;
10949 }
10950 }
10951
10952 if (r10_live)
10953 {
10954 r10 = gen_rtx_REG (Pmode, R10_REG);
10955 insn = emit_insn (gen_push (r10));
10956 allocate -= UNITS_PER_WORD;
10957 if (sp_is_cfa_reg || TARGET_SEH)
10958 {
10959 if (sp_is_cfa_reg)
10960 m->fs.cfa_offset += UNITS_PER_WORD;
10961 RTX_FRAME_RELATED_P (insn) = 1;
10962 }
10963 }
10964
10965 emit_move_insn (eax, GEN_INT (allocate));
10966 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10967
10968 /* Use the fact that AX still contains ALLOCATE. */
10969 adjust_stack_insn = (Pmode == DImode
10970 ? gen_pro_epilogue_adjust_stack_di_sub
10971 : gen_pro_epilogue_adjust_stack_si_sub);
10972
10973 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10974 stack_pointer_rtx, eax));
10975
10976 if (sp_is_cfa_reg || TARGET_SEH)
10977 {
10978 if (sp_is_cfa_reg)
10979 m->fs.cfa_offset += allocate;
10980 RTX_FRAME_RELATED_P (insn) = 1;
10981 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10982 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10983 plus_constant (Pmode, stack_pointer_rtx,
10984 -allocate)));
10985 }
10986 m->fs.sp_offset += allocate;
10987
10988 /* Use stack_pointer_rtx for relative addressing so that code
10989 works for realigned stack, too. */
10990 if (r10_live && eax_live)
10991 {
10992 t = plus_constant (Pmode, stack_pointer_rtx, allocate);
10993 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
10994 gen_frame_mem (word_mode, t));
10995 t = plus_constant (Pmode, stack_pointer_rtx,
10996 allocate - UNITS_PER_WORD);
10997 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
10998 gen_frame_mem (word_mode, t));
10999 }
11000 else if (eax_live || r10_live)
11001 {
11002 t = plus_constant (Pmode, stack_pointer_rtx, allocate);
11003 emit_move_insn (gen_rtx_REG (word_mode,
11004 (eax_live ? AX_REG : R10_REG)),
11005 gen_frame_mem (word_mode, t));
11006 }
11007 }
11008 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11009
11010 	  /* If we haven't already set up the frame pointer, do so now.  */
11011 if (frame_pointer_needed && !m->fs.fp_valid)
11012 {
11013 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11014 GEN_INT (frame.stack_pointer_offset
11015 - frame.hard_frame_pointer_offset));
11016 insn = emit_insn (insn);
11017 RTX_FRAME_RELATED_P (insn) = 1;
11018 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11019
11020 if (m->fs.cfa_reg == stack_pointer_rtx)
11021 m->fs.cfa_reg = hard_frame_pointer_rtx;
11022 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11023 m->fs.fp_valid = true;
11024 }
11025
11026 if (!int_registers_saved)
11027 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11028 if (!sse_registers_saved)
11029 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11030
11031 pic_reg_used = false;
11032 	  /* We don't use the PIC register for the PE-COFF target.  */
11033 if (pic_offset_table_rtx
11034 && !TARGET_PECOFF
11035 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11036 || crtl->profile))
11037 {
11038 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11039
11040 if (alt_pic_reg_used != INVALID_REGNUM)
11041 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11042
11043 pic_reg_used = true;
11044 }
11045
11046 if (pic_reg_used)
11047 {
11048 if (TARGET_64BIT)
11049 {
11050 if (ix86_cmodel == CM_LARGE_PIC)
11051 {
11052 rtx label, tmp_reg;
11053
11054 gcc_assert (Pmode == DImode);
11055 label = gen_label_rtx ();
11056 emit_label (label);
11057 LABEL_PRESERVE_P (label) = 1;
11058 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11059 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11060 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11061 label));
11062 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11063 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11064 pic_offset_table_rtx, tmp_reg));
11065 }
11066 else
11067 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11068 }
11069 else
11070 {
11071 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11072 RTX_FRAME_RELATED_P (insn) = 1;
11073 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11074 }
11075 }
11076
11077 /* In the pic_reg_used case, make sure that the got load isn't deleted
11078 when mcount needs it. Blockage to avoid call movement across mcount
11079 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11080 note. */
11081 if (crtl->profile && !flag_fentry && pic_reg_used)
11082 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11083
11084 if (crtl->drap_reg && !crtl->stack_realign_needed)
11085 {
11086 	      /* vDRAP is set up, but after reload it turns out stack realignment
11087 	         isn't necessary; here we emit the prologue to set up DRAP
11088 	         without the stack realignment adjustment.  */
11089 t = choose_baseaddr (0);
11090 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11091 }
11092
11093 /* Prevent instructions from being scheduled into register save push
11094 sequence when access to the redzone area is done through frame pointer.
11095 The offset between the frame pointer and the stack pointer is calculated
11096 relative to the value of the stack pointer at the end of the function
11097 prologue, and moving instructions that access redzone area via frame
11098 pointer inside push sequence violates this assumption. */
11099 if (frame_pointer_needed && frame.red_zone_size)
11100 emit_insn (gen_memory_blockage ());
11101
11102 /* Emit cld instruction if stringops are used in the function. */
11103 if (TARGET_CLD && ix86_current_function_needs_cld)
11104 emit_insn (gen_cld ());
11105
11106 /* SEH requires that the prologue end within 256 bytes of the start of
11107 the function. Prevent instruction schedules that would extend that.
11108 Further, prevent alloca modifications to the stack pointer from being
11109 combined with prologue modifications. */
11110 if (TARGET_SEH)
11111 emit_insn (gen_prologue_use (stack_pointer_rtx));
11112 }
11113
11114 /* Emit code to restore REG using a POP insn. */
11115
11116 static void
11117 ix86_emit_restore_reg_using_pop (rtx reg)
11118 {
11119 struct machine_function *m = cfun->machine;
11120 rtx insn = emit_insn (gen_pop (reg));
11121
11122 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11123 m->fs.sp_offset -= UNITS_PER_WORD;
11124
11125 if (m->fs.cfa_reg == crtl->drap_reg
11126 && REGNO (reg) == REGNO (crtl->drap_reg))
11127 {
11128 /* Previously we'd represented the CFA as an expression
11129 like *(%ebp - 8). We've just popped that value from
11130 the stack, which means we need to reset the CFA to
11131 the drap register. This will remain until we restore
11132 the stack pointer. */
11133 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11134 RTX_FRAME_RELATED_P (insn) = 1;
11135
11136 /* This means that the DRAP register is valid for addressing too. */
11137 m->fs.drap_valid = true;
11138 return;
11139 }
11140
11141 if (m->fs.cfa_reg == stack_pointer_rtx)
11142 {
11143 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11144 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11145 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11146 RTX_FRAME_RELATED_P (insn) = 1;
11147
11148 m->fs.cfa_offset -= UNITS_PER_WORD;
11149 }
11150
11151 /* When the frame pointer is the CFA, and we pop it, we are
11152 swapping back to the stack pointer as the CFA. This happens
11153 for stack frames that don't allocate other data, so we assume
11154 the stack pointer is now pointing at the return address, i.e.
11155 	     the function entry state, which makes the offset one word.  */
11156 if (reg == hard_frame_pointer_rtx)
11157 {
11158 m->fs.fp_valid = false;
11159 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11160 {
11161 m->fs.cfa_reg = stack_pointer_rtx;
11162 m->fs.cfa_offset -= UNITS_PER_WORD;
11163
11164 add_reg_note (insn, REG_CFA_DEF_CFA,
11165 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11166 GEN_INT (m->fs.cfa_offset)));
11167 RTX_FRAME_RELATED_P (insn) = 1;
11168 }
11169 }
11170 }
11171
11172 /* Emit code to restore saved registers using POP insns. */
11173
11174 static void
11175 ix86_emit_restore_regs_using_pop (void)
11176 {
11177 unsigned int regno;
11178
11179 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11180 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11181 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11182 }
11183
11184 /* Emit code and notes for the LEAVE instruction. */
11185
11186 static void
11187 ix86_emit_leave (void)
11188 {
11189 struct machine_function *m = cfun->machine;
11190 rtx insn = emit_insn (ix86_gen_leave ());
11191
11192 ix86_add_queued_cfa_restore_notes (insn);
11193
11194 gcc_assert (m->fs.fp_valid);
11195 m->fs.sp_valid = true;
11196 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11197 m->fs.fp_valid = false;
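  /* For illustration: "leave" behaves like "movl %ebp, %esp" followed by
     "popl %ebp" (or the 64-bit equivalents), which is why the assignments
     above mark SP valid one word above the old frame-pointer save slot
     and invalidate FP.  */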
11198
11199 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11200 {
11201 m->fs.cfa_reg = stack_pointer_rtx;
11202 m->fs.cfa_offset = m->fs.sp_offset;
11203
11204 add_reg_note (insn, REG_CFA_DEF_CFA,
11205 plus_constant (Pmode, stack_pointer_rtx,
11206 m->fs.sp_offset));
11207 RTX_FRAME_RELATED_P (insn) = 1;
11208 }
11209 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11210 m->fs.fp_offset);
11211 }
11212
11213 /* Emit code to restore saved registers using MOV insns.
11214 First register is restored from CFA - CFA_OFFSET. */
11215 static void
11216 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11217 bool maybe_eh_return)
11218 {
11219 struct machine_function *m = cfun->machine;
11220 unsigned int regno;
11221
11222 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11223 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11224 {
11225 rtx reg = gen_rtx_REG (word_mode, regno);
11226 rtx insn, mem;
11227
11228 mem = choose_baseaddr (cfa_offset);
11229 mem = gen_frame_mem (word_mode, mem);
11230 insn = emit_move_insn (reg, mem);
11231
11232 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11233 {
11234 /* Previously we'd represented the CFA as an expression
11235 	           like *(%ebp - 8).  We've just loaded that value from
11236 	           the stack, which means we need to reset the CFA to
11237 the drap register. This will remain until we restore
11238 the stack pointer. */
11239 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11240 RTX_FRAME_RELATED_P (insn) = 1;
11241
11242 /* This means that the DRAP register is valid for addressing. */
11243 m->fs.drap_valid = true;
11244 }
11245 else
11246 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11247
11248 cfa_offset -= UNITS_PER_WORD;
11249 }
11250 }
11251
11252 	/* Emit code to restore saved SSE registers using MOV insns.
11253 First register is restored from CFA - CFA_OFFSET. */
11254 static void
11255 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11256 bool maybe_eh_return)
11257 {
11258 unsigned int regno;
11259
11260 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11261 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11262 {
11263 rtx reg = gen_rtx_REG (V4SFmode, regno);
11264 rtx mem;
11265
11266 mem = choose_baseaddr (cfa_offset);
11267 mem = gen_rtx_MEM (V4SFmode, mem);
11268 set_mem_align (mem, 128);
11269 emit_move_insn (reg, mem);
11270
11271 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11272
11273 cfa_offset -= 16;
11274 }
11275 }
11276
11277 /* Restore function stack, frame, and registers. */
11278
11279 void
11280 ix86_expand_epilogue (int style)
11281 {
11282 struct machine_function *m = cfun->machine;
11283 struct machine_frame_state frame_state_save = m->fs;
11284 struct ix86_frame frame;
11285 bool restore_regs_via_mov;
11286 bool using_drap;
11287
11288 ix86_finalize_stack_realign_flags ();
11289 ix86_compute_frame_layout (&frame);
11290
11291 m->fs.sp_valid = (!frame_pointer_needed
11292 || (crtl->sp_is_unchanging
11293 && !stack_realign_fp));
11294 gcc_assert (!m->fs.sp_valid
11295 || m->fs.sp_offset == frame.stack_pointer_offset);
11296
11297 /* The FP must be valid if the frame pointer is present. */
11298 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11299 gcc_assert (!m->fs.fp_valid
11300 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11301
11302 /* We must have *some* valid pointer to the stack frame. */
11303 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11304
11305 /* The DRAP is never valid at this point. */
11306 gcc_assert (!m->fs.drap_valid);
11307
11308 /* See the comment about red zone and frame
11309 pointer usage in ix86_expand_prologue. */
11310 if (frame_pointer_needed && frame.red_zone_size)
11311 emit_insn (gen_memory_blockage ());
11312
11313 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11314 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11315
11316 /* Determine the CFA offset of the end of the red-zone. */
11317 m->fs.red_zone_offset = 0;
11318 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11319 {
11320 /* The red-zone begins below the return address. */
11321 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
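      /* For illustration, assuming the usual x86-64 values of a 128-byte
	 red zone and 8-byte words: the red zone extends down to 136 bytes
	 below the CFA in the non-realigned case.  */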
11322
11323 /* When the register save area is in the aligned portion of
11324 the stack, determine the maximum runtime displacement that
11325 matches up with the aligned frame. */
11326 if (stack_realign_drap)
11327 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11328 + UNITS_PER_WORD);
11329 }
11330
11331 /* Special care must be taken for the normal return case of a function
11332 using eh_return: the eax and edx registers are marked as saved, but
11333 not restored along this path. Adjust the save location to match. */
11334 if (crtl->calls_eh_return && style != 2)
11335 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11336
11337 /* EH_RETURN requires the use of moves to function properly. */
11338 if (crtl->calls_eh_return)
11339 restore_regs_via_mov = true;
11340 /* SEH requires the use of pops to identify the epilogue. */
11341 else if (TARGET_SEH)
11342 restore_regs_via_mov = false;
11343 	  /* If we're only restoring one register and sp is not valid, then
11344 	     use a move instruction to restore the register, since it's
11345 	     less work than reloading sp and popping the register.  */
11346 else if (!m->fs.sp_valid && frame.nregs <= 1)
11347 restore_regs_via_mov = true;
11348 else if (TARGET_EPILOGUE_USING_MOVE
11349 && cfun->machine->use_fast_prologue_epilogue
11350 && (frame.nregs > 1
11351 || m->fs.sp_offset != frame.reg_save_offset))
11352 restore_regs_via_mov = true;
11353 else if (frame_pointer_needed
11354 && !frame.nregs
11355 && m->fs.sp_offset != frame.reg_save_offset)
11356 restore_regs_via_mov = true;
11357 else if (frame_pointer_needed
11358 && TARGET_USE_LEAVE
11359 && cfun->machine->use_fast_prologue_epilogue
11360 && frame.nregs == 1)
11361 restore_regs_via_mov = true;
11362 else
11363 restore_regs_via_mov = false;
11364
11365 if (restore_regs_via_mov || frame.nsseregs)
11366 {
11367 /* Ensure that the entire register save area is addressable via
11368 the stack pointer, if we will restore via sp. */
11369 if (TARGET_64BIT
11370 && m->fs.sp_offset > 0x7fffffff
11371 && !(m->fs.fp_valid || m->fs.drap_valid)
11372 && (frame.nsseregs + frame.nregs) != 0)
11373 {
11374 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11375 GEN_INT (m->fs.sp_offset
11376 - frame.sse_reg_save_offset),
11377 style,
11378 m->fs.cfa_reg == stack_pointer_rtx);
11379 }
11380 }
11381
11382 /* If there are any SSE registers to restore, then we have to do it
11383 via moves, since there's obviously no pop for SSE regs. */
11384 if (frame.nsseregs)
11385 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11386 style == 2);
11387
11388 if (restore_regs_via_mov)
11389 {
11390 rtx t;
11391
11392 if (frame.nregs)
11393 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11394
11395 /* eh_return epilogues need %ecx added to the stack pointer. */
11396 if (style == 2)
11397 {
11398 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11399
11400 /* Stack align doesn't work with eh_return. */
11401 gcc_assert (!stack_realign_drap);
11402 	      /* Neither do regparm nested functions.  */
11403 gcc_assert (!ix86_static_chain_on_stack);
11404
11405 if (frame_pointer_needed)
11406 {
11407 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11408 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11409 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11410
11411 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11412 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11413
11414 /* Note that we use SA as a temporary CFA, as the return
11415 address is at the proper place relative to it. We
11416 pretend this happens at the FP restore insn because
11417 prior to this insn the FP would be stored at the wrong
11418 offset relative to SA, and after this insn we have no
11419 other reasonable register to use for the CFA. We don't
11420 bother resetting the CFA to the SP for the duration of
11421 the return insn. */
11422 add_reg_note (insn, REG_CFA_DEF_CFA,
11423 plus_constant (Pmode, sa, UNITS_PER_WORD));
11424 ix86_add_queued_cfa_restore_notes (insn);
11425 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11426 RTX_FRAME_RELATED_P (insn) = 1;
11427
11428 m->fs.cfa_reg = sa;
11429 m->fs.cfa_offset = UNITS_PER_WORD;
11430 m->fs.fp_valid = false;
11431
11432 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11433 const0_rtx, style, false);
11434 }
11435 else
11436 {
11437 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11438 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11439 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11440 ix86_add_queued_cfa_restore_notes (insn);
11441
11442 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11443 if (m->fs.cfa_offset != UNITS_PER_WORD)
11444 {
11445 m->fs.cfa_offset = UNITS_PER_WORD;
11446 add_reg_note (insn, REG_CFA_DEF_CFA,
11447 plus_constant (Pmode, stack_pointer_rtx,
11448 UNITS_PER_WORD));
11449 RTX_FRAME_RELATED_P (insn) = 1;
11450 }
11451 }
11452 m->fs.sp_offset = UNITS_PER_WORD;
11453 m->fs.sp_valid = true;
11454 }
11455 }
11456 else
11457 {
11458 /* SEH requires that the function end with (1) a stack adjustment
11459 if necessary, (2) a sequence of pops, and (3) a return or
11460 jump instruction. Prevent insns from the function body from
11461 being scheduled into this sequence. */
11462 if (TARGET_SEH)
11463 {
11464 	          /* Prevent a catch region from being adjacent to the standard
11465 	             epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda
11466 	             nor several other flags that would be interesting to test are
11467 	             set up yet.  */
11468 if (flag_non_call_exceptions)
11469 emit_insn (gen_nops (const1_rtx));
11470 else
11471 emit_insn (gen_blockage ());
11472 }
11473
11474 	      /* The first step is to deallocate the stack frame so that we can
11475 	         pop the registers.  Also do it on SEH targets for very large
11476 	         frames, as the emitted instructions aren't allowed by the ABI
11477 	         in epilogues.  */
11478 if (!m->fs.sp_valid
11479 || (TARGET_SEH
11480 && (m->fs.sp_offset - frame.reg_save_offset
11481 >= SEH_MAX_FRAME_SIZE)))
11482 {
11483 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11484 GEN_INT (m->fs.fp_offset
11485 - frame.reg_save_offset),
11486 style, false);
11487 }
11488 else if (m->fs.sp_offset != frame.reg_save_offset)
11489 {
11490 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11491 GEN_INT (m->fs.sp_offset
11492 - frame.reg_save_offset),
11493 style,
11494 m->fs.cfa_reg == stack_pointer_rtx);
11495 }
11496
11497 ix86_emit_restore_regs_using_pop ();
11498 }
11499
11500 	  /* If we used a frame pointer and haven't already got rid of it,
11501 	     then do so now.  */
11502 if (m->fs.fp_valid)
11503 {
11504 /* If the stack pointer is valid and pointing at the frame
11505 pointer store address, then we only need a pop. */
11506 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11507 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11508 /* Leave results in shorter dependency chains on CPUs that are
11509 able to grok it fast. */
11510 else if (TARGET_USE_LEAVE
11511 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11512 || !cfun->machine->use_fast_prologue_epilogue)
11513 ix86_emit_leave ();
11514 else
11515 {
11516 pro_epilogue_adjust_stack (stack_pointer_rtx,
11517 hard_frame_pointer_rtx,
11518 const0_rtx, style, !using_drap);
11519 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11520 }
11521 }
11522
11523 if (using_drap)
11524 {
11525 int param_ptr_offset = UNITS_PER_WORD;
11526 rtx insn;
11527
11528 gcc_assert (stack_realign_drap);
11529
11530 if (ix86_static_chain_on_stack)
11531 param_ptr_offset += UNITS_PER_WORD;
11532 if (!call_used_regs[REGNO (crtl->drap_reg)])
11533 param_ptr_offset += UNITS_PER_WORD;
11534
11535 insn = emit_insn (gen_rtx_SET
11536 (VOIDmode, stack_pointer_rtx,
11537 gen_rtx_PLUS (Pmode,
11538 crtl->drap_reg,
11539 GEN_INT (-param_ptr_offset))));
11540 m->fs.cfa_reg = stack_pointer_rtx;
11541 m->fs.cfa_offset = param_ptr_offset;
11542 m->fs.sp_offset = param_ptr_offset;
11543 m->fs.realigned = false;
11544
11545 add_reg_note (insn, REG_CFA_DEF_CFA,
11546 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11547 GEN_INT (param_ptr_offset)));
11548 RTX_FRAME_RELATED_P (insn) = 1;
11549
11550 if (!call_used_regs[REGNO (crtl->drap_reg)])
11551 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11552 }
11553
11554 /* At this point the stack pointer must be valid, and we must have
11555 restored all of the registers. We may not have deallocated the
11556 entire stack frame. We've delayed this until now because it may
11557 be possible to merge the local stack deallocation with the
11558 deallocation forced by ix86_static_chain_on_stack. */
11559 gcc_assert (m->fs.sp_valid);
11560 gcc_assert (!m->fs.fp_valid);
11561 gcc_assert (!m->fs.realigned);
11562 if (m->fs.sp_offset != UNITS_PER_WORD)
11563 {
11564 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11565 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11566 style, true);
11567 }
11568 else
11569 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11570
11571 /* Sibcall epilogues don't want a return instruction. */
11572 if (style == 0)
11573 {
11574 m->fs = frame_state_save;
11575 return;
11576 }
11577
11578 if (crtl->args.pops_args && crtl->args.size)
11579 {
11580 rtx popc = GEN_INT (crtl->args.pops_args);
11581
11582 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11583 address, do explicit add, and jump indirectly to the caller. */
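      /* An illustrative sketch of the >= 64K path below (AT&T syntax, with
	 N standing for crtl->args.pops_args):

	     popl    %ecx          # pop the return address
	     addl    $N, %esp      # pop the arguments
	     jmp     *%ecx         # return to the caller

	 instead of a single "ret $N", whose immediate is limited to 16 bits.  */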
11584
11585 if (crtl->args.pops_args >= 65536)
11586 {
11587 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11588 rtx insn;
11589
11590 /* There is no "pascal" calling convention in any 64bit ABI. */
11591 gcc_assert (!TARGET_64BIT);
11592
11593 insn = emit_insn (gen_pop (ecx));
11594 m->fs.cfa_offset -= UNITS_PER_WORD;
11595 m->fs.sp_offset -= UNITS_PER_WORD;
11596
11597 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11598 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11599 add_reg_note (insn, REG_CFA_REGISTER,
11600 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11601 RTX_FRAME_RELATED_P (insn) = 1;
11602
11603 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11604 popc, -1, true);
11605 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11606 }
11607 else
11608 emit_jump_insn (gen_simple_return_pop_internal (popc));
11609 }
11610 else
11611 emit_jump_insn (gen_simple_return_internal ());
11612
11613 /* Restore the state back to the state from the prologue,
11614 so that it's correct for the next epilogue. */
11615 m->fs = frame_state_save;
11616 }
11617
11618 /* Reset from the function's potential modifications. */
11619
11620 static void
11621 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11622 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11623 {
11624 if (pic_offset_table_rtx)
11625 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11626 #if TARGET_MACHO
11627 /* Mach-O doesn't support labels at the end of objects, so if
11628 it looks like we might want one, insert a NOP. */
11629 {
11630 rtx insn = get_last_insn ();
11631 rtx deleted_debug_label = NULL_RTX;
11632 while (insn
11633 && NOTE_P (insn)
11634 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11635 {
11636 	        /* If the only labels are NOTE_INSN_DELETED_DEBUG_LABEL notes,
11637 	           don't insert a nop; instead set their CODE_LABEL_NUMBER to -1,
11638 	           otherwise there would be code generation differences
11639 	           between -g and -g0.  */
11640 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11641 deleted_debug_label = insn;
11642 insn = PREV_INSN (insn);
11643 }
11644 if (insn
11645 && (LABEL_P (insn)
11646 || (NOTE_P (insn)
11647 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11648 fputs ("\tnop\n", file);
11649 else if (deleted_debug_label)
11650 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11651 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11652 CODE_LABEL_NUMBER (insn) = -1;
11653 }
11654 #endif
11655
11656 }
11657
11658 /* Return a scratch register to use in the split stack prologue. The
11659 	   split stack prologue is used for -fsplit-stack.  It consists of the first
11660 	   instructions in the function, even before the regular prologue.
11661 The scratch register can be any caller-saved register which is not
11662 used for parameters or for the static chain. */
11663
11664 static unsigned int
11665 split_stack_prologue_scratch_regno (void)
11666 {
11667 if (TARGET_64BIT)
11668 return R11_REG;
11669 else
11670 {
11671 bool is_fastcall, is_thiscall;
11672 int regparm;
11673
11674 is_fastcall = (lookup_attribute ("fastcall",
11675 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11676 != NULL);
11677 is_thiscall = (lookup_attribute ("thiscall",
11678 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11679 != NULL);
11680 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11681
11682 if (is_fastcall)
11683 {
11684 if (DECL_STATIC_CHAIN (cfun->decl))
11685 {
11686 sorry ("-fsplit-stack does not support fastcall with "
11687 "nested function");
11688 return INVALID_REGNUM;
11689 }
11690 return AX_REG;
11691 }
11692 else if (is_thiscall)
11693 {
11694 if (!DECL_STATIC_CHAIN (cfun->decl))
11695 return DX_REG;
11696 return AX_REG;
11697 }
11698 else if (regparm < 3)
11699 {
11700 if (!DECL_STATIC_CHAIN (cfun->decl))
11701 return CX_REG;
11702 else
11703 {
11704 if (regparm >= 2)
11705 {
11706 sorry ("-fsplit-stack does not support 2 register "
11707 	                     "parameters for a nested function");
11708 return INVALID_REGNUM;
11709 }
11710 return DX_REG;
11711 }
11712 }
11713 else
11714 {
11715 /* FIXME: We could make this work by pushing a register
11716 around the addition and comparison. */
11717 sorry ("-fsplit-stack does not support 3 register parameters");
11718 return INVALID_REGNUM;
11719 }
11720 }
11721 }
11722
11723 /* A SYMBOL_REF for the function which allocates new stackspace for
11724 -fsplit-stack. */
11725
11726 static GTY(()) rtx split_stack_fn;
11727
11728 /* A SYMBOL_REF for the more stack function when using the large
11729 model. */
11730
11731 static GTY(()) rtx split_stack_fn_large;
11732
11733 /* Handle -fsplit-stack. These are the first instructions in the
11734 function, even before the regular prologue. */
11735
11736 void
11737 ix86_expand_split_stack_prologue (void)
11738 {
11739 struct ix86_frame frame;
11740 HOST_WIDE_INT allocate;
11741 unsigned HOST_WIDE_INT args_size;
11742 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11743 rtx scratch_reg = NULL_RTX;
11744 rtx varargs_label = NULL_RTX;
11745 rtx fn;
11746
11747 gcc_assert (flag_split_stack && reload_completed);
11748
11749 ix86_finalize_stack_realign_flags ();
11750 ix86_compute_frame_layout (&frame);
11751 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11752
11753 /* This is the label we will branch to if we have enough stack
11754 space. We expect the basic block reordering pass to reverse this
11755 branch if optimizing, so that we branch in the unlikely case. */
11756 label = gen_label_rtx ();
11757
11758 /* We need to compare the stack pointer minus the frame size with
11759 the stack boundary in the TCB. The stack boundary always gives
11760 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11761 can compare directly. Otherwise we need to do an addition. */
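/* As a sketch (assuming the glibc split-stack ABI, where the stack
   boundary lives in the TCB at %fs:0x70 in 64-bit mode and %gs:0x30
   in 32-bit mode), the comparison emitted below looks roughly like

	cmp	%fs:0x70, %rsp		# small frame: compare %rsp directly

   or, for a frame of SPLIT_STACK_AVAILABLE bytes or more,

	lea	-FRAME(%rsp), %r11
	cmp	%fs:0x70, %r11		# large frame: compare via scratch reg

   followed by a jae to the "enough stack" label.  */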
11762
11763 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11764 UNSPEC_STACK_CHECK);
11765 limit = gen_rtx_CONST (Pmode, limit);
11766 limit = gen_rtx_MEM (Pmode, limit);
11767 if (allocate < SPLIT_STACK_AVAILABLE)
11768 current = stack_pointer_rtx;
11769 else
11770 {
11771 unsigned int scratch_regno;
11772 rtx offset;
11773
11774 /* We need a scratch register to hold the stack pointer minus
11775 the required frame size. Since this is the very start of the
11776 function, the scratch register can be any caller-saved
11777 register which is not used for parameters. */
11778 offset = GEN_INT (- allocate);
11779 scratch_regno = split_stack_prologue_scratch_regno ();
11780 if (scratch_regno == INVALID_REGNUM)
11781 return;
11782 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11783 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11784 {
11785 /* We don't use ix86_gen_add3 in this case because it will
11786 want to split to lea, but when not optimizing the insn
11787 will not be split after this point. */
11788 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11789 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11790 offset)));
11791 }
11792 else
11793 {
11794 emit_move_insn (scratch_reg, offset);
11795 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11796 stack_pointer_rtx));
11797 }
11798 current = scratch_reg;
11799 }
11800
11801 ix86_expand_branch (GEU, current, limit, label);
11802 jump_insn = get_last_insn ();
11803 JUMP_LABEL (jump_insn) = label;
11804
11805 /* Mark the jump as very likely to be taken. */
11806 add_int_reg_note (jump_insn, REG_BR_PROB,
11807 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11808
11809 if (split_stack_fn == NULL_RTX)
11810 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11811 fn = split_stack_fn;
11812
11813 /* Get more stack space. We pass in the desired stack space and the
11814 size of the arguments to copy to the new stack. In 32-bit mode
11815 we push the parameters; __morestack will return on a new stack
11816 anyhow. In 64-bit mode we pass the parameters in r10 and
11817 r11. */
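/* Roughly (a sketch; the exact sequence depends on the target and
   code model), the resulting call looks like

	64-bit:	mov	$FRAME, %r10
		mov	$ARGS, %r11
		call	__morestack

	32-bit:	push	$ARGS
		push	$FRAME
		call	__morestack  */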
11818 allocate_rtx = GEN_INT (allocate);
11819 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11820 call_fusage = NULL_RTX;
11821 if (TARGET_64BIT)
11822 {
11823 rtx reg10, reg11;
11824
11825 reg10 = gen_rtx_REG (Pmode, R10_REG);
11826 reg11 = gen_rtx_REG (Pmode, R11_REG);
11827
11828 /* If this function uses a static chain, it will be in %r10.
11829 Preserve it across the call to __morestack. */
11830 if (DECL_STATIC_CHAIN (cfun->decl))
11831 {
11832 rtx rax;
11833
11834 rax = gen_rtx_REG (word_mode, AX_REG);
11835 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11836 use_reg (&call_fusage, rax);
11837 }
11838
11839 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11840 && !TARGET_PECOFF)
11841 {
11842 HOST_WIDE_INT argval;
11843
11844 gcc_assert (Pmode == DImode);
11845 /* When using the large model we need to load the address
11846 into a register, and we've run out of registers. So we
11847 switch to a different calling convention, and we call a
11848 different function: __morestack_large. We pass the
11849 argument size in the upper 32 bits of r10 and pass the
11850 frame size in the lower 32 bits. */
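/* For example, with args_size == 0x20 and allocate == 0x1000 the
   packed value loaded into %r10 below is 0x0000002000001000.  */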
11851 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11852 gcc_assert ((args_size & 0xffffffff) == args_size);
11853
11854 if (split_stack_fn_large == NULL_RTX)
11855 split_stack_fn_large =
11856 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11857
11858 if (ix86_cmodel == CM_LARGE_PIC)
11859 {
11860 rtx label, x;
11861
11862 label = gen_label_rtx ();
11863 emit_label (label);
11864 LABEL_PRESERVE_P (label) = 1;
11865 emit_insn (gen_set_rip_rex64 (reg10, label));
11866 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11867 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
11868 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11869 UNSPEC_GOT);
11870 x = gen_rtx_CONST (Pmode, x);
11871 emit_move_insn (reg11, x);
11872 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11873 x = gen_const_mem (Pmode, x);
11874 emit_move_insn (reg11, x);
11875 }
11876 else
11877 emit_move_insn (reg11, split_stack_fn_large);
11878
11879 fn = reg11;
11880
11881 argval = ((args_size << 16) << 16) + allocate;
11882 emit_move_insn (reg10, GEN_INT (argval));
11883 }
11884 else
11885 {
11886 emit_move_insn (reg10, allocate_rtx);
11887 emit_move_insn (reg11, GEN_INT (args_size));
11888 use_reg (&call_fusage, reg11);
11889 }
11890
11891 use_reg (&call_fusage, reg10);
11892 }
11893 else
11894 {
11895 emit_insn (gen_push (GEN_INT (args_size)));
11896 emit_insn (gen_push (allocate_rtx));
11897 }
11898 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11899 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11900 NULL_RTX, false);
11901 add_function_usage_to (call_insn, call_fusage);
11902
11903 /* In order to make call/return prediction work right, we now need
11904 to execute a return instruction. See
11905 libgcc/config/i386/morestack.S for the details on how this works.
11906
11907 For flow purposes gcc must not see this as a return
11908 instruction--we need control flow to continue at the subsequent
11909 label. Therefore, we use an unspec. */
11910 gcc_assert (crtl->args.pops_args < 65536);
11911 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11912
11913 /* If we are in 64-bit mode and this function uses a static chain,
11914 we saved %r10 in %rax before calling __morestack. */
11915 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11916 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11917 gen_rtx_REG (word_mode, AX_REG));
11918
11919 /* If this function calls va_start, we need to store a pointer to
11920 the arguments on the old stack, because they may not have been
11921 all copied to the new stack. At this point the old stack can be
11922 found at the frame pointer value used by __morestack, because
11923 __morestack has set that up before calling back to us. Here we
11924 store that pointer in a scratch register, and in
11925 ix86_expand_prologue we store the scratch register in a stack
11926 slot. */
11927 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11928 {
11929 unsigned int scratch_regno;
11930 rtx frame_reg;
11931 int words;
11932
11933 scratch_regno = split_stack_prologue_scratch_regno ();
11934 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11935 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11936
11937 /* 64-bit:
11938 fp -> old fp value
11939 return address within this function
11940 return address of caller of this function
11941 stack arguments
11942 So we add three words to get to the stack arguments.
11943
11944 32-bit:
11945 fp -> old fp value
11946 return address within this function
11947 first argument to __morestack
11948 second argument to __morestack
11949 return address of caller of this function
11950 stack arguments
11951 So we add five words to get to the stack arguments.
11952 */
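/* For example, with UNITS_PER_WORD == 8 this computes
   scratch = %rbp + 3*8 = %rbp + 24 in 64-bit mode, and with
   UNITS_PER_WORD == 4 it computes %ebp + 5*4 = %ebp + 20 in
   32-bit mode.  */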
11953 words = TARGET_64BIT ? 3 : 5;
11954 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11955 gen_rtx_PLUS (Pmode, frame_reg,
11956 GEN_INT (words * UNITS_PER_WORD))));
11957
11958 varargs_label = gen_label_rtx ();
11959 emit_jump_insn (gen_jump (varargs_label));
11960 JUMP_LABEL (get_last_insn ()) = varargs_label;
11961
11962 emit_barrier ();
11963 }
11964
11965 emit_label (label);
11966 LABEL_NUSES (label) = 1;
11967
11968 /* If this function calls va_start, we now have to set the scratch
11969 register for the case where we do not call __morestack. In this
11970 case we need to set it based on the stack pointer. */
11971 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11972 {
11973 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11974 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11975 GEN_INT (UNITS_PER_WORD))));
11976
11977 emit_label (varargs_label);
11978 LABEL_NUSES (varargs_label) = 1;
11979 }
11980 }
11981
11982 /* We may have to tell the dataflow pass that the split stack prologue
11983 is initializing a scratch register. */
11984
11985 static void
11986 ix86_live_on_entry (bitmap regs)
11987 {
11988 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11989 {
11990 gcc_assert (flag_split_stack);
11991 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11992 }
11993 }
11994 \f
11995 /* Extract the parts of an RTL expression that is a valid memory address
11996 for an instruction. Return 0 if the structure of the address is
11997 grossly off. Return -1 if the address contains ASHIFT, so it is not
11998 strictly valid, but still used for computing the length of the lea instruction. */
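/* As an illustrative sketch, the Pmode address

	(plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))

   decomposes into out->base = (reg B), out->index = (reg A),
   out->scale = 4 and out->disp = (const_int 8), i.e. the operand
   that assembles to 8(B,A,4) in AT&T syntax.  */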
11999
12000 int
12001 ix86_decompose_address (rtx addr, struct ix86_address *out)
12002 {
12003 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12004 rtx base_reg, index_reg;
12005 HOST_WIDE_INT scale = 1;
12006 rtx scale_rtx = NULL_RTX;
12007 rtx tmp;
12008 int retval = 1;
12009 enum ix86_address_seg seg = SEG_DEFAULT;
12010
12011 /* Allow zero-extended SImode addresses;
12012 they will be emitted with the addr32 prefix. */
12013 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12014 {
12015 if (GET_CODE (addr) == ZERO_EXTEND
12016 && GET_MODE (XEXP (addr, 0)) == SImode)
12017 {
12018 addr = XEXP (addr, 0);
12019 if (CONST_INT_P (addr))
12020 return 0;
12021 }
12022 else if (GET_CODE (addr) == AND
12023 && const_32bit_mask (XEXP (addr, 1), DImode))
12024 {
12025 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12026 if (addr == NULL_RTX)
12027 return 0;
12028
12029 if (CONST_INT_P (addr))
12030 return 0;
12031 }
12032 }
12033
12034 /* Allow SImode subregs of DImode addresses;
12035 they will be emitted with the addr32 prefix. */
12036 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12037 {
12038 if (GET_CODE (addr) == SUBREG
12039 && GET_MODE (SUBREG_REG (addr)) == DImode)
12040 {
12041 addr = SUBREG_REG (addr);
12042 if (CONST_INT_P (addr))
12043 return 0;
12044 }
12045 }
12046
12047 if (REG_P (addr))
12048 base = addr;
12049 else if (GET_CODE (addr) == SUBREG)
12050 {
12051 if (REG_P (SUBREG_REG (addr)))
12052 base = addr;
12053 else
12054 return 0;
12055 }
12056 else if (GET_CODE (addr) == PLUS)
12057 {
12058 rtx addends[4], op;
12059 int n = 0, i;
12060
12061 op = addr;
12062 do
12063 {
12064 if (n >= 4)
12065 return 0;
12066 addends[n++] = XEXP (op, 1);
12067 op = XEXP (op, 0);
12068 }
12069 while (GET_CODE (op) == PLUS);
12070 if (n >= 4)
12071 return 0;
12072 addends[n] = op;
12073
12074 for (i = n; i >= 0; --i)
12075 {
12076 op = addends[i];
12077 switch (GET_CODE (op))
12078 {
12079 case MULT:
12080 if (index)
12081 return 0;
12082 index = XEXP (op, 0);
12083 scale_rtx = XEXP (op, 1);
12084 break;
12085
12086 case ASHIFT:
12087 if (index)
12088 return 0;
12089 index = XEXP (op, 0);
12090 tmp = XEXP (op, 1);
12091 if (!CONST_INT_P (tmp))
12092 return 0;
12093 scale = INTVAL (tmp);
12094 if ((unsigned HOST_WIDE_INT) scale > 3)
12095 return 0;
12096 scale = 1 << scale;
12097 break;
12098
12099 case ZERO_EXTEND:
12100 op = XEXP (op, 0);
12101 if (GET_CODE (op) != UNSPEC)
12102 return 0;
12103 /* FALLTHRU */
12104
12105 case UNSPEC:
12106 if (XINT (op, 1) == UNSPEC_TP
12107 && TARGET_TLS_DIRECT_SEG_REFS
12108 && seg == SEG_DEFAULT)
12109 seg = DEFAULT_TLS_SEG_REG;
12110 else
12111 return 0;
12112 break;
12113
12114 case SUBREG:
12115 if (!REG_P (SUBREG_REG (op)))
12116 return 0;
12117 /* FALLTHRU */
12118
12119 case REG:
12120 if (!base)
12121 base = op;
12122 else if (!index)
12123 index = op;
12124 else
12125 return 0;
12126 break;
12127
12128 case CONST:
12129 case CONST_INT:
12130 case SYMBOL_REF:
12131 case LABEL_REF:
12132 if (disp)
12133 return 0;
12134 disp = op;
12135 break;
12136
12137 default:
12138 return 0;
12139 }
12140 }
12141 }
12142 else if (GET_CODE (addr) == MULT)
12143 {
12144 index = XEXP (addr, 0); /* index*scale */
12145 scale_rtx = XEXP (addr, 1);
12146 }
12147 else if (GET_CODE (addr) == ASHIFT)
12148 {
12149 /* We're called for lea too, which implements ashift on occasion. */
12150 index = XEXP (addr, 0);
12151 tmp = XEXP (addr, 1);
12152 if (!CONST_INT_P (tmp))
12153 return 0;
12154 scale = INTVAL (tmp);
12155 if ((unsigned HOST_WIDE_INT) scale > 3)
12156 return 0;
12157 scale = 1 << scale;
12158 retval = -1;
12159 }
12160 else
12161 disp = addr; /* displacement */
12162
12163 if (index)
12164 {
12165 if (REG_P (index))
12166 ;
12167 else if (GET_CODE (index) == SUBREG
12168 && REG_P (SUBREG_REG (index)))
12169 ;
12170 else
12171 return 0;
12172 }
12173
12174 /* Extract the integral value of scale. */
12175 if (scale_rtx)
12176 {
12177 if (!CONST_INT_P (scale_rtx))
12178 return 0;
12179 scale = INTVAL (scale_rtx);
12180 }
12181
12182 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12183 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12184
12185 /* Avoid useless 0 displacement. */
12186 if (disp == const0_rtx && (base || index))
12187 disp = NULL_RTX;
12188
12189 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12190 if (base_reg && index_reg && scale == 1
12191 && (index_reg == arg_pointer_rtx
12192 || index_reg == frame_pointer_rtx
12193 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12194 {
12195 rtx tmp;
12196 tmp = base, base = index, index = tmp;
12197 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12198 }
12199
12200 /* Special case: %ebp cannot be encoded as a base without a displacement.
12201 Similarly %r13. */
12202 if (!disp
12203 && base_reg
12204 && (base_reg == hard_frame_pointer_rtx
12205 || base_reg == frame_pointer_rtx
12206 || base_reg == arg_pointer_rtx
12207 || (REG_P (base_reg)
12208 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12209 || REGNO (base_reg) == R13_REG))))
12210 disp = const0_rtx;
12211
12212 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12213 Avoid this by transforming to [%esi+0].
12214 Reload calls address legitimization without cfun defined, so we need
12215 to test cfun for being non-NULL. */
12216 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12217 && base_reg && !index_reg && !disp
12218 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12219 disp = const0_rtx;
12220
12221 /* Special case: encode reg+reg instead of reg*2. */
12222 if (!base && index && scale == 2)
12223 base = index, base_reg = index_reg, scale = 1;
12224
12225 /* Special case: scaling cannot be encoded without base or displacement. */
12226 if (!base && !disp && index && scale != 1)
12227 disp = const0_rtx;
12228
12229 out->base = base;
12230 out->index = index;
12231 out->disp = disp;
12232 out->scale = scale;
12233 out->seg = seg;
12234
12235 return retval;
12236 }
12237 \f
12238 /* Return cost of the memory address x.
12239 For i386, it is better to use a complex address than let gcc copy
12240 the address into a reg and make a new pseudo. But not if the address
12241 requires two regs - that would mean more pseudos with longer
12242 lifetimes. */
12243 static int
12244 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12245 addr_space_t as ATTRIBUTE_UNUSED,
12246 bool speed ATTRIBUTE_UNUSED)
12247 {
12248 struct ix86_address parts;
12249 int cost = 1;
12250 int ok = ix86_decompose_address (x, &parts);
12251
12252 gcc_assert (ok);
12253
12254 if (parts.base && GET_CODE (parts.base) == SUBREG)
12255 parts.base = SUBREG_REG (parts.base);
12256 if (parts.index && GET_CODE (parts.index) == SUBREG)
12257 parts.index = SUBREG_REG (parts.index);
12258
12259 /* Attempt to minimize number of registers in the address. */
12260 if ((parts.base
12261 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12262 || (parts.index
12263 && (!REG_P (parts.index)
12264 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12265 cost++;
12266
12267 if (parts.base
12268 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12269 && parts.index
12270 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12271 && parts.base != parts.index)
12272 cost++;
12273
12274 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12275 since its predecode logic can't detect the length of instructions
12276 and decoding degenerates to vector decoding. Increase the cost of such
12277 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12278 to split such addresses or even refuse such addresses at all.
12279
12280 The following addressing modes are affected:
12281 [base+scale*index]
12282 [scale*index+disp]
12283 [base+index]
12284
12285 The first and last cases may be avoidable by explicitly coding the zero in
12286 the memory address, but I don't have an AMD-K6 machine handy to check this
12287 theory. */
12288
12289 if (TARGET_K6
12290 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12291 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12292 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12293 cost += 10;
12294
12295 return cost;
12296 }
12297 \f
12298 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12299 this is used to form addresses to local data when -fPIC is in
12300 use. */
12301
12302 static bool
12303 darwin_local_data_pic (rtx disp)
12304 {
12305 return (GET_CODE (disp) == UNSPEC
12306 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12307 }
12308
12309 /* Determine if a given RTX is a valid constant. We already know this
12310 satisfies CONSTANT_P. */
12311
12312 static bool
12313 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12314 {
12315 switch (GET_CODE (x))
12316 {
12317 case CONST:
12318 x = XEXP (x, 0);
12319
12320 if (GET_CODE (x) == PLUS)
12321 {
12322 if (!CONST_INT_P (XEXP (x, 1)))
12323 return false;
12324 x = XEXP (x, 0);
12325 }
12326
12327 if (TARGET_MACHO && darwin_local_data_pic (x))
12328 return true;
12329
12330 /* Only some unspecs are valid as "constants". */
12331 if (GET_CODE (x) == UNSPEC)
12332 switch (XINT (x, 1))
12333 {
12334 case UNSPEC_GOT:
12335 case UNSPEC_GOTOFF:
12336 case UNSPEC_PLTOFF:
12337 return TARGET_64BIT;
12338 case UNSPEC_TPOFF:
12339 case UNSPEC_NTPOFF:
12340 x = XVECEXP (x, 0, 0);
12341 return (GET_CODE (x) == SYMBOL_REF
12342 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12343 case UNSPEC_DTPOFF:
12344 x = XVECEXP (x, 0, 0);
12345 return (GET_CODE (x) == SYMBOL_REF
12346 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12347 default:
12348 return false;
12349 }
12350
12351 /* We must have drilled down to a symbol. */
12352 if (GET_CODE (x) == LABEL_REF)
12353 return true;
12354 if (GET_CODE (x) != SYMBOL_REF)
12355 return false;
12356 /* FALLTHRU */
12357
12358 case SYMBOL_REF:
12359 /* TLS symbols are never valid. */
12360 if (SYMBOL_REF_TLS_MODEL (x))
12361 return false;
12362
12363 /* DLLIMPORT symbols are never valid. */
12364 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12365 && SYMBOL_REF_DLLIMPORT_P (x))
12366 return false;
12367
12368 #if TARGET_MACHO
12369 /* mdynamic-no-pic */
12370 if (MACHO_DYNAMIC_NO_PIC_P)
12371 return machopic_symbol_defined_p (x);
12372 #endif
12373 break;
12374
12375 case CONST_DOUBLE:
12376 if (GET_MODE (x) == TImode
12377 && x != CONST0_RTX (TImode)
12378 && !TARGET_64BIT)
12379 return false;
12380 break;
12381
12382 case CONST_VECTOR:
12383 if (!standard_sse_constant_p (x))
12384 return false;
12385
12386 default:
12387 break;
12388 }
12389
12390 /* Otherwise we handle everything else in the move patterns. */
12391 return true;
12392 }
12393
12394 /* Determine if it's legal to put X into the constant pool. This
12395 is not possible for the address of thread-local symbols, which
12396 is checked above. */
12397
12398 static bool
12399 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12400 {
12401 /* We can always put integral constants and vectors in memory. */
12402 switch (GET_CODE (x))
12403 {
12404 case CONST_INT:
12405 case CONST_DOUBLE:
12406 case CONST_VECTOR:
12407 return false;
12408
12409 default:
12410 break;
12411 }
12412 return !ix86_legitimate_constant_p (mode, x);
12413 }
12414
12415 /* Nonzero if the symbol is marked as dllimport or as a stub-variable,
12416 otherwise zero. */
12417
12418 static bool
12419 is_imported_p (rtx x)
12420 {
12421 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12422 || GET_CODE (x) != SYMBOL_REF)
12423 return false;
12424
12425 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12426 }
12427
12428
12429 /* Nonzero if the constant value X is a legitimate general operand
12430 when generating PIC code. It is given that flag_pic is on and
12431 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12432
12433 bool
12434 legitimate_pic_operand_p (rtx x)
12435 {
12436 rtx inner;
12437
12438 switch (GET_CODE (x))
12439 {
12440 case CONST:
12441 inner = XEXP (x, 0);
12442 if (GET_CODE (inner) == PLUS
12443 && CONST_INT_P (XEXP (inner, 1)))
12444 inner = XEXP (inner, 0);
12445
12446 /* Only some unspecs are valid as "constants". */
12447 if (GET_CODE (inner) == UNSPEC)
12448 switch (XINT (inner, 1))
12449 {
12450 case UNSPEC_GOT:
12451 case UNSPEC_GOTOFF:
12452 case UNSPEC_PLTOFF:
12453 return TARGET_64BIT;
12454 case UNSPEC_TPOFF:
12455 x = XVECEXP (inner, 0, 0);
12456 return (GET_CODE (x) == SYMBOL_REF
12457 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12458 case UNSPEC_MACHOPIC_OFFSET:
12459 return legitimate_pic_address_disp_p (x);
12460 default:
12461 return false;
12462 }
12463 /* FALLTHRU */
12464
12465 case SYMBOL_REF:
12466 case LABEL_REF:
12467 return legitimate_pic_address_disp_p (x);
12468
12469 default:
12470 return true;
12471 }
12472 }
12473
12474 /* Determine if a given CONST RTX is a valid memory displacement
12475 in PIC mode. */
12476
12477 bool
12478 legitimate_pic_address_disp_p (rtx disp)
12479 {
12480 bool saw_plus;
12481
12482 /* In 64bit mode we can allow direct addresses of symbols and labels
12483 when they are not dynamic symbols. */
12484 if (TARGET_64BIT)
12485 {
12486 rtx op0 = disp, op1;
12487
12488 switch (GET_CODE (disp))
12489 {
12490 case LABEL_REF:
12491 return true;
12492
12493 case CONST:
12494 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12495 break;
12496 op0 = XEXP (XEXP (disp, 0), 0);
12497 op1 = XEXP (XEXP (disp, 0), 1);
12498 if (!CONST_INT_P (op1)
12499 || INTVAL (op1) >= 16*1024*1024
12500 || INTVAL (op1) < -16*1024*1024)
12501 break;
12502 if (GET_CODE (op0) == LABEL_REF)
12503 return true;
12504 if (GET_CODE (op0) == CONST
12505 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12506 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12507 return true;
12508 if (GET_CODE (op0) == UNSPEC
12509 && XINT (op0, 1) == UNSPEC_PCREL)
12510 return true;
12511 if (GET_CODE (op0) != SYMBOL_REF)
12512 break;
12513 /* FALLTHRU */
12514
12515 case SYMBOL_REF:
12516 /* TLS references should always be enclosed in UNSPEC.
12517 The dllimported symbol always needs to be resolved. */
12518 if (SYMBOL_REF_TLS_MODEL (op0)
12519 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12520 return false;
12521
12522 if (TARGET_PECOFF)
12523 {
12524 if (is_imported_p (op0))
12525 return true;
12526
12527 if (SYMBOL_REF_FAR_ADDR_P (op0)
12528 || !SYMBOL_REF_LOCAL_P (op0))
12529 break;
12530
12531 /* Function symbols need to be resolved only for
12532 the large model.
12533 For the small model we don't need to resolve anything
12534 here. */
12535 if ((ix86_cmodel != CM_LARGE_PIC
12536 && SYMBOL_REF_FUNCTION_P (op0))
12537 || ix86_cmodel == CM_SMALL_PIC)
12538 return true;
12539 /* Non-external symbols don't need to be resolved for
12540 the large and medium models. */
12541 if ((ix86_cmodel == CM_LARGE_PIC
12542 || ix86_cmodel == CM_MEDIUM_PIC)
12543 && !SYMBOL_REF_EXTERNAL_P (op0))
12544 return true;
12545 }
12546 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12547 && SYMBOL_REF_LOCAL_P (op0)
12548 && ix86_cmodel != CM_LARGE_PIC)
12549 return true;
12550 break;
12551
12552 default:
12553 break;
12554 }
12555 }
12556 if (GET_CODE (disp) != CONST)
12557 return false;
12558 disp = XEXP (disp, 0);
12559
12560 if (TARGET_64BIT)
12561 {
12562 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
12563 of GOT tables. We should not need these anyway. */
12564 if (GET_CODE (disp) != UNSPEC
12565 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12566 && XINT (disp, 1) != UNSPEC_GOTOFF
12567 && XINT (disp, 1) != UNSPEC_PCREL
12568 && XINT (disp, 1) != UNSPEC_PLTOFF))
12569 return false;
12570
12571 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12572 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12573 return false;
12574 return true;
12575 }
12576
12577 saw_plus = false;
12578 if (GET_CODE (disp) == PLUS)
12579 {
12580 if (!CONST_INT_P (XEXP (disp, 1)))
12581 return false;
12582 disp = XEXP (disp, 0);
12583 saw_plus = true;
12584 }
12585
12586 if (TARGET_MACHO && darwin_local_data_pic (disp))
12587 return true;
12588
12589 if (GET_CODE (disp) != UNSPEC)
12590 return false;
12591
12592 switch (XINT (disp, 1))
12593 {
12594 case UNSPEC_GOT:
12595 if (saw_plus)
12596 return false;
12597 /* We need to check for both symbols and labels because VxWorks loads
12598 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12599 details. */
12600 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12601 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12602 case UNSPEC_GOTOFF:
12603 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12604 While the ABI also specifies a 32bit relocation, we don't produce it in
12605 the small PIC model at all. */
12606 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12607 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12608 && !TARGET_64BIT)
12609 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12610 return false;
12611 case UNSPEC_GOTTPOFF:
12612 case UNSPEC_GOTNTPOFF:
12613 case UNSPEC_INDNTPOFF:
12614 if (saw_plus)
12615 return false;
12616 disp = XVECEXP (disp, 0, 0);
12617 return (GET_CODE (disp) == SYMBOL_REF
12618 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12619 case UNSPEC_NTPOFF:
12620 disp = XVECEXP (disp, 0, 0);
12621 return (GET_CODE (disp) == SYMBOL_REF
12622 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12623 case UNSPEC_DTPOFF:
12624 disp = XVECEXP (disp, 0, 0);
12625 return (GET_CODE (disp) == SYMBOL_REF
12626 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12627 }
12628
12629 return false;
12630 }
12631
12632 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12633 replace the input X, or the original X if no replacement is called for.
12634 The output parameter *WIN is 1 if the calling macro should goto WIN,
12635 0 if it should not. */
12636
12637 bool
12638 ix86_legitimize_reload_address (rtx x,
12639 enum machine_mode mode ATTRIBUTE_UNUSED,
12640 int opnum, int type,
12641 int ind_levels ATTRIBUTE_UNUSED)
12642 {
12643 /* Reload can generate:
12644
12645 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12646 (reg:DI 97))
12647 (reg:DI 2 cx))
12648
12649 This RTX is rejected from ix86_legitimate_address_p due to
12650 non-strictness of base register 97. Following this rejection,
12651 reload pushes all three components into separate registers,
12652 creating an invalid memory address RTX.
12653
12654 The following code reloads only the invalid part of the
12655 memory address RTX. */
12656
12657 if (GET_CODE (x) == PLUS
12658 && REG_P (XEXP (x, 1))
12659 && GET_CODE (XEXP (x, 0)) == PLUS
12660 && REG_P (XEXP (XEXP (x, 0), 1)))
12661 {
12662 rtx base, index;
12663 bool something_reloaded = false;
12664
12665 base = XEXP (XEXP (x, 0), 1);
12666 if (!REG_OK_FOR_BASE_STRICT_P (base))
12667 {
12668 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12669 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12670 opnum, (enum reload_type) type);
12671 something_reloaded = true;
12672 }
12673
12674 index = XEXP (x, 1);
12675 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12676 {
12677 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12678 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12679 opnum, (enum reload_type) type);
12680 something_reloaded = true;
12681 }
12682
12683 gcc_assert (something_reloaded);
12684 return true;
12685 }
12686
12687 return false;
12688 }
12689
12690 /* Determine if op is a suitable RTX for an address register.
12691 Return the naked register if a register or a register subreg is
12692 found, otherwise return NULL_RTX. */
12693
12694 static rtx
12695 ix86_validate_address_register (rtx op)
12696 {
12697 enum machine_mode mode = GET_MODE (op);
12698
12699 /* Only SImode or DImode registers can form the address. */
12700 if (mode != SImode && mode != DImode)
12701 return NULL_RTX;
12702
12703 if (REG_P (op))
12704 return op;
12705 else if (GET_CODE (op) == SUBREG)
12706 {
12707 rtx reg = SUBREG_REG (op);
12708
12709 if (!REG_P (reg))
12710 return NULL_RTX;
12711
12712 mode = GET_MODE (reg);
12713
12714 /* Don't allow SUBREGs that span more than a word. It can
12715 lead to spill failures when the register is one word out
12716 of a two word structure. */
12717 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12718 return NULL_RTX;
12719
12720 /* Allow only SUBREGs of non-eliminable hard registers. */
12721 if (register_no_elim_operand (reg, mode))
12722 return reg;
12723 }
12724
12725 /* Op is not a register. */
12726 return NULL_RTX;
12727 }
12728
12729 /* Recognizes RTL expressions that are valid memory addresses for an
12730 instruction. The MODE argument is the machine mode for the MEM
12731 expression that wants to use this address.
12732
12733 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12734 convert common non-canonical forms to canonical form so that they will
12735 be recognized. */
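/* For example, (plus (reg) (mult (reg) (const_int 4))) is accepted,
   while (plus (reg) (mult (reg) (const_int 3))) is rejected below
   because 3 is not an encodable scale factor.  */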
12736
12737 static bool
12738 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12739 rtx addr, bool strict)
12740 {
12741 struct ix86_address parts;
12742 rtx base, index, disp;
12743 HOST_WIDE_INT scale;
12744 enum ix86_address_seg seg;
12745
12746 if (ix86_decompose_address (addr, &parts) <= 0)
12747 /* Decomposition failed. */
12748 return false;
12749
12750 base = parts.base;
12751 index = parts.index;
12752 disp = parts.disp;
12753 scale = parts.scale;
12754 seg = parts.seg;
12755
12756 /* Validate base register. */
12757 if (base)
12758 {
12759 rtx reg = ix86_validate_address_register (base);
12760
12761 if (reg == NULL_RTX)
12762 return false;
12763
12764 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12765 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12766 /* Base is not valid. */
12767 return false;
12768 }
12769
12770 /* Validate index register. */
12771 if (index)
12772 {
12773 rtx reg = ix86_validate_address_register (index);
12774
12775 if (reg == NULL_RTX)
12776 return false;
12777
12778 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12779 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12780 /* Index is not valid. */
12781 return false;
12782 }
12783
12784 /* Index and base should have the same mode. */
12785 if (base && index
12786 && GET_MODE (base) != GET_MODE (index))
12787 return false;
12788
12789 /* Address override works only on the (%reg) part of %fs:(%reg). */
12790 if (seg != SEG_DEFAULT
12791 && ((base && GET_MODE (base) != word_mode)
12792 || (index && GET_MODE (index) != word_mode)))
12793 return false;
12794
12795 /* Validate scale factor. */
12796 if (scale != 1)
12797 {
12798 if (!index)
12799 /* Scale without index. */
12800 return false;
12801
12802 if (scale != 2 && scale != 4 && scale != 8)
12803 /* Scale is not a valid multiplier. */
12804 return false;
12805 }
12806
12807 /* Validate displacement. */
12808 if (disp)
12809 {
12810 if (GET_CODE (disp) == CONST
12811 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12812 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12813 switch (XINT (XEXP (disp, 0), 1))
12814 {
12815 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12816 used. While the ABI also specifies 32bit relocations, we don't produce
12817 them at all and use IP relative instead. */
12818 case UNSPEC_GOT:
12819 case UNSPEC_GOTOFF:
12820 gcc_assert (flag_pic);
12821 if (!TARGET_64BIT)
12822 goto is_legitimate_pic;
12823
12824 /* 64bit address unspec. */
12825 return false;
12826
12827 case UNSPEC_GOTPCREL:
12828 case UNSPEC_PCREL:
12829 gcc_assert (flag_pic);
12830 goto is_legitimate_pic;
12831
12832 case UNSPEC_GOTTPOFF:
12833 case UNSPEC_GOTNTPOFF:
12834 case UNSPEC_INDNTPOFF:
12835 case UNSPEC_NTPOFF:
12836 case UNSPEC_DTPOFF:
12837 break;
12838
12839 case UNSPEC_STACK_CHECK:
12840 gcc_assert (flag_split_stack);
12841 break;
12842
12843 default:
12844 /* Invalid address unspec. */
12845 return false;
12846 }
12847
12848 else if (SYMBOLIC_CONST (disp)
12849 && (flag_pic
12850 || (TARGET_MACHO
12851 #if TARGET_MACHO
12852 && MACHOPIC_INDIRECT
12853 && !machopic_operand_p (disp)
12854 #endif
12855 )))
12856 {
12857
12858 is_legitimate_pic:
12859 if (TARGET_64BIT && (index || base))
12860 {
12861 /* foo@dtpoff(%rX) is ok. */
12862 if (GET_CODE (disp) != CONST
12863 || GET_CODE (XEXP (disp, 0)) != PLUS
12864 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12865 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12866 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12867 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12868 /* Non-constant pic memory reference. */
12869 return false;
12870 }
12871 else if ((!TARGET_MACHO || flag_pic)
12872 && ! legitimate_pic_address_disp_p (disp))
12873 /* Displacement is an invalid pic construct. */
12874 return false;
12875 #if TARGET_MACHO
12876 else if (MACHO_DYNAMIC_NO_PIC_P
12877 && !ix86_legitimate_constant_p (Pmode, disp))
12878 /* displacement must be referenced via non_lazy_pointer */
12879 return false;
12880 #endif
12881
12882 /* This code used to verify that a symbolic pic displacement
12883 includes the pic_offset_table_rtx register.
12884
12885 While this is a good idea, unfortunately these constructs may
12886 be created by "adds using lea" optimization for incorrect
12887 code like:
12888
12889 int a;
12890 int foo(int i)
12891 {
12892 return *(&a+i);
12893 }
12894
12895 This code is nonsensical, but results in addressing the
12896 GOT table with a pic_offset_table_rtx base. We can't
12897 just refuse it easily, since it gets matched by the
12898 "addsi3" pattern, which later gets split to lea in
12899 case the output register differs from the input. While this
12900 could be handled by a separate addsi pattern for this case
12901 that never results in lea, disabling this test seems to be
12902 the easier and correct fix for the crash. */
12903 }
12904 else if (GET_CODE (disp) != LABEL_REF
12905 && !CONST_INT_P (disp)
12906 && (GET_CODE (disp) != CONST
12907 || !ix86_legitimate_constant_p (Pmode, disp))
12908 && (GET_CODE (disp) != SYMBOL_REF
12909 || !ix86_legitimate_constant_p (Pmode, disp)))
12910 /* Displacement is not constant. */
12911 return false;
12912 else if (TARGET_64BIT
12913 && !x86_64_immediate_operand (disp, VOIDmode))
12914 /* Displacement is out of range. */
12915 return false;
12916 /* In x32 mode, constant addresses are sign extended to 64bit, so
12917 we have to prevent addresses from 0x80000000 to 0xffffffff. */
12918 else if (TARGET_X32 && !(index || base)
12919 && CONST_INT_P (disp)
12920 && val_signbit_known_set_p (SImode, INTVAL (disp)))
12921 return false;
12922 }
12923
12924 /* Everything looks valid. */
12925 return true;
12926 }
12927
12928 /* Determine if a given RTX is a valid constant address. */
12929
12930 bool
12931 constant_address_p (rtx x)
12932 {
12933 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12934 }
12935 \f
12936 /* Return a unique alias set for the GOT. */
12937
12938 static alias_set_type
12939 ix86_GOT_alias_set (void)
12940 {
12941 static alias_set_type set = -1;
12942 if (set == -1)
12943 set = new_alias_set ();
12944 return set;
12945 }
12946
12947 /* Return a legitimate reference for ORIG (an address) using the
12948 register REG. If REG is 0, a new pseudo is generated.
12949
12950 There are two types of references that must be handled:
12951
12952 1. Global data references must load the address from the GOT, via
12953 the PIC reg. An insn is emitted to do this load, and the reg is
12954 returned.
12955
12956 2. Static data references, constant pool addresses, and code labels
12957 compute the address as an offset from the GOT, whose base is in
12958 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12959 differentiate them from global data objects. The returned
12960 address is the PIC reg + an unspec constant.
12961
12962 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12963 reg also appears in the address. */
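/* As a sketch for 32-bit ELF, a reference to global data `foo' becomes

	(mem (plus (reg pic) (const (unspec [(symbol_ref "foo")] GOT))))

   i.e. a load of the address from the GOT, while a reference to local
   data `bar' becomes

	(plus (reg pic) (const (unspec [(symbol_ref "bar")] GOTOFF)))

   i.e. the PIC register plus a link-time constant offset.  */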
12964
12965 static rtx
12966 legitimize_pic_address (rtx orig, rtx reg)
12967 {
12968 rtx addr = orig;
12969 rtx new_rtx = orig;
12970
12971 #if TARGET_MACHO
12972 if (TARGET_MACHO && !TARGET_64BIT)
12973 {
12974 if (reg == 0)
12975 reg = gen_reg_rtx (Pmode);
12976 /* Use the generic Mach-O PIC machinery. */
12977 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12978 }
12979 #endif
12980
12981 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12982 {
12983 rtx tmp = legitimize_pe_coff_symbol (addr, true);
12984 if (tmp)
12985 return tmp;
12986 }
12987
12988 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12989 new_rtx = addr;
12990 else if (TARGET_64BIT && !TARGET_PECOFF
12991 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
12992 {
12993 rtx tmpreg;
12994 /* This symbol may be referenced via a displacement from the PIC
12995 base address (@GOTOFF). */
12996
12997 if (reload_in_progress)
12998 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12999 if (GET_CODE (addr) == CONST)
13000 addr = XEXP (addr, 0);
13001 if (GET_CODE (addr) == PLUS)
13002 {
13003 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13004 UNSPEC_GOTOFF);
13005 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13006 }
13007 else
13008 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13009 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13010 if (!reg)
13011 tmpreg = gen_reg_rtx (Pmode);
13012 else
13013 tmpreg = reg;
13014 emit_move_insn (tmpreg, new_rtx);
13015
13016 if (reg != 0)
13017 {
13018 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13019 tmpreg, 1, OPTAB_DIRECT);
13020 new_rtx = reg;
13021 }
13022 else
13023 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13024 }
13025 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13026 {
13027 /* This symbol may be referenced via a displacement from the PIC
13028 base address (@GOTOFF). */
13029
13030 if (reload_in_progress)
13031 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13032 if (GET_CODE (addr) == CONST)
13033 addr = XEXP (addr, 0);
13034 if (GET_CODE (addr) == PLUS)
13035 {
13036 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13037 UNSPEC_GOTOFF);
13038 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13039 }
13040 else
13041 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13042 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13043 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13044
13045 if (reg != 0)
13046 {
13047 emit_move_insn (reg, new_rtx);
13048 new_rtx = reg;
13049 }
13050 }
13051 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13052 /* We can't use @GOTOFF for text labels on VxWorks;
13053 see gotoff_operand. */
13054 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13055 {
13056 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13057 if (tmp)
13058 return tmp;
13059
13060 /* For x64 PE-COFF there is no GOT table, so we use the address
13061 directly. */
13062 if (TARGET_64BIT && TARGET_PECOFF)
13063 {
13064 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13065 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13066
13067 if (reg == 0)
13068 reg = gen_reg_rtx (Pmode);
13069 emit_move_insn (reg, new_rtx);
13070 new_rtx = reg;
13071 }
13072 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13073 {
13074 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13075 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13076 new_rtx = gen_const_mem (Pmode, new_rtx);
13077 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13078
13079 if (reg == 0)
13080 reg = gen_reg_rtx (Pmode);
13081 /* Use gen_movsi directly, otherwise the address is loaded
13082 into a register for CSE. We don't want to CSE these addresses;
13083 instead we CSE addresses from the GOT table, so skip this. */
13084 emit_insn (gen_movsi (reg, new_rtx));
13085 new_rtx = reg;
13086 }
13087 else
13088 {
13089 /* This symbol must be referenced via a load from the
13090 Global Offset Table (@GOT). */
13091
13092 if (reload_in_progress)
13093 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13094 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13095 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13096 if (TARGET_64BIT)
13097 new_rtx = force_reg (Pmode, new_rtx);
13098 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13099 new_rtx = gen_const_mem (Pmode, new_rtx);
13100 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13101
13102 if (reg == 0)
13103 reg = gen_reg_rtx (Pmode);
13104 emit_move_insn (reg, new_rtx);
13105 new_rtx = reg;
13106 }
13107 }
13108 else
13109 {
13110 if (CONST_INT_P (addr)
13111 && !x86_64_immediate_operand (addr, VOIDmode))
13112 {
13113 if (reg)
13114 {
13115 emit_move_insn (reg, addr);
13116 new_rtx = reg;
13117 }
13118 else
13119 new_rtx = force_reg (Pmode, addr);
13120 }
13121 else if (GET_CODE (addr) == CONST)
13122 {
13123 addr = XEXP (addr, 0);
13124
13125 /* We must match stuff we generate before. Assume the only
13126 unspecs that can get here are ours. Not that we could do
13127 anything with them anyway.... */
13128 if (GET_CODE (addr) == UNSPEC
13129 || (GET_CODE (addr) == PLUS
13130 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13131 return orig;
13132 gcc_assert (GET_CODE (addr) == PLUS);
13133 }
13134 if (GET_CODE (addr) == PLUS)
13135 {
13136 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13137
13138 /* Check first to see if this is a constant offset from a @GOTOFF
13139 symbol reference. */
13140 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13141 && CONST_INT_P (op1))
13142 {
13143 if (!TARGET_64BIT)
13144 {
13145 if (reload_in_progress)
13146 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13147 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13148 UNSPEC_GOTOFF);
13149 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13150 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13151 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13152
13153 if (reg != 0)
13154 {
13155 emit_move_insn (reg, new_rtx);
13156 new_rtx = reg;
13157 }
13158 }
13159 else
13160 {
13161 if (INTVAL (op1) < -16*1024*1024
13162 || INTVAL (op1) >= 16*1024*1024)
13163 {
13164 if (!x86_64_immediate_operand (op1, Pmode))
13165 op1 = force_reg (Pmode, op1);
13166 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13167 }
13168 }
13169 }
13170 else
13171 {
13172 rtx base = legitimize_pic_address (op0, reg);
13173 enum machine_mode mode = GET_MODE (base);
13174 new_rtx
13175 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13176
13177 if (CONST_INT_P (new_rtx))
13178 {
13179 if (INTVAL (new_rtx) < -16*1024*1024
13180 || INTVAL (new_rtx) >= 16*1024*1024)
13181 {
13182 if (!x86_64_immediate_operand (new_rtx, mode))
13183 new_rtx = force_reg (mode, new_rtx);
13184 new_rtx
13185 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13186 }
13187 else
13188 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13189 }
13190 else
13191 {
13192 if (GET_CODE (new_rtx) == PLUS
13193 && CONSTANT_P (XEXP (new_rtx, 1)))
13194 {
13195 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13196 new_rtx = XEXP (new_rtx, 1);
13197 }
13198 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13199 }
13200 }
13201 }
13202 }
13203 return new_rtx;
13204 }
13205 \f
13206 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13207
13208 static rtx
13209 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13210 {
13211 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13212
13213 if (GET_MODE (tp) != tp_mode)
13214 {
13215 gcc_assert (GET_MODE (tp) == SImode);
13216 gcc_assert (tp_mode == DImode);
13217
13218 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13219 }
13220
13221 if (to_reg)
13222 tp = copy_to_mode_reg (tp_mode, tp);
13223
13224 return tp;
13225 }
13226
13227 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13228
13229 static GTY(()) rtx ix86_tls_symbol;
13230
13231 static rtx
13232 ix86_tls_get_addr (void)
13233 {
13234 if (!ix86_tls_symbol)
13235 {
13236 const char *sym
13237 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13238 ? "___tls_get_addr" : "__tls_get_addr");
13239
13240 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13241 }
13242
13243 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13244 {
13245 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13246 UNSPEC_PLTOFF);
13247 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13248 gen_rtx_CONST (Pmode, unspec));
13249 }
13250
13251 return ix86_tls_symbol;
13252 }
13253
13254 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13255
13256 static GTY(()) rtx ix86_tls_module_base_symbol;
13257
13258 rtx
13259 ix86_tls_module_base (void)
13260 {
13261 if (!ix86_tls_module_base_symbol)
13262 {
13263 ix86_tls_module_base_symbol
13264 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13265
13266 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13267 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13268 }
13269
13270 return ix86_tls_module_base_symbol;
13271 }
13272
13273 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13274 false if we expect this to be used for a memory address and true if
13275 we expect to load the address into a register. */
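/* As a rough sketch for 64-bit ELF targets: a local-exec access to `x'
   is simply %fs:x@tpoff, an initial-exec access loads the offset from
   the GOT first,

	movq	x@gottpoff(%rip), %reg
	movq	%fs:(%reg), ...

   and the global- and local-dynamic models go through a call to
   __tls_get_addr (or use the TLS descriptor scheme when
   TARGET_GNU2_TLS is in effect).  */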
13276
13277 static rtx
13278 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13279 {
13280 rtx dest, base, off;
13281 rtx pic = NULL_RTX, tp = NULL_RTX;
13282 enum machine_mode tp_mode = Pmode;
13283 int type;
13284
13285 switch (model)
13286 {
13287 case TLS_MODEL_GLOBAL_DYNAMIC:
13288 dest = gen_reg_rtx (Pmode);
13289
13290 if (!TARGET_64BIT)
13291 {
13292 if (flag_pic && !TARGET_PECOFF)
13293 pic = pic_offset_table_rtx;
13294 else
13295 {
13296 pic = gen_reg_rtx (Pmode);
13297 emit_insn (gen_set_got (pic));
13298 }
13299 }
13300
13301 if (TARGET_GNU2_TLS)
13302 {
13303 if (TARGET_64BIT)
13304 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13305 else
13306 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13307
13308 tp = get_thread_pointer (Pmode, true);
13309 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13310
13311 if (GET_MODE (x) != Pmode)
13312 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13313
13314 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13315 }
13316 else
13317 {
13318 rtx caddr = ix86_tls_get_addr ();
13319
13320 if (TARGET_64BIT)
13321 {
13322 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13323 rtx insns;
13324
13325 start_sequence ();
13326 emit_call_insn
13327 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13328 insns = get_insns ();
13329 end_sequence ();
13330
13331 if (GET_MODE (x) != Pmode)
13332 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13333
13334 RTL_CONST_CALL_P (insns) = 1;
13335 emit_libcall_block (insns, dest, rax, x);
13336 }
13337 else
13338 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13339 }
13340 break;
13341
13342 case TLS_MODEL_LOCAL_DYNAMIC:
13343 base = gen_reg_rtx (Pmode);
13344
13345 if (!TARGET_64BIT)
13346 {
13347 if (flag_pic)
13348 pic = pic_offset_table_rtx;
13349 else
13350 {
13351 pic = gen_reg_rtx (Pmode);
13352 emit_insn (gen_set_got (pic));
13353 }
13354 }
13355
13356 if (TARGET_GNU2_TLS)
13357 {
13358 rtx tmp = ix86_tls_module_base ();
13359
13360 if (TARGET_64BIT)
13361 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13362 else
13363 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13364
13365 tp = get_thread_pointer (Pmode, true);
13366 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13367 gen_rtx_MINUS (Pmode, tmp, tp));
13368 }
13369 else
13370 {
13371 rtx caddr = ix86_tls_get_addr ();
13372
13373 if (TARGET_64BIT)
13374 {
13375 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13376 rtx insns, eqv;
13377
13378 start_sequence ();
13379 emit_call_insn
13380 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13381 insns = get_insns ();
13382 end_sequence ();
13383
13384 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13385 share the LD_BASE result with other LD model accesses. */
13386 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13387 UNSPEC_TLS_LD_BASE);
13388
13389 RTL_CONST_CALL_P (insns) = 1;
13390 emit_libcall_block (insns, base, rax, eqv);
13391 }
13392 else
13393 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13394 }
13395
13396 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13397 off = gen_rtx_CONST (Pmode, off);
13398
13399 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13400
13401 if (TARGET_GNU2_TLS)
13402 {
13403 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13404
13405 if (GET_MODE (x) != Pmode)
13406 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13407
13408 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13409 }
13410 break;
13411
13412 case TLS_MODEL_INITIAL_EXEC:
13413 if (TARGET_64BIT)
13414 {
13415 if (TARGET_SUN_TLS && !TARGET_X32)
13416 {
13417 /* The Sun linker took the AMD64 TLS spec literally
13418 and can only handle %rax as destination of the
13419 initial executable code sequence. */
13420
13421 dest = gen_reg_rtx (DImode);
13422 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13423 return dest;
13424 }
13425
13426 /* Generate DImode references to avoid %fs:(%reg32)
13427 problems and linker IE->LE relaxation bug. */
13428 tp_mode = DImode;
13429 pic = NULL;
13430 type = UNSPEC_GOTNTPOFF;
13431 }
13432 else if (flag_pic)
13433 {
13434 if (reload_in_progress)
13435 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13436 pic = pic_offset_table_rtx;
13437 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13438 }
13439 else if (!TARGET_ANY_GNU_TLS)
13440 {
13441 pic = gen_reg_rtx (Pmode);
13442 emit_insn (gen_set_got (pic));
13443 type = UNSPEC_GOTTPOFF;
13444 }
13445 else
13446 {
13447 pic = NULL;
13448 type = UNSPEC_INDNTPOFF;
13449 }
13450
13451 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13452 off = gen_rtx_CONST (tp_mode, off);
13453 if (pic)
13454 off = gen_rtx_PLUS (tp_mode, pic, off);
13455 off = gen_const_mem (tp_mode, off);
13456 set_mem_alias_set (off, ix86_GOT_alias_set ());
13457
13458 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13459 {
13460 base = get_thread_pointer (tp_mode,
13461 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13462 off = force_reg (tp_mode, off);
13463 return gen_rtx_PLUS (tp_mode, base, off);
13464 }
13465 else
13466 {
13467 base = get_thread_pointer (Pmode, true);
13468 dest = gen_reg_rtx (Pmode);
13469 emit_insn (ix86_gen_sub3 (dest, base, off));
13470 }
13471 break;
13472
13473 case TLS_MODEL_LOCAL_EXEC:
13474 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13475 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13476 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13477 off = gen_rtx_CONST (Pmode, off);
13478
13479 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13480 {
13481 base = get_thread_pointer (Pmode,
13482 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13483 return gen_rtx_PLUS (Pmode, base, off);
13484 }
13485 else
13486 {
13487 base = get_thread_pointer (Pmode, true);
13488 dest = gen_reg_rtx (Pmode);
13489 emit_insn (ix86_gen_sub3 (dest, base, off));
13490 }
13491 break;
13492
13493 default:
13494 gcc_unreachable ();
13495 }
13496
13497 return dest;
13498 }
13499
13500 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13501 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13502 unique refptr-DECL symbol corresponding to symbol DECL. */
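/* For example, a dllimport reference to `foo' is redirected through the
   import slot `__imp_foo' (or `__imp__foo' on targets with a user label
   prefix), while the refptr case generates `.refptr.foo' (respectively
   `refptr.foo').  */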
13503
13504 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13505 htab_t dllimport_map;
13506
13507 static tree
13508 get_dllimport_decl (tree decl, bool beimport)
13509 {
13510 struct tree_map *h, in;
13511 void **loc;
13512 const char *name;
13513 const char *prefix;
13514 size_t namelen, prefixlen;
13515 char *imp_name;
13516 tree to;
13517 rtx rtl;
13518
13519 if (!dllimport_map)
13520 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13521
13522 in.hash = htab_hash_pointer (decl);
13523 in.base.from = decl;
13524 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13525 h = (struct tree_map *) *loc;
13526 if (h)
13527 return h->to;
13528
13529 *loc = h = ggc_alloc_tree_map ();
13530 h->hash = in.hash;
13531 h->base.from = decl;
13532 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13533 VAR_DECL, NULL, ptr_type_node);
13534 DECL_ARTIFICIAL (to) = 1;
13535 DECL_IGNORED_P (to) = 1;
13536 DECL_EXTERNAL (to) = 1;
13537 TREE_READONLY (to) = 1;
13538
13539 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13540 name = targetm.strip_name_encoding (name);
13541 if (beimport)
13542 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13543 ? "*__imp_" : "*__imp__";
13544 else
13545 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13546 namelen = strlen (name);
13547 prefixlen = strlen (prefix);
13548 imp_name = (char *) alloca (namelen + prefixlen + 1);
13549 memcpy (imp_name, prefix, prefixlen);
13550 memcpy (imp_name + prefixlen, name, namelen + 1);
13551
13552 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13553 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13554 SET_SYMBOL_REF_DECL (rtl, to);
13555 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13556 if (!beimport)
13557 {
13558 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13559 #ifdef SUB_TARGET_RECORD_STUB
13560 SUB_TARGET_RECORD_STUB (name);
13561 #endif
13562 }
13563
13564 rtl = gen_const_mem (Pmode, rtl);
13565 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13566
13567 SET_DECL_RTL (to, rtl);
13568 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13569
13570 return to;
13571 }
13572
13573 /* Expand SYMBOL into its corresponding far-address symbol.
13574 WANT_REG is true if we require the result be a register. */
13575
13576 static rtx
13577 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13578 {
13579 tree imp_decl;
13580 rtx x;
13581
13582 gcc_assert (SYMBOL_REF_DECL (symbol));
13583 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13584
13585 x = DECL_RTL (imp_decl);
13586 if (want_reg)
13587 x = force_reg (Pmode, x);
13588 return x;
13589 }
13590
13591 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13592 true if we require the result be a register. */
13593
13594 static rtx
13595 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13596 {
13597 tree imp_decl;
13598 rtx x;
13599
13600 gcc_assert (SYMBOL_REF_DECL (symbol));
13601 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13602
13603 x = DECL_RTL (imp_decl);
13604 if (want_reg)
13605 x = force_reg (Pmode, x);
13606 return x;
13607 }
13608
13609 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13610 is true if we require the result be a register. */
13611
13612 static rtx
13613 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13614 {
13615 if (!TARGET_PECOFF)
13616 return NULL_RTX;
13617
13618 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13619 {
13620 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13621 return legitimize_dllimport_symbol (addr, inreg);
13622 if (GET_CODE (addr) == CONST
13623 && GET_CODE (XEXP (addr, 0)) == PLUS
13624 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13625 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13626 {
13627 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13628 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13629 }
13630 }
13631
13632 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13633 return NULL_RTX;
13634 if (GET_CODE (addr) == SYMBOL_REF
13635 && !is_imported_p (addr)
13636 && SYMBOL_REF_EXTERNAL_P (addr)
13637 && SYMBOL_REF_DECL (addr))
13638 return legitimize_pe_coff_extern_decl (addr, inreg);
13639
13640 if (GET_CODE (addr) == CONST
13641 && GET_CODE (XEXP (addr, 0)) == PLUS
13642 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13643 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13644 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13645 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13646 {
13647 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13648 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13649 }
13650 return NULL_RTX;
13651 }
13652
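/* Added illustration: with the above, a dllimport reference such as

       (const (plus (symbol_ref ("foo")) (const_int 8)))

   is legitimized into

       (plus (mem/u (symbol_ref ("*__imp__foo"))) (const_int 8))

   (with the memory load forced into a register first when INREG is true);
   the refptr path for external data under -mcmodel=medium/large PIC is
   handled the same way via legitimize_pe_coff_extern_decl.  */
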
13653 /* Try machine-dependent ways of modifying an illegitimate address
13654 to be legitimate. If we find one, return the new, valid address.
13655 This macro is used in only one place: `memory_address' in explow.c.
13656
13657 OLDX is the address as it was before break_out_memory_refs was called.
13658 In some cases it is useful to look at this to decide what needs to be done.
13659
13660 It is always safe for this macro to do nothing. It exists to recognize
13661 opportunities to optimize the output.
13662
13663 For the 80386, we handle X+REG by loading X into a register R and
13664 using R+REG. R will go in a general reg and indexing will be used.
13665 However, if REG is a broken-out memory address or multiplication,
13666 nothing needs to be done because REG can certainly go in a general reg.
13667
13668 When -fpic is used, special handling is needed for symbolic references.
13669 See comments by legitimize_pic_address in i386.c for details. */
13670
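/* Added examples of the canonicalizations performed below (illustrative
   only): a scaled index written as a shift,

       (plus (ashift (reg A) (const_int 2)) (reg B))

   is rewritten as (plus (mult (reg A) (const_int 4)) (reg B)), and a sum
   of a scaled index and a register-plus-constant,

       (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))

   is reassociated into

       (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))

   which matches the base + index*scale + disp addressing form.  */
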
13671 static rtx
13672 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13673 enum machine_mode mode)
13674 {
13675 int changed = 0;
13676 unsigned log;
13677
13678 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13679 if (log)
13680 return legitimize_tls_address (x, (enum tls_model) log, false);
13681 if (GET_CODE (x) == CONST
13682 && GET_CODE (XEXP (x, 0)) == PLUS
13683 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13684 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13685 {
13686 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13687 (enum tls_model) log, false);
13688 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13689 }
13690
13691 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13692 {
13693 rtx tmp = legitimize_pe_coff_symbol (x, true);
13694 if (tmp)
13695 return tmp;
13696 }
13697
13698 if (flag_pic && SYMBOLIC_CONST (x))
13699 return legitimize_pic_address (x, 0);
13700
13701 #if TARGET_MACHO
13702 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13703 return machopic_indirect_data_reference (x, 0);
13704 #endif
13705
13706   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
13707 if (GET_CODE (x) == ASHIFT
13708 && CONST_INT_P (XEXP (x, 1))
13709 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13710 {
13711 changed = 1;
13712 log = INTVAL (XEXP (x, 1));
13713 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13714 GEN_INT (1 << log));
13715 }
13716
13717 if (GET_CODE (x) == PLUS)
13718 {
13719 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13720
13721 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13722 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13723 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13724 {
13725 changed = 1;
13726 log = INTVAL (XEXP (XEXP (x, 0), 1));
13727 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13728 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13729 GEN_INT (1 << log));
13730 }
13731
13732 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13733 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13734 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13735 {
13736 changed = 1;
13737 log = INTVAL (XEXP (XEXP (x, 1), 1));
13738 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13739 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13740 GEN_INT (1 << log));
13741 }
13742
13743 /* Put multiply first if it isn't already. */
13744 if (GET_CODE (XEXP (x, 1)) == MULT)
13745 {
13746 rtx tmp = XEXP (x, 0);
13747 XEXP (x, 0) = XEXP (x, 1);
13748 XEXP (x, 1) = tmp;
13749 changed = 1;
13750 }
13751
13752 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13753 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13754 created by virtual register instantiation, register elimination, and
13755 similar optimizations. */
13756 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13757 {
13758 changed = 1;
13759 x = gen_rtx_PLUS (Pmode,
13760 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13761 XEXP (XEXP (x, 1), 0)),
13762 XEXP (XEXP (x, 1), 1));
13763 }
13764
13765 /* Canonicalize
13766 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13767 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13768 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13769 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13770 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13771 && CONSTANT_P (XEXP (x, 1)))
13772 {
13773 rtx constant;
13774 rtx other = NULL_RTX;
13775
13776 if (CONST_INT_P (XEXP (x, 1)))
13777 {
13778 constant = XEXP (x, 1);
13779 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13780 }
13781 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13782 {
13783 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13784 other = XEXP (x, 1);
13785 }
13786 else
13787 constant = 0;
13788
13789 if (constant)
13790 {
13791 changed = 1;
13792 x = gen_rtx_PLUS (Pmode,
13793 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13794 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13795 plus_constant (Pmode, other,
13796 INTVAL (constant)));
13797 }
13798 }
13799
13800 if (changed && ix86_legitimate_address_p (mode, x, false))
13801 return x;
13802
13803 if (GET_CODE (XEXP (x, 0)) == MULT)
13804 {
13805 changed = 1;
13806 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13807 }
13808
13809 if (GET_CODE (XEXP (x, 1)) == MULT)
13810 {
13811 changed = 1;
13812 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13813 }
13814
13815 if (changed
13816 && REG_P (XEXP (x, 1))
13817 && REG_P (XEXP (x, 0)))
13818 return x;
13819
13820 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13821 {
13822 changed = 1;
13823 x = legitimize_pic_address (x, 0);
13824 }
13825
13826 if (changed && ix86_legitimate_address_p (mode, x, false))
13827 return x;
13828
13829 if (REG_P (XEXP (x, 0)))
13830 {
13831 rtx temp = gen_reg_rtx (Pmode);
13832 rtx val = force_operand (XEXP (x, 1), temp);
13833 if (val != temp)
13834 {
13835 val = convert_to_mode (Pmode, val, 1);
13836 emit_move_insn (temp, val);
13837 }
13838
13839 XEXP (x, 1) = temp;
13840 return x;
13841 }
13842
13843 else if (REG_P (XEXP (x, 1)))
13844 {
13845 rtx temp = gen_reg_rtx (Pmode);
13846 rtx val = force_operand (XEXP (x, 0), temp);
13847 if (val != temp)
13848 {
13849 val = convert_to_mode (Pmode, val, 1);
13850 emit_move_insn (temp, val);
13851 }
13852
13853 XEXP (x, 0) = temp;
13854 return x;
13855 }
13856 }
13857
13858 return x;
13859 }
13860 \f
13861 /* Print an integer constant expression in assembler syntax. Addition
13862 and subtraction are the only arithmetic that may appear in these
13863 expressions. FILE is the stdio stream to write to, X is the rtx, and
13864 CODE is the operand print code from the output string. */
13865
13866 static void
13867 output_pic_addr_const (FILE *file, rtx x, int code)
13868 {
13869 char buf[256];
13870
13871 switch (GET_CODE (x))
13872 {
13873 case PC:
13874 gcc_assert (flag_pic);
13875 putc ('.', file);
13876 break;
13877
13878 case SYMBOL_REF:
13879 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13880 output_addr_const (file, x);
13881 else
13882 {
13883 const char *name = XSTR (x, 0);
13884
13885 /* Mark the decl as referenced so that cgraph will
13886 output the function. */
13887 if (SYMBOL_REF_DECL (x))
13888 mark_decl_referenced (SYMBOL_REF_DECL (x));
13889
13890 #if TARGET_MACHO
13891 if (MACHOPIC_INDIRECT
13892 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13893 name = machopic_indirection_name (x, /*stub_p=*/true);
13894 #endif
13895 assemble_name (file, name);
13896 }
13897 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
13898 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13899 fputs ("@PLT", file);
13900 break;
13901
13902 case LABEL_REF:
13903 x = XEXP (x, 0);
13904 /* FALLTHRU */
13905 case CODE_LABEL:
13906 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13907 assemble_name (asm_out_file, buf);
13908 break;
13909
13910 case CONST_INT:
13911 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13912 break;
13913
13914 case CONST:
13915 /* This used to output parentheses around the expression,
13916 but that does not work on the 386 (either ATT or BSD assembler). */
13917 output_pic_addr_const (file, XEXP (x, 0), code);
13918 break;
13919
13920 case CONST_DOUBLE:
13921 if (GET_MODE (x) == VOIDmode)
13922 {
13923 /* We can use %d if the number is <32 bits and positive. */
13924 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13925 fprintf (file, "0x%lx%08lx",
13926 (unsigned long) CONST_DOUBLE_HIGH (x),
13927 (unsigned long) CONST_DOUBLE_LOW (x));
13928 else
13929 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13930 }
13931 else
13932 /* We can't handle floating point constants;
13933 TARGET_PRINT_OPERAND must handle them. */
13934 output_operand_lossage ("floating constant misused");
13935 break;
13936
13937 case PLUS:
13938 /* Some assemblers need integer constants to appear first. */
13939 if (CONST_INT_P (XEXP (x, 0)))
13940 {
13941 output_pic_addr_const (file, XEXP (x, 0), code);
13942 putc ('+', file);
13943 output_pic_addr_const (file, XEXP (x, 1), code);
13944 }
13945 else
13946 {
13947 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13948 output_pic_addr_const (file, XEXP (x, 1), code);
13949 putc ('+', file);
13950 output_pic_addr_const (file, XEXP (x, 0), code);
13951 }
13952 break;
13953
13954 case MINUS:
13955 if (!TARGET_MACHO)
13956 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13957 output_pic_addr_const (file, XEXP (x, 0), code);
13958 putc ('-', file);
13959 output_pic_addr_const (file, XEXP (x, 1), code);
13960 if (!TARGET_MACHO)
13961 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13962 break;
13963
13964 case UNSPEC:
13965 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13966 {
13967 bool f = i386_asm_output_addr_const_extra (file, x);
13968 gcc_assert (f);
13969 break;
13970 }
13971
13972 gcc_assert (XVECLEN (x, 0) == 1);
13973 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13974 switch (XINT (x, 1))
13975 {
13976 case UNSPEC_GOT:
13977 fputs ("@GOT", file);
13978 break;
13979 case UNSPEC_GOTOFF:
13980 fputs ("@GOTOFF", file);
13981 break;
13982 case UNSPEC_PLTOFF:
13983 fputs ("@PLTOFF", file);
13984 break;
13985 case UNSPEC_PCREL:
13986 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13987 "(%rip)" : "[rip]", file);
13988 break;
13989 case UNSPEC_GOTPCREL:
13990 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13991 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13992 break;
13993 case UNSPEC_GOTTPOFF:
13994 /* FIXME: This might be @TPOFF in Sun ld too. */
13995 fputs ("@gottpoff", file);
13996 break;
13997 case UNSPEC_TPOFF:
13998 fputs ("@tpoff", file);
13999 break;
14000 case UNSPEC_NTPOFF:
14001 if (TARGET_64BIT)
14002 fputs ("@tpoff", file);
14003 else
14004 fputs ("@ntpoff", file);
14005 break;
14006 case UNSPEC_DTPOFF:
14007 fputs ("@dtpoff", file);
14008 break;
14009 case UNSPEC_GOTNTPOFF:
14010 if (TARGET_64BIT)
14011 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14012 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14013 else
14014 fputs ("@gotntpoff", file);
14015 break;
14016 case UNSPEC_INDNTPOFF:
14017 fputs ("@indntpoff", file);
14018 break;
14019 #if TARGET_MACHO
14020 case UNSPEC_MACHOPIC_OFFSET:
14021 putc ('-', file);
14022 machopic_output_function_base_name (file);
14023 break;
14024 #endif
14025 default:
14026 output_operand_lossage ("invalid UNSPEC as operand");
14027 break;
14028 }
14029 break;
14030
14031 default:
14032 output_operand_lossage ("invalid expression as operand");
14033 }
14034 }
14035
14036 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14037 We need to emit DTP-relative relocations. */
14038
14039 static void ATTRIBUTE_UNUSED
14040 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14041 {
14042 fputs (ASM_LONG, file);
14043 output_addr_const (file, x);
14044 fputs ("@dtpoff", file);
14045 switch (size)
14046 {
14047 case 4:
14048 break;
14049 case 8:
14050 fputs (", 0", file);
14051 break;
14052 default:
14053 gcc_unreachable ();
14054 }
14055 }
14056
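/* Added illustration: assuming ASM_LONG expands to "\t.long\t" as on
   typical ELF targets, the directives emitted above look like

       .long   x@dtpoff          for SIZE == 4
       .long   x@dtpoff, 0       for SIZE == 8

   i.e. the 8-byte case is padded with an explicit zero upper word.  */
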
14057 /* Return true if X is a representation of the PIC register. This copes
14058 with calls from ix86_find_base_term, where the register might have
14059 been replaced by a cselib value. */
14060
14061 static bool
14062 ix86_pic_register_p (rtx x)
14063 {
14064 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14065 return (pic_offset_table_rtx
14066 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14067 else
14068 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14069 }
14070
14071 /* Helper function for ix86_delegitimize_address.
14072 Attempt to delegitimize TLS local-exec accesses. */
14073
14074 static rtx
14075 ix86_delegitimize_tls_address (rtx orig_x)
14076 {
14077 rtx x = orig_x, unspec;
14078 struct ix86_address addr;
14079
14080 if (!TARGET_TLS_DIRECT_SEG_REFS)
14081 return orig_x;
14082 if (MEM_P (x))
14083 x = XEXP (x, 0);
14084 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14085 return orig_x;
14086 if (ix86_decompose_address (x, &addr) == 0
14087 || addr.seg != DEFAULT_TLS_SEG_REG
14088 || addr.disp == NULL_RTX
14089 || GET_CODE (addr.disp) != CONST)
14090 return orig_x;
14091 unspec = XEXP (addr.disp, 0);
14092 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14093 unspec = XEXP (unspec, 0);
14094 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14095 return orig_x;
14096 x = XVECEXP (unspec, 0, 0);
14097 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14098 if (unspec != XEXP (addr.disp, 0))
14099 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14100 if (addr.index)
14101 {
14102 rtx idx = addr.index;
14103 if (addr.scale != 1)
14104 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14105 x = gen_rtx_PLUS (Pmode, idx, x);
14106 }
14107 if (addr.base)
14108 x = gen_rtx_PLUS (Pmode, addr.base, x);
14109 if (MEM_P (orig_x))
14110 x = replace_equiv_address_nv (orig_x, x);
14111 return x;
14112 }
14113
14114 /* In the name of slightly smaller debug output, and to cater to
14115 general assembler lossage, recognize PIC+GOTOFF and turn it back
14116 into a direct symbol reference.
14117
14118 On Darwin, this is necessary to avoid a crash, because Darwin
14119 has a different PIC label for each routine but the DWARF debugging
14120 information is not associated with any particular routine, so it's
14121 necessary to remove references to the PIC label from RTL stored by
14122 the DWARF output code. */
14123
14124 static rtx
14125 ix86_delegitimize_address (rtx x)
14126 {
14127 rtx orig_x = delegitimize_mem_from_attrs (x);
14128 /* addend is NULL or some rtx if x is something+GOTOFF where
14129 something doesn't include the PIC register. */
14130 rtx addend = NULL_RTX;
14131 /* reg_addend is NULL or a multiple of some register. */
14132 rtx reg_addend = NULL_RTX;
14133 /* const_addend is NULL or a const_int. */
14134 rtx const_addend = NULL_RTX;
14135 /* This is the result, or NULL. */
14136 rtx result = NULL_RTX;
14137
14138 x = orig_x;
14139
14140 if (MEM_P (x))
14141 x = XEXP (x, 0);
14142
14143 if (TARGET_64BIT)
14144 {
14145 if (GET_CODE (x) == CONST
14146 && GET_CODE (XEXP (x, 0)) == PLUS
14147 && GET_MODE (XEXP (x, 0)) == Pmode
14148 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14149 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14150 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14151 {
14152 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14153 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14154 if (MEM_P (orig_x))
14155 x = replace_equiv_address_nv (orig_x, x);
14156 return x;
14157 }
14158
14159 if (GET_CODE (x) == CONST
14160 && GET_CODE (XEXP (x, 0)) == UNSPEC
14161 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14162 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14163 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14164 {
14165 x = XVECEXP (XEXP (x, 0), 0, 0);
14166 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14167 {
14168 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14169 GET_MODE (x), 0);
14170 if (x == NULL_RTX)
14171 return orig_x;
14172 }
14173 return x;
14174 }
14175
14176 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14177 return ix86_delegitimize_tls_address (orig_x);
14178
14179 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14180 and -mcmodel=medium -fpic. */
14181 }
14182
14183 if (GET_CODE (x) != PLUS
14184 || GET_CODE (XEXP (x, 1)) != CONST)
14185 return ix86_delegitimize_tls_address (orig_x);
14186
14187 if (ix86_pic_register_p (XEXP (x, 0)))
14188 /* %ebx + GOT/GOTOFF */
14189 ;
14190 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14191 {
14192 /* %ebx + %reg * scale + GOT/GOTOFF */
14193 reg_addend = XEXP (x, 0);
14194 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14195 reg_addend = XEXP (reg_addend, 1);
14196 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14197 reg_addend = XEXP (reg_addend, 0);
14198 else
14199 {
14200 reg_addend = NULL_RTX;
14201 addend = XEXP (x, 0);
14202 }
14203 }
14204 else
14205 addend = XEXP (x, 0);
14206
14207 x = XEXP (XEXP (x, 1), 0);
14208 if (GET_CODE (x) == PLUS
14209 && CONST_INT_P (XEXP (x, 1)))
14210 {
14211 const_addend = XEXP (x, 1);
14212 x = XEXP (x, 0);
14213 }
14214
14215 if (GET_CODE (x) == UNSPEC
14216 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14217 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14218 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14219 && !MEM_P (orig_x) && !addend)))
14220 result = XVECEXP (x, 0, 0);
14221
14222 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14223 && !MEM_P (orig_x))
14224 result = XVECEXP (x, 0, 0);
14225
14226 if (! result)
14227 return ix86_delegitimize_tls_address (orig_x);
14228
14229 if (const_addend)
14230 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14231 if (reg_addend)
14232 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14233 if (addend)
14234 {
14235 /* If the rest of original X doesn't involve the PIC register, add
14236 addend and subtract pic_offset_table_rtx. This can happen e.g.
14237 for code like:
14238 leal (%ebx, %ecx, 4), %ecx
14239 ...
14240 movl foo@GOTOFF(%ecx), %edx
14241 in which case we return (%ecx - %ebx) + foo. */
14242 if (pic_offset_table_rtx)
14243 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14244 pic_offset_table_rtx),
14245 result);
14246 else
14247 return orig_x;
14248 }
14249 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14250 {
14251 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14252 if (result == NULL_RTX)
14253 return orig_x;
14254 }
14255 return result;
14256 }
14257
14258 /* If X is a machine specific address (i.e. a symbol or label being
14259 referenced as a displacement from the GOT implemented using an
14260 UNSPEC), then return the base term. Otherwise return X. */
14261
14262 rtx
14263 ix86_find_base_term (rtx x)
14264 {
14265 rtx term;
14266
14267 if (TARGET_64BIT)
14268 {
14269 if (GET_CODE (x) != CONST)
14270 return x;
14271 term = XEXP (x, 0);
14272 if (GET_CODE (term) == PLUS
14273 && (CONST_INT_P (XEXP (term, 1))
14274 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14275 term = XEXP (term, 0);
14276 if (GET_CODE (term) != UNSPEC
14277 || (XINT (term, 1) != UNSPEC_GOTPCREL
14278 && XINT (term, 1) != UNSPEC_PCREL))
14279 return x;
14280
14281 return XVECEXP (term, 0, 0);
14282 }
14283
14284 return ix86_delegitimize_address (x);
14285 }
14286 \f
14287 static void
14288 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14289 bool fp, FILE *file)
14290 {
14291 const char *suffix;
14292
14293 if (mode == CCFPmode || mode == CCFPUmode)
14294 {
14295 code = ix86_fp_compare_code_to_integer (code);
14296 mode = CCmode;
14297 }
14298 if (reverse)
14299 code = reverse_condition (code);
14300
14301 switch (code)
14302 {
14303 case EQ:
14304 switch (mode)
14305 {
14306 case CCAmode:
14307 suffix = "a";
14308 break;
14309
14310 case CCCmode:
14311 suffix = "c";
14312 break;
14313
14314 case CCOmode:
14315 suffix = "o";
14316 break;
14317
14318 case CCSmode:
14319 suffix = "s";
14320 break;
14321
14322 default:
14323 suffix = "e";
14324 }
14325 break;
14326 case NE:
14327 switch (mode)
14328 {
14329 case CCAmode:
14330 suffix = "na";
14331 break;
14332
14333 case CCCmode:
14334 suffix = "nc";
14335 break;
14336
14337 case CCOmode:
14338 suffix = "no";
14339 break;
14340
14341 case CCSmode:
14342 suffix = "ns";
14343 break;
14344
14345 default:
14346 suffix = "ne";
14347 }
14348 break;
14349 case GT:
14350 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14351 suffix = "g";
14352 break;
14353 case GTU:
14354 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14355 Those same assemblers have the same but opposite lossage on cmov. */
14356 if (mode == CCmode)
14357 suffix = fp ? "nbe" : "a";
14358 else
14359 gcc_unreachable ();
14360 break;
14361 case LT:
14362 switch (mode)
14363 {
14364 case CCNOmode:
14365 case CCGOCmode:
14366 suffix = "s";
14367 break;
14368
14369 case CCmode:
14370 case CCGCmode:
14371 suffix = "l";
14372 break;
14373
14374 default:
14375 gcc_unreachable ();
14376 }
14377 break;
14378 case LTU:
14379 if (mode == CCmode)
14380 suffix = "b";
14381 else if (mode == CCCmode)
14382 suffix = "c";
14383 else
14384 gcc_unreachable ();
14385 break;
14386 case GE:
14387 switch (mode)
14388 {
14389 case CCNOmode:
14390 case CCGOCmode:
14391 suffix = "ns";
14392 break;
14393
14394 case CCmode:
14395 case CCGCmode:
14396 suffix = "ge";
14397 break;
14398
14399 default:
14400 gcc_unreachable ();
14401 }
14402 break;
14403 case GEU:
14404 if (mode == CCmode)
14405 suffix = fp ? "nb" : "ae";
14406 else if (mode == CCCmode)
14407 suffix = "nc";
14408 else
14409 gcc_unreachable ();
14410 break;
14411 case LE:
14412 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14413 suffix = "le";
14414 break;
14415 case LEU:
14416 if (mode == CCmode)
14417 suffix = "be";
14418 else
14419 gcc_unreachable ();
14420 break;
14421 case UNORDERED:
14422 suffix = fp ? "u" : "p";
14423 break;
14424 case ORDERED:
14425 suffix = fp ? "nu" : "np";
14426 break;
14427 default:
14428 gcc_unreachable ();
14429 }
14430 fputs (suffix, file);
14431 }
14432
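/* Added examples (illustrative only): put_condition_code prints just the
   condition suffix, e.g. GT in CCmode/CCGCmode yields "g", GTU in CCmode
   yields "a" (or "nbe" when FP is set, for the fcmov assembler quirk
   noted above), and LT in CCNOmode/CCGOCmode tests only the sign flag
   and yields "s".  With REVERSE set the condition is reversed first, so
   EQ becomes "ne".  A (hypothetical) template such as "set%C1\t%0" would
   therefore end up emitting "setg", "seta", etc.  */
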
14433 /* Print the name of register X to FILE based on its machine mode and number.
14434 If CODE is 'w', pretend the mode is HImode.
14435 If CODE is 'b', pretend the mode is QImode.
14436 If CODE is 'k', pretend the mode is SImode.
14437 If CODE is 'q', pretend the mode is DImode.
14438 If CODE is 'x', pretend the mode is V4SFmode.
14439 If CODE is 't', pretend the mode is V8SFmode.
14440 If CODE is 'g', pretend the mode is V16SFmode.
14441 If CODE is 'h', pretend the reg is the 'high' byte register.
14442    If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
14443 If CODE is 'd', duplicate the operand for AVX instruction.
14444 */
14445
14446 void
14447 print_reg (rtx x, int code, FILE *file)
14448 {
14449 const char *reg;
14450 unsigned int regno;
14451 bool duplicated = code == 'd' && TARGET_AVX;
14452
14453 if (ASSEMBLER_DIALECT == ASM_ATT)
14454 putc ('%', file);
14455
14456 if (x == pc_rtx)
14457 {
14458 gcc_assert (TARGET_64BIT);
14459 fputs ("rip", file);
14460 return;
14461 }
14462
14463 regno = true_regnum (x);
14464 gcc_assert (regno != ARG_POINTER_REGNUM
14465 && regno != FRAME_POINTER_REGNUM
14466 && regno != FLAGS_REG
14467 && regno != FPSR_REG
14468 && regno != FPCR_REG);
14469
14470 if (code == 'w' || MMX_REG_P (x))
14471 code = 2;
14472 else if (code == 'b')
14473 code = 1;
14474 else if (code == 'k')
14475 code = 4;
14476 else if (code == 'q')
14477 code = 8;
14478 else if (code == 'y')
14479 code = 3;
14480 else if (code == 'h')
14481 code = 0;
14482 else if (code == 'x')
14483 code = 16;
14484 else if (code == 't')
14485 code = 32;
14486 else if (code == 'g')
14487 code = 64;
14488 else
14489 code = GET_MODE_SIZE (GET_MODE (x));
14490
14491   /* Irritatingly, AMD extended registers use a different naming convention
14492      from the normal registers: "r%d[bwd]".  */
14493 if (REX_INT_REGNO_P (regno))
14494 {
14495 gcc_assert (TARGET_64BIT);
14496 putc ('r', file);
14497 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14498 switch (code)
14499 {
14500 case 0:
14501 error ("extended registers have no high halves");
14502 break;
14503 case 1:
14504 putc ('b', file);
14505 break;
14506 case 2:
14507 putc ('w', file);
14508 break;
14509 case 4:
14510 putc ('d', file);
14511 break;
14512 case 8:
14513 /* no suffix */
14514 break;
14515 default:
14516 error ("unsupported operand size for extended register");
14517 break;
14518 }
14519 return;
14520 }
14521
14522 reg = NULL;
14523 switch (code)
14524 {
14525 case 3:
14526 if (STACK_TOP_P (x))
14527 {
14528 reg = "st(0)";
14529 break;
14530 }
14531 /* FALLTHRU */
14532 case 8:
14533 case 4:
14534 case 12:
14535 if (! ANY_FP_REG_P (x))
14536 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14537 /* FALLTHRU */
14538 case 16:
14539 case 2:
14540 normal:
14541 reg = hi_reg_name[regno];
14542 break;
14543 case 1:
14544 if (regno >= ARRAY_SIZE (qi_reg_name))
14545 goto normal;
14546 reg = qi_reg_name[regno];
14547 break;
14548 case 0:
14549 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14550 goto normal;
14551 reg = qi_high_reg_name[regno];
14552 break;
14553 case 32:
14554 if (SSE_REG_P (x))
14555 {
14556 gcc_assert (!duplicated);
14557 putc ('y', file);
14558 fputs (hi_reg_name[regno] + 1, file);
14559 return;
14560 }
14561 case 64:
14562 if (SSE_REG_P (x))
14563 {
14564 gcc_assert (!duplicated);
14565 putc ('z', file);
14566 fputs (hi_reg_name[REGNO (x)] + 1, file);
14567 return;
14568 }
14569 break;
14570 default:
14571 gcc_unreachable ();
14572 }
14573
14574 fputs (reg, file);
14575 if (duplicated)
14576 {
14577 if (ASSEMBLER_DIALECT == ASM_ATT)
14578 fprintf (file, ", %%%s", reg);
14579 else
14580 fprintf (file, ", %s", reg);
14581 }
14582 }
14583
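/* Added examples (illustrative only): for the register %eax, code 'k'
   prints "eax", 'w' prints "ax", 'b' prints "al" and 'h' prints "ah"
   (each with a "%" prefix under the AT&T dialect).  For the REX register
   r8, code 1/2/4/8 prints "r8b"/"r8w"/"r8d"/"r8".  For an SSE register,
   code 't' replaces the leading "x" of "xmm0" with "y" to give "ymm0",
   and code 'g' gives "zmm0".  */
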
14584 /* Locate some local-dynamic symbol still in use by this function
14585 so that we can print its name in some tls_local_dynamic_base
14586 pattern. */
14587
14588 static int
14589 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14590 {
14591 rtx x = *px;
14592
14593 if (GET_CODE (x) == SYMBOL_REF
14594 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14595 {
14596 cfun->machine->some_ld_name = XSTR (x, 0);
14597 return 1;
14598 }
14599
14600 return 0;
14601 }
14602
14603 static const char *
14604 get_some_local_dynamic_name (void)
14605 {
14606 rtx insn;
14607
14608 if (cfun->machine->some_ld_name)
14609 return cfun->machine->some_ld_name;
14610
14611 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14612 if (NONDEBUG_INSN_P (insn)
14613 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14614 return cfun->machine->some_ld_name;
14615
14616 return NULL;
14617 }
14618
14619 /* Meaning of CODE:
14620 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14621 C -- print opcode suffix for set/cmov insn.
14622 c -- like C, but print reversed condition
14623 F,f -- likewise, but for floating-point.
14624 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14625 otherwise nothing
14626 R -- print the prefix for register names.
14627 z -- print the opcode suffix for the size of the current operand.
14628 Z -- likewise, with special suffixes for x87 instructions.
14629 * -- print a star (in certain assembler syntax)
14630 A -- print an absolute memory reference.
14631 E -- print address with DImode register names if TARGET_64BIT.
14632 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14633    s -- print a shift double count, followed by the assembler's argument
14634 delimiter.
14635 b -- print the QImode name of the register for the indicated operand.
14636 %b0 would print %al if operands[0] is reg 0.
14637 w -- likewise, print the HImode name of the register.
14638 k -- likewise, print the SImode name of the register.
14639 q -- likewise, print the DImode name of the register.
14640 x -- likewise, print the V4SFmode name of the register.
14641 t -- likewise, print the V8SFmode name of the register.
14642 g -- likewise, print the V16SFmode name of the register.
14643 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14644 y -- print "st(0)" instead of "st" as a register.
14645 d -- print duplicated register operand for AVX instruction.
14646 D -- print condition for SSE cmp instruction.
14647 P -- if PIC, print an @PLT suffix.
14648 p -- print raw symbol name.
14649 X -- don't print any sort of PIC '@' suffix for a symbol.
14650 & -- print some in-use local-dynamic symbol name.
14651 H -- print a memory address offset by 8; used for sse high-parts
14652 Y -- print condition for XOP pcom* instruction.
14653 + -- print a branch hint as 'cs' or 'ds' prefix
14654 ; -- print a semicolon (after prefixes due to bug in older gas).
14655 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14656 @ -- print a segment register of thread base pointer load
14657 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14658 */
14659
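/* Added illustration (hypothetical template, not taken from i386.md):
   in an output template such as

       "mov%z0\t{%1, %0|%0, %1}"

   the "%z0" part prints the size suffix derived from operand 0 ('b',
   'w', 'l' or 'q' for the AT&T dialect, nothing for Intel), and the
   "{att|intel}" braces select between the two operand orders, so a
   DImode register destination produces "movq\t%rax, %rdx" under AT&T
   syntax.  */
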
14660 void
14661 ix86_print_operand (FILE *file, rtx x, int code)
14662 {
14663 if (code)
14664 {
14665 switch (code)
14666 {
14667 case 'A':
14668 switch (ASSEMBLER_DIALECT)
14669 {
14670 case ASM_ATT:
14671 putc ('*', file);
14672 break;
14673
14674 case ASM_INTEL:
14675 /* Intel syntax. For absolute addresses, registers should not
14676 be surrounded by braces. */
14677 if (!REG_P (x))
14678 {
14679 putc ('[', file);
14680 ix86_print_operand (file, x, 0);
14681 putc (']', file);
14682 return;
14683 }
14684 break;
14685
14686 default:
14687 gcc_unreachable ();
14688 }
14689
14690 ix86_print_operand (file, x, 0);
14691 return;
14692
14693 case 'E':
14694 /* Wrap address in an UNSPEC to declare special handling. */
14695 if (TARGET_64BIT)
14696 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14697
14698 output_address (x);
14699 return;
14700
14701 case 'L':
14702 if (ASSEMBLER_DIALECT == ASM_ATT)
14703 putc ('l', file);
14704 return;
14705
14706 case 'W':
14707 if (ASSEMBLER_DIALECT == ASM_ATT)
14708 putc ('w', file);
14709 return;
14710
14711 case 'B':
14712 if (ASSEMBLER_DIALECT == ASM_ATT)
14713 putc ('b', file);
14714 return;
14715
14716 case 'Q':
14717 if (ASSEMBLER_DIALECT == ASM_ATT)
14718 putc ('l', file);
14719 return;
14720
14721 case 'S':
14722 if (ASSEMBLER_DIALECT == ASM_ATT)
14723 putc ('s', file);
14724 return;
14725
14726 case 'T':
14727 if (ASSEMBLER_DIALECT == ASM_ATT)
14728 putc ('t', file);
14729 return;
14730
14731 case 'O':
14732 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14733 if (ASSEMBLER_DIALECT != ASM_ATT)
14734 return;
14735
14736 switch (GET_MODE_SIZE (GET_MODE (x)))
14737 {
14738 case 2:
14739 putc ('w', file);
14740 break;
14741
14742 case 4:
14743 putc ('l', file);
14744 break;
14745
14746 case 8:
14747 putc ('q', file);
14748 break;
14749
14750 default:
14751 output_operand_lossage
14752 ("invalid operand size for operand code 'O'");
14753 return;
14754 }
14755
14756 putc ('.', file);
14757 #endif
14758 return;
14759
14760 case 'z':
14761 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14762 {
14763 /* Opcodes don't get size suffixes if using Intel opcodes. */
14764 if (ASSEMBLER_DIALECT == ASM_INTEL)
14765 return;
14766
14767 switch (GET_MODE_SIZE (GET_MODE (x)))
14768 {
14769 case 1:
14770 putc ('b', file);
14771 return;
14772
14773 case 2:
14774 putc ('w', file);
14775 return;
14776
14777 case 4:
14778 putc ('l', file);
14779 return;
14780
14781 case 8:
14782 putc ('q', file);
14783 return;
14784
14785 default:
14786 output_operand_lossage
14787 ("invalid operand size for operand code 'z'");
14788 return;
14789 }
14790 }
14791
14792 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14793 warning
14794 (0, "non-integer operand used with operand code 'z'");
14795 /* FALLTHRU */
14796
14797 case 'Z':
14798 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14799 if (ASSEMBLER_DIALECT == ASM_INTEL)
14800 return;
14801
14802 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14803 {
14804 switch (GET_MODE_SIZE (GET_MODE (x)))
14805 {
14806 case 2:
14807 #ifdef HAVE_AS_IX86_FILDS
14808 putc ('s', file);
14809 #endif
14810 return;
14811
14812 case 4:
14813 putc ('l', file);
14814 return;
14815
14816 case 8:
14817 #ifdef HAVE_AS_IX86_FILDQ
14818 putc ('q', file);
14819 #else
14820 fputs ("ll", file);
14821 #endif
14822 return;
14823
14824 default:
14825 break;
14826 }
14827 }
14828 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14829 {
14830 /* 387 opcodes don't get size suffixes
14831 if the operands are registers. */
14832 if (STACK_REG_P (x))
14833 return;
14834
14835 switch (GET_MODE_SIZE (GET_MODE (x)))
14836 {
14837 case 4:
14838 putc ('s', file);
14839 return;
14840
14841 case 8:
14842 putc ('l', file);
14843 return;
14844
14845 case 12:
14846 case 16:
14847 putc ('t', file);
14848 return;
14849
14850 default:
14851 break;
14852 }
14853 }
14854 else
14855 {
14856 output_operand_lossage
14857 ("invalid operand type used with operand code 'Z'");
14858 return;
14859 }
14860
14861 output_operand_lossage
14862 ("invalid operand size for operand code 'Z'");
14863 return;
14864
14865 case 'd':
14866 case 'b':
14867 case 'w':
14868 case 'k':
14869 case 'q':
14870 case 'h':
14871 case 't':
14872 case 'g':
14873 case 'y':
14874 case 'x':
14875 case 'X':
14876 case 'P':
14877 case 'p':
14878 break;
14879
14880 case 's':
14881 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14882 {
14883 ix86_print_operand (file, x, 0);
14884 fputs (", ", file);
14885 }
14886 return;
14887
14888 case 'Y':
14889 switch (GET_CODE (x))
14890 {
14891 case NE:
14892 fputs ("neq", file);
14893 break;
14894 case EQ:
14895 fputs ("eq", file);
14896 break;
14897 case GE:
14898 case GEU:
14899 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14900 break;
14901 case GT:
14902 case GTU:
14903 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14904 break;
14905 case LE:
14906 case LEU:
14907 fputs ("le", file);
14908 break;
14909 case LT:
14910 case LTU:
14911 fputs ("lt", file);
14912 break;
14913 case UNORDERED:
14914 fputs ("unord", file);
14915 break;
14916 case ORDERED:
14917 fputs ("ord", file);
14918 break;
14919 case UNEQ:
14920 fputs ("ueq", file);
14921 break;
14922 case UNGE:
14923 fputs ("nlt", file);
14924 break;
14925 case UNGT:
14926 fputs ("nle", file);
14927 break;
14928 case UNLE:
14929 fputs ("ule", file);
14930 break;
14931 case UNLT:
14932 fputs ("ult", file);
14933 break;
14934 case LTGT:
14935 fputs ("une", file);
14936 break;
14937 default:
14938 output_operand_lossage ("operand is not a condition code, "
14939 "invalid operand code 'Y'");
14940 return;
14941 }
14942 return;
14943
14944 case 'D':
14945 	  /* A little bit of brain damage here.  The SSE compare instructions
14946 	     use completely different names for the comparisons than the
14947 	     fp conditional moves do.  */
14948 switch (GET_CODE (x))
14949 {
14950 case UNEQ:
14951 if (TARGET_AVX)
14952 {
14953 fputs ("eq_us", file);
14954 break;
14955 }
14956 case EQ:
14957 fputs ("eq", file);
14958 break;
14959 case UNLT:
14960 if (TARGET_AVX)
14961 {
14962 fputs ("nge", file);
14963 break;
14964 }
14965 case LT:
14966 fputs ("lt", file);
14967 break;
14968 case UNLE:
14969 if (TARGET_AVX)
14970 {
14971 fputs ("ngt", file);
14972 break;
14973 }
14974 case LE:
14975 fputs ("le", file);
14976 break;
14977 case UNORDERED:
14978 fputs ("unord", file);
14979 break;
14980 case LTGT:
14981 if (TARGET_AVX)
14982 {
14983 fputs ("neq_oq", file);
14984 break;
14985 }
14986 case NE:
14987 fputs ("neq", file);
14988 break;
14989 case GE:
14990 if (TARGET_AVX)
14991 {
14992 fputs ("ge", file);
14993 break;
14994 }
14995 case UNGE:
14996 fputs ("nlt", file);
14997 break;
14998 case GT:
14999 if (TARGET_AVX)
15000 {
15001 fputs ("gt", file);
15002 break;
15003 }
15004 case UNGT:
15005 fputs ("nle", file);
15006 break;
15007 case ORDERED:
15008 fputs ("ord", file);
15009 break;
15010 default:
15011 output_operand_lossage ("operand is not a condition code, "
15012 "invalid operand code 'D'");
15013 return;
15014 }
15015 return;
15016
15017 case 'F':
15018 case 'f':
15019 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15020 if (ASSEMBLER_DIALECT == ASM_ATT)
15021 putc ('.', file);
15022 #endif
15023
15024 case 'C':
15025 case 'c':
15026 if (!COMPARISON_P (x))
15027 {
15028 output_operand_lossage ("operand is not a condition code, "
15029 "invalid operand code '%c'", code);
15030 return;
15031 }
15032 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15033 code == 'c' || code == 'f',
15034 code == 'F' || code == 'f',
15035 file);
15036 return;
15037
15038 case 'H':
15039 if (!offsettable_memref_p (x))
15040 {
15041 output_operand_lossage ("operand is not an offsettable memory "
15042 "reference, invalid operand code 'H'");
15043 return;
15044 }
15045 /* It doesn't actually matter what mode we use here, as we're
15046 only going to use this for printing. */
15047 x = adjust_address_nv (x, DImode, 8);
15048 /* Output 'qword ptr' for intel assembler dialect. */
15049 if (ASSEMBLER_DIALECT == ASM_INTEL)
15050 code = 'q';
15051 break;
15052
15053 case 'K':
15054 gcc_assert (CONST_INT_P (x));
15055
15056 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15057 #ifdef HAVE_AS_IX86_HLE
15058 fputs ("xacquire ", file);
15059 #else
15060 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15061 #endif
15062 else if (INTVAL (x) & IX86_HLE_RELEASE)
15063 #ifdef HAVE_AS_IX86_HLE
15064 fputs ("xrelease ", file);
15065 #else
15066 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15067 #endif
15068 /* We do not want to print value of the operand. */
15069 return;
15070
15071 case 'N':
15072 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15073 fputs ("{z}", file);
15074 return;
15075
15076 case 'R':
15077 gcc_assert (CONST_INT_P (x));
15078
15079 if (ASSEMBLER_DIALECT == ASM_INTEL)
15080 fputs (", ", file);
15081
15082 switch (INTVAL (x))
15083 {
15084 case ROUND_NEAREST_INT:
15085 fputs ("{rn-sae}", file);
15086 break;
15087 case ROUND_NEG_INF:
15088 fputs ("{rd-sae}", file);
15089 break;
15090 case ROUND_POS_INF:
15091 fputs ("{ru-sae}", file);
15092 break;
15093 case ROUND_ZERO:
15094 fputs ("{rz-sae}", file);
15095 break;
15096 case ROUND_SAE:
15097 fputs ("{sae}", file);
15098 break;
15099 default:
15100 gcc_unreachable ();
15101 }
15102
15103 if (ASSEMBLER_DIALECT == ASM_ATT)
15104 fputs (", ", file);
15105
15106 return;
15107
15108 case '*':
15109 if (ASSEMBLER_DIALECT == ASM_ATT)
15110 putc ('*', file);
15111 return;
15112
15113 case '&':
15114 {
15115 const char *name = get_some_local_dynamic_name ();
15116 if (name == NULL)
15117 output_operand_lossage ("'%%&' used without any "
15118 "local dynamic TLS references");
15119 else
15120 assemble_name (file, name);
15121 return;
15122 }
15123
15124 case '+':
15125 {
15126 rtx x;
15127
15128 if (!optimize
15129 || optimize_function_for_size_p (cfun)
15130 || !TARGET_BRANCH_PREDICTION_HINTS)
15131 return;
15132
15133 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15134 if (x)
15135 {
15136 int pred_val = XINT (x, 0);
15137
15138 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15139 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15140 {
15141 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15142 bool cputaken
15143 = final_forward_branch_p (current_output_insn) == 0;
15144
15145 /* Emit hints only in the case default branch prediction
15146 heuristics would fail. */
15147 if (taken != cputaken)
15148 {
15149 /* We use 3e (DS) prefix for taken branches and
15150 2e (CS) prefix for not taken branches. */
15151 if (taken)
15152 fputs ("ds ; ", file);
15153 else
15154 fputs ("cs ; ", file);
15155 }
15156 }
15157 }
15158 return;
15159 }
15160
15161 case ';':
15162 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15163 putc (';', file);
15164 #endif
15165 return;
15166
15167 case '@':
15168 if (ASSEMBLER_DIALECT == ASM_ATT)
15169 putc ('%', file);
15170
15171 /* The kernel uses a different segment register for performance
15172 reasons; a system call would not have to trash the userspace
15173 segment register, which would be expensive. */
15174 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15175 fputs ("fs", file);
15176 else
15177 fputs ("gs", file);
15178 return;
15179
15180 case '~':
15181 putc (TARGET_AVX2 ? 'i' : 'f', file);
15182 return;
15183
15184 case '^':
15185 if (TARGET_64BIT && Pmode != word_mode)
15186 fputs ("addr32 ", file);
15187 return;
15188
15189 default:
15190 output_operand_lossage ("invalid operand code '%c'", code);
15191 }
15192 }
15193
15194 if (REG_P (x))
15195 print_reg (x, code, file);
15196
15197 else if (MEM_P (x))
15198 {
15199 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15200 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15201 && GET_MODE (x) != BLKmode)
15202 {
15203 const char * size;
15204 switch (GET_MODE_SIZE (GET_MODE (x)))
15205 {
15206 case 1: size = "BYTE"; break;
15207 case 2: size = "WORD"; break;
15208 case 4: size = "DWORD"; break;
15209 case 8: size = "QWORD"; break;
15210 case 12: size = "TBYTE"; break;
15211 case 16:
15212 if (GET_MODE (x) == XFmode)
15213 size = "TBYTE";
15214 else
15215 size = "XMMWORD";
15216 break;
15217 case 32: size = "YMMWORD"; break;
15218 case 64: size = "ZMMWORD"; break;
15219 default:
15220 gcc_unreachable ();
15221 }
15222
15223 /* Check for explicit size override (codes 'b', 'w', 'k',
15224 'q' and 'x') */
15225 if (code == 'b')
15226 size = "BYTE";
15227 else if (code == 'w')
15228 size = "WORD";
15229 else if (code == 'k')
15230 size = "DWORD";
15231 else if (code == 'q')
15232 size = "QWORD";
15233 else if (code == 'x')
15234 size = "XMMWORD";
15235
15236 fputs (size, file);
15237 fputs (" PTR ", file);
15238 }
15239
15240 x = XEXP (x, 0);
15241 /* Avoid (%rip) for call operands. */
15242 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15243 && !CONST_INT_P (x))
15244 output_addr_const (file, x);
15245 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15246 output_operand_lossage ("invalid constraints for operand");
15247 else
15248 output_address (x);
15249 }
15250
15251 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15252 {
15253 REAL_VALUE_TYPE r;
15254 long l;
15255
15256 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15257 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15258
15259 if (ASSEMBLER_DIALECT == ASM_ATT)
15260 putc ('$', file);
15261 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15262 if (code == 'q')
15263 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15264 (unsigned long long) (int) l);
15265 else
15266 fprintf (file, "0x%08x", (unsigned int) l);
15267 }
15268
15269 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15270 {
15271 REAL_VALUE_TYPE r;
15272 long l[2];
15273
15274 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15275 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15276
15277 if (ASSEMBLER_DIALECT == ASM_ATT)
15278 putc ('$', file);
15279 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15280 }
15281
15282 /* These float cases don't actually occur as immediate operands. */
15283 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15284 {
15285 char dstr[30];
15286
15287 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15288 fputs (dstr, file);
15289 }
15290
15291 else
15292 {
15293 /* We have patterns that allow zero sets of memory, for instance.
15294 In 64-bit mode, we should probably support all 8-byte vectors,
15295 since we can in fact encode that into an immediate. */
15296 if (GET_CODE (x) == CONST_VECTOR)
15297 {
15298 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15299 x = const0_rtx;
15300 }
15301
15302 if (code != 'P' && code != 'p')
15303 {
15304 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15305 {
15306 if (ASSEMBLER_DIALECT == ASM_ATT)
15307 putc ('$', file);
15308 }
15309 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15310 || GET_CODE (x) == LABEL_REF)
15311 {
15312 if (ASSEMBLER_DIALECT == ASM_ATT)
15313 putc ('$', file);
15314 else
15315 fputs ("OFFSET FLAT:", file);
15316 }
15317 }
15318 if (CONST_INT_P (x))
15319 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15320 else if (flag_pic || MACHOPIC_INDIRECT)
15321 output_pic_addr_const (file, x, code);
15322 else
15323 output_addr_const (file, x);
15324 }
15325 }
15326
15327 static bool
15328 ix86_print_operand_punct_valid_p (unsigned char code)
15329 {
15330 return (code == '@' || code == '*' || code == '+' || code == '&'
15331 || code == ';' || code == '~' || code == '^');
15332 }
15333 \f
15334 /* Print a memory operand whose address is ADDR. */
15335
15336 static void
15337 ix86_print_operand_address (FILE *file, rtx addr)
15338 {
15339 struct ix86_address parts;
15340 rtx base, index, disp;
15341 int scale;
15342 int ok;
15343 bool vsib = false;
15344 int code = 0;
15345
15346 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15347 {
15348 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15349 gcc_assert (parts.index == NULL_RTX);
15350 parts.index = XVECEXP (addr, 0, 1);
15351 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15352 addr = XVECEXP (addr, 0, 0);
15353 vsib = true;
15354 }
15355 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15356 {
15357 gcc_assert (TARGET_64BIT);
15358 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15359 code = 'q';
15360 }
15361 else
15362 ok = ix86_decompose_address (addr, &parts);
15363
15364 gcc_assert (ok);
15365
15366 base = parts.base;
15367 index = parts.index;
15368 disp = parts.disp;
15369 scale = parts.scale;
15370
15371 switch (parts.seg)
15372 {
15373 case SEG_DEFAULT:
15374 break;
15375 case SEG_FS:
15376 case SEG_GS:
15377 if (ASSEMBLER_DIALECT == ASM_ATT)
15378 putc ('%', file);
15379 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15380 break;
15381 default:
15382 gcc_unreachable ();
15383 }
15384
15385 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15386 if (TARGET_64BIT && !base && !index)
15387 {
15388 rtx symbol = disp;
15389
15390 if (GET_CODE (disp) == CONST
15391 && GET_CODE (XEXP (disp, 0)) == PLUS
15392 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15393 symbol = XEXP (XEXP (disp, 0), 0);
15394
15395 if (GET_CODE (symbol) == LABEL_REF
15396 || (GET_CODE (symbol) == SYMBOL_REF
15397 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15398 base = pc_rtx;
15399 }
15400 if (!base && !index)
15401 {
15402 /* Displacement only requires special attention. */
15403
15404 if (CONST_INT_P (disp))
15405 {
15406 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15407 fputs ("ds:", file);
15408 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15409 }
15410 else if (flag_pic)
15411 output_pic_addr_const (file, disp, 0);
15412 else
15413 output_addr_const (file, disp);
15414 }
15415 else
15416 {
15417 /* Print SImode register names to force addr32 prefix. */
15418 if (SImode_address_operand (addr, VOIDmode))
15419 {
15420 #ifdef ENABLE_CHECKING
15421 gcc_assert (TARGET_64BIT);
15422 switch (GET_CODE (addr))
15423 {
15424 case SUBREG:
15425 gcc_assert (GET_MODE (addr) == SImode);
15426 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15427 break;
15428 case ZERO_EXTEND:
15429 case AND:
15430 gcc_assert (GET_MODE (addr) == DImode);
15431 break;
15432 default:
15433 gcc_unreachable ();
15434 }
15435 #endif
15436 gcc_assert (!code);
15437 code = 'k';
15438 }
15439 else if (code == 0
15440 && TARGET_X32
15441 && disp
15442 && CONST_INT_P (disp)
15443 && INTVAL (disp) < -16*1024*1024)
15444 {
15445 /* X32 runs in 64-bit mode, where displacement, DISP, in
15446 address DISP(%r64), is encoded as 32-bit immediate sign-
15447 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15448 address is %r64 + 0xffffffffbffffd00. When %r64 <
15449 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15450 which is invalid for x32. The correct address is %r64
15451 - 0x40000300 == 0xf7ffdd64. To properly encode
15452 -0x40000300(%r64) for x32, we zero-extend negative
15453 displacement by forcing addr32 prefix which truncates
15454 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15455 zero-extend all negative displacements, including -1(%rsp).
15456 However, for small negative displacements, sign-extension
15457 won't cause overflow. We only zero-extend negative
15458 	     displacements if they are < -16*1024*1024, which is also used
15459 to check legitimate address displacements for PIC. */
15460 code = 'k';
15461 }
15462
15463 if (ASSEMBLER_DIALECT == ASM_ATT)
15464 {
15465 if (disp)
15466 {
15467 if (flag_pic)
15468 output_pic_addr_const (file, disp, 0);
15469 else if (GET_CODE (disp) == LABEL_REF)
15470 output_asm_label (disp);
15471 else
15472 output_addr_const (file, disp);
15473 }
15474
15475 putc ('(', file);
15476 if (base)
15477 print_reg (base, code, file);
15478 if (index)
15479 {
15480 putc (',', file);
15481 print_reg (index, vsib ? 0 : code, file);
15482 if (scale != 1 || vsib)
15483 fprintf (file, ",%d", scale);
15484 }
15485 putc (')', file);
15486 }
15487 else
15488 {
15489 rtx offset = NULL_RTX;
15490
15491 if (disp)
15492 {
15493 /* Pull out the offset of a symbol; print any symbol itself. */
15494 if (GET_CODE (disp) == CONST
15495 && GET_CODE (XEXP (disp, 0)) == PLUS
15496 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15497 {
15498 offset = XEXP (XEXP (disp, 0), 1);
15499 disp = gen_rtx_CONST (VOIDmode,
15500 XEXP (XEXP (disp, 0), 0));
15501 }
15502
15503 if (flag_pic)
15504 output_pic_addr_const (file, disp, 0);
15505 else if (GET_CODE (disp) == LABEL_REF)
15506 output_asm_label (disp);
15507 else if (CONST_INT_P (disp))
15508 offset = disp;
15509 else
15510 output_addr_const (file, disp);
15511 }
15512
15513 putc ('[', file);
15514 if (base)
15515 {
15516 print_reg (base, code, file);
15517 if (offset)
15518 {
15519 if (INTVAL (offset) >= 0)
15520 putc ('+', file);
15521 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15522 }
15523 }
15524 else if (offset)
15525 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15526 else
15527 putc ('0', file);
15528
15529 if (index)
15530 {
15531 putc ('+', file);
15532 print_reg (index, vsib ? 0 : code, file);
15533 if (scale != 1 || vsib)
15534 fprintf (file, "*%d", scale);
15535 }
15536 putc (']', file);
15537 }
15538 }
15539 }
15540
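/* Added illustration: for an address decomposed as base %ebx, index
   %ecx, scale 4 and displacement 12, the AT&T branch above prints
   "12(%ebx,%ecx,4)" while the Intel branch prints "[ebx+12+ecx*4]";
   a 64-bit RIP-relative symbol reference is printed with pc_rtx as the
   base, e.g. "foo(%rip)" under AT&T syntax.  */
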
15541 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15542
15543 static bool
15544 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15545 {
15546 rtx op;
15547
15548 if (GET_CODE (x) != UNSPEC)
15549 return false;
15550
15551 op = XVECEXP (x, 0, 0);
15552 switch (XINT (x, 1))
15553 {
15554 case UNSPEC_GOTTPOFF:
15555 output_addr_const (file, op);
15556 /* FIXME: This might be @TPOFF in Sun ld. */
15557 fputs ("@gottpoff", file);
15558 break;
15559 case UNSPEC_TPOFF:
15560 output_addr_const (file, op);
15561 fputs ("@tpoff", file);
15562 break;
15563 case UNSPEC_NTPOFF:
15564 output_addr_const (file, op);
15565 if (TARGET_64BIT)
15566 fputs ("@tpoff", file);
15567 else
15568 fputs ("@ntpoff", file);
15569 break;
15570 case UNSPEC_DTPOFF:
15571 output_addr_const (file, op);
15572 fputs ("@dtpoff", file);
15573 break;
15574 case UNSPEC_GOTNTPOFF:
15575 output_addr_const (file, op);
15576 if (TARGET_64BIT)
15577 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15578 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15579 else
15580 fputs ("@gotntpoff", file);
15581 break;
15582 case UNSPEC_INDNTPOFF:
15583 output_addr_const (file, op);
15584 fputs ("@indntpoff", file);
15585 break;
15586 #if TARGET_MACHO
15587 case UNSPEC_MACHOPIC_OFFSET:
15588 output_addr_const (file, op);
15589 putc ('-', file);
15590 machopic_output_function_base_name (file);
15591 break;
15592 #endif
15593
15594 case UNSPEC_STACK_CHECK:
15595 {
15596 int offset;
15597
15598 gcc_assert (flag_split_stack);
15599
15600 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15601 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15602 #else
15603 gcc_unreachable ();
15604 #endif
15605
15606 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15607 }
15608 break;
15609
15610 default:
15611 return false;
15612 }
15613
15614 return true;
15615 }
15616 \f
15617 /* Split one or more double-mode RTL references into pairs of half-mode
15618 references. The RTL can be REG, offsettable MEM, integer constant, or
15619 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15620 split and "num" is its length. lo_half and hi_half are output arrays
15621 that parallel "operands". */
15622
15623 void
15624 split_double_mode (enum machine_mode mode, rtx operands[],
15625 int num, rtx lo_half[], rtx hi_half[])
15626 {
15627 enum machine_mode half_mode;
15628 unsigned int byte;
15629
15630 switch (mode)
15631 {
15632 case TImode:
15633 half_mode = DImode;
15634 break;
15635 case DImode:
15636 half_mode = SImode;
15637 break;
15638 default:
15639 gcc_unreachable ();
15640 }
15641
15642 byte = GET_MODE_SIZE (half_mode);
15643
15644 while (num--)
15645 {
15646 rtx op = operands[num];
15647
15648       /* simplify_subreg refuses to split volatile memory addresses,
15649 	 but we still have to handle them.  */
15650 if (MEM_P (op))
15651 {
15652 lo_half[num] = adjust_address (op, half_mode, 0);
15653 hi_half[num] = adjust_address (op, half_mode, byte);
15654 }
15655 else
15656 {
15657 lo_half[num] = simplify_gen_subreg (half_mode, op,
15658 GET_MODE (op) == VOIDmode
15659 ? mode : GET_MODE (op), 0);
15660 hi_half[num] = simplify_gen_subreg (half_mode, op,
15661 GET_MODE (op) == VOIDmode
15662 ? mode : GET_MODE (op), byte);
15663 }
15664 }
15665 }
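
/* Added illustration: splitting a DImode value into SImode halves on
   this little-endian target yields, for a pseudo register,

       lo_half = (subreg:SI (reg:DI N) 0)
       hi_half = (subreg:SI (reg:DI N) 4)

   and for a (possibly volatile) memory operand the two halves are the
   same address adjusted by 0 and 4 bytes respectively.  */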
15666 \f
15667 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15668 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15669 is the expression of the binary operation. The output may either be
15670 emitted here, or returned to the caller, like all output_* functions.
15671
15672 There is no guarantee that the operands are the same mode, as they
15673 might be within FLOAT or FLOAT_EXTEND expressions. */
15674
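/* Added illustration (a sketch of the kind of template this routine
   returns, assuming the selected prefix and suffix are concatenated as
   in the code below): an SFmode addition whose second source is a
   memory operand yields "fadd%Z2\t%2" for the 387, while the same
   operation on SSE registers yields "addss\t{%2, %0|%0, %2}", or
   "vaddss\t{%2, %1, %0|%0, %1, %2}" when AVX is enabled.  */
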
15675 #ifndef SYSV386_COMPAT
15676 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15677 wants to fix the assemblers because that causes incompatibility
15678 with gcc. No-one wants to fix gcc because that causes
15679 incompatibility with assemblers... You can use the option of
15680 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15681 #define SYSV386_COMPAT 1
15682 #endif
15683
15684 const char *
15685 output_387_binary_op (rtx insn, rtx *operands)
15686 {
15687 static char buf[40];
15688 const char *p;
15689 const char *ssep;
15690 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15691
15692 #ifdef ENABLE_CHECKING
15693   /* Even if we do not want to check the inputs, this documents the input
15694      constraints, which helps in understanding the following code.  */
15695 if (STACK_REG_P (operands[0])
15696 && ((REG_P (operands[1])
15697 && REGNO (operands[0]) == REGNO (operands[1])
15698 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15699 || (REG_P (operands[2])
15700 && REGNO (operands[0]) == REGNO (operands[2])
15701 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15702 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15703 ; /* ok */
15704 else
15705 gcc_assert (is_sse);
15706 #endif
15707
15708 switch (GET_CODE (operands[3]))
15709 {
15710 case PLUS:
15711 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15712 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15713 p = "fiadd";
15714 else
15715 p = "fadd";
15716 ssep = "vadd";
15717 break;
15718
15719 case MINUS:
15720 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15721 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15722 p = "fisub";
15723 else
15724 p = "fsub";
15725 ssep = "vsub";
15726 break;
15727
15728 case MULT:
15729 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15730 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15731 p = "fimul";
15732 else
15733 p = "fmul";
15734 ssep = "vmul";
15735 break;
15736
15737 case DIV:
15738 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15739 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15740 p = "fidiv";
15741 else
15742 p = "fdiv";
15743 ssep = "vdiv";
15744 break;
15745
15746 default:
15747 gcc_unreachable ();
15748 }
15749
15750 if (is_sse)
15751 {
15752 if (TARGET_AVX)
15753 {
15754 strcpy (buf, ssep);
15755 if (GET_MODE (operands[0]) == SFmode)
15756 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15757 else
15758 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15759 }
15760 else
15761 {
15762 strcpy (buf, ssep + 1);
15763 if (GET_MODE (operands[0]) == SFmode)
15764 strcat (buf, "ss\t{%2, %0|%0, %2}");
15765 else
15766 strcat (buf, "sd\t{%2, %0|%0, %2}");
15767 }
15768 return buf;
15769 }
15770 strcpy (buf, p);
15771
15772 switch (GET_CODE (operands[3]))
15773 {
15774 case MULT:
15775 case PLUS:
15776 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15777 {
15778 rtx temp = operands[2];
15779 operands[2] = operands[1];
15780 operands[1] = temp;
15781 }
15782
15783 /* We now know operands[0] == operands[1]. */
15784
15785 if (MEM_P (operands[2]))
15786 {
15787 p = "%Z2\t%2";
15788 break;
15789 }
15790
15791 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15792 {
15793 if (STACK_TOP_P (operands[0]))
15794 /* How is it that we are storing to a dead operand[2]?
15795 Well, presumably operands[1] is dead too. We can't
15796 store the result to st(0) as st(0) gets popped on this
15797 instruction. Instead store to operands[2] (which I
15798 think has to be st(1)). st(1) will be popped later.
15799 gcc <= 2.8.1 didn't have this check and generated
15800 assembly code that the Unixware assembler rejected. */
15801 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15802 else
15803 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15804 break;
15805 }
15806
15807 if (STACK_TOP_P (operands[0]))
15808 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15809 else
15810 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15811 break;
15812
15813 case MINUS:
15814 case DIV:
15815 if (MEM_P (operands[1]))
15816 {
15817 p = "r%Z1\t%1";
15818 break;
15819 }
15820
15821 if (MEM_P (operands[2]))
15822 {
15823 p = "%Z2\t%2";
15824 break;
15825 }
15826
15827 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15828 {
15829 #if SYSV386_COMPAT
15830 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15831 derived assemblers, confusingly reverse the direction of
15832 the operation for fsub{r} and fdiv{r} when the
15833 destination register is not st(0). The Intel assembler
15834 doesn't have this brain damage. Read !SYSV386_COMPAT to
15835 figure out what the hardware really does. */
15836 if (STACK_TOP_P (operands[0]))
15837 p = "{p\t%0, %2|rp\t%2, %0}";
15838 else
15839 p = "{rp\t%2, %0|p\t%0, %2}";
15840 #else
15841 if (STACK_TOP_P (operands[0]))
15842 /* As above for fmul/fadd, we can't store to st(0). */
15843 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15844 else
15845 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15846 #endif
15847 break;
15848 }
15849
15850 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15851 {
15852 #if SYSV386_COMPAT
15853 if (STACK_TOP_P (operands[0]))
15854 p = "{rp\t%0, %1|p\t%1, %0}";
15855 else
15856 p = "{p\t%1, %0|rp\t%0, %1}";
15857 #else
15858 if (STACK_TOP_P (operands[0]))
15859 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15860 else
15861 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15862 #endif
15863 break;
15864 }
15865
15866 if (STACK_TOP_P (operands[0]))
15867 {
15868 if (STACK_TOP_P (operands[1]))
15869 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15870 else
15871 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15872 break;
15873 }
15874 else if (STACK_TOP_P (operands[1]))
15875 {
15876 #if SYSV386_COMPAT
15877 p = "{\t%1, %0|r\t%0, %1}";
15878 #else
15879 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15880 #endif
15881 }
15882 else
15883 {
15884 #if SYSV386_COMPAT
15885 p = "{r\t%2, %0|\t%0, %2}";
15886 #else
15887 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15888 #endif
15889 }
15890 break;
15891
15892 default:
15893 gcc_unreachable ();
15894 }
15895
15896 strcat (buf, p);
15897 return buf;
15898 }
15899
15900 /* Check if a 256bit AVX register is referenced inside EXP. */
15901
15902 static int
15903 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
15904 {
15905 rtx exp = *pexp;
15906
15907 if (GET_CODE (exp) == SUBREG)
15908 exp = SUBREG_REG (exp);
15909
15910 if (REG_P (exp)
15911 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
15912 return 1;
15913
15914 return 0;
15915 }
15916
15917 /* Return needed mode for entity in optimize_mode_switching pass. */
15918
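/* For the AVX_U128 entity the states are, roughly: CLEAN means the upper
   halves of the ymm registers are known to be zero, DIRTY means they may
   hold live data, and ANY means we do not care.  Mode switching uses this
   information to place vzeroupper and avoid AVX/SSE transition penalties.  */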
15919 static int
15920 ix86_avx_u128_mode_needed (rtx insn)
15921 {
15922 if (CALL_P (insn))
15923 {
15924 rtx link;
15925
15926 /* Needed mode is set to AVX_U128_CLEAN if there are
15927 no 256bit modes used in function arguments. */
15928 for (link = CALL_INSN_FUNCTION_USAGE (insn);
15929 link;
15930 link = XEXP (link, 1))
15931 {
15932 if (GET_CODE (XEXP (link, 0)) == USE)
15933 {
15934 rtx arg = XEXP (XEXP (link, 0), 0);
15935
15936 if (ix86_check_avx256_register (&arg, NULL))
15937 return AVX_U128_DIRTY;
15938 }
15939 }
15940
15941 return AVX_U128_CLEAN;
15942 }
15943
15944 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
15945 changes state only when a 256bit register is written to, but we need
15946 to prevent the compiler from moving the optimal insertion point above
15947 a possible read from a 256bit register. */
15948 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
15949 return AVX_U128_DIRTY;
15950
15951 return AVX_U128_ANY;
15952 }
15953
15954 /* Return mode that i387 must be switched into
15955 prior to the execution of insn. */
15956
15957 static int
15958 ix86_i387_mode_needed (int entity, rtx insn)
15959 {
15960 enum attr_i387_cw mode;
15961
15962 /* The mode UNINITIALIZED is used to store the control word after a
15963 function call or ASM pattern. The mode ANY specifies that the function
15964 has no requirements on the control word and makes no changes in the
15965 bits we are interested in. */
15966
15967 if (CALL_P (insn)
15968 || (NONJUMP_INSN_P (insn)
15969 && (asm_noperands (PATTERN (insn)) >= 0
15970 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15971 return I387_CW_UNINITIALIZED;
15972
15973 if (recog_memoized (insn) < 0)
15974 return I387_CW_ANY;
15975
15976 mode = get_attr_i387_cw (insn);
15977
15978 switch (entity)
15979 {
15980 case I387_TRUNC:
15981 if (mode == I387_CW_TRUNC)
15982 return mode;
15983 break;
15984
15985 case I387_FLOOR:
15986 if (mode == I387_CW_FLOOR)
15987 return mode;
15988 break;
15989
15990 case I387_CEIL:
15991 if (mode == I387_CW_CEIL)
15992 return mode;
15993 break;
15994
15995 case I387_MASK_PM:
15996 if (mode == I387_CW_MASK_PM)
15997 return mode;
15998 break;
15999
16000 default:
16001 gcc_unreachable ();
16002 }
16003
16004 return I387_CW_ANY;
16005 }
16006
16007 /* Return mode that entity must be switched into
16008 prior to the execution of insn. */
16009
16010 int
16011 ix86_mode_needed (int entity, rtx insn)
16012 {
16013 switch (entity)
16014 {
16015 case AVX_U128:
16016 return ix86_avx_u128_mode_needed (insn);
16017 case I387_TRUNC:
16018 case I387_FLOOR:
16019 case I387_CEIL:
16020 case I387_MASK_PM:
16021 return ix86_i387_mode_needed (entity, insn);
16022 default:
16023 gcc_unreachable ();
16024 }
16025 return 0;
16026 }
16027
16028 /* Check if a 256bit AVX register is referenced in stores. */
16029
16030 static void
16031 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16032 {
16033 if (ix86_check_avx256_register (&dest, NULL))
16034 {
16035 bool *used = (bool *) data;
16036 *used = true;
16037 }
16038 }
16039
16040 /* Calculate mode of upper 128bit AVX registers after the insn. */
16041
16042 static int
16043 ix86_avx_u128_mode_after (int mode, rtx insn)
16044 {
16045 rtx pat = PATTERN (insn);
16046
16047 if (vzeroupper_operation (pat, VOIDmode)
16048 || vzeroall_operation (pat, VOIDmode))
16049 return AVX_U128_CLEAN;
16050
16051 /* We know that the state is clean after a CALL insn if no 256bit
16052 register is used for the function return value. */
16053 if (CALL_P (insn))
16054 {
16055 bool avx_reg256_found = false;
16056 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16057
16058 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16059 }
16060
16061 /* Otherwise, return the current mode. Remember that if the insn
16062 references AVX 256bit registers, the mode was already changed
16063 to DIRTY from MODE_NEEDED. */
16064 return mode;
16065 }
16066
16067 /* Return the mode that an insn results in. */
16068
16069 int
16070 ix86_mode_after (int entity, int mode, rtx insn)
16071 {
16072 switch (entity)
16073 {
16074 case AVX_U128:
16075 return ix86_avx_u128_mode_after (mode, insn);
16076 case I387_TRUNC:
16077 case I387_FLOOR:
16078 case I387_CEIL:
16079 case I387_MASK_PM:
16080 return mode;
16081 default:
16082 gcc_unreachable ();
16083 }
16084 }
16085
16086 static int
16087 ix86_avx_u128_mode_entry (void)
16088 {
16089 tree arg;
16090
16091 /* Entry mode is set to AVX_U128_DIRTY if there are
16092 256bit modes used in function arguments. */
16093 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16094 arg = TREE_CHAIN (arg))
16095 {
16096 rtx incoming = DECL_INCOMING_RTL (arg);
16097
16098 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16099 return AVX_U128_DIRTY;
16100 }
16101
16102 return AVX_U128_CLEAN;
16103 }
16104
16105 /* Return a mode that ENTITY is assumed to be
16106 switched to at function entry. */
16107
16108 int
16109 ix86_mode_entry (int entity)
16110 {
16111 switch (entity)
16112 {
16113 case AVX_U128:
16114 return ix86_avx_u128_mode_entry ();
16115 case I387_TRUNC:
16116 case I387_FLOOR:
16117 case I387_CEIL:
16118 case I387_MASK_PM:
16119 return I387_CW_ANY;
16120 default:
16121 gcc_unreachable ();
16122 }
16123 }
16124
16125 static int
16126 ix86_avx_u128_mode_exit (void)
16127 {
16128 rtx reg = crtl->return_rtx;
16129
16130 /* Exit mode is set to AVX_U128_DIRTY if there are
16131 256bit modes used in the function return register. */
16132 if (reg && ix86_check_avx256_register (&reg, NULL))
16133 return AVX_U128_DIRTY;
16134
16135 return AVX_U128_CLEAN;
16136 }
16137
16138 /* Return a mode that ENTITY is assumed to be
16139 switched to at function exit. */
16140
16141 int
16142 ix86_mode_exit (int entity)
16143 {
16144 switch (entity)
16145 {
16146 case AVX_U128:
16147 return ix86_avx_u128_mode_exit ();
16148 case I387_TRUNC:
16149 case I387_FLOOR:
16150 case I387_CEIL:
16151 case I387_MASK_PM:
16152 return I387_CW_ANY;
16153 default:
16154 gcc_unreachable ();
16155 }
16156 }
16157
16158 /* Output code to initialize control word copies used by trunc?f?i and
16159 rounding patterns. The current control word is saved in the stack slot
16160 SLOT_CW_STORED, while a copy modified for MODE is stored in a slot of its own. */
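/* As a reminder, bits 10-11 of the x87 control word select the rounding
   mode (00 = to nearest, 01 = down, 10 = up, 11 = toward zero), which is
   why the code below ORs in 0x0400, 0x0800 or 0x0c00; bit 5 (0x0020)
   masks the precision exception, used for nearbyint().  */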
16161
16162 static void
16163 emit_i387_cw_initialization (int mode)
16164 {
16165 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16166 rtx new_mode;
16167
16168 enum ix86_stack_slot slot;
16169
16170 rtx reg = gen_reg_rtx (HImode);
16171
16172 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16173 emit_move_insn (reg, copy_rtx (stored_mode));
16174
16175 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16176 || optimize_insn_for_size_p ())
16177 {
16178 switch (mode)
16179 {
16180 case I387_CW_TRUNC:
16181 /* round toward zero (truncate) */
16182 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16183 slot = SLOT_CW_TRUNC;
16184 break;
16185
16186 case I387_CW_FLOOR:
16187 /* round down toward -oo */
16188 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16189 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16190 slot = SLOT_CW_FLOOR;
16191 break;
16192
16193 case I387_CW_CEIL:
16194 /* round up toward +oo */
16195 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16196 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16197 slot = SLOT_CW_CEIL;
16198 break;
16199
16200 case I387_CW_MASK_PM:
16201 /* mask precision exception for nearbyint() */
16202 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16203 slot = SLOT_CW_MASK_PM;
16204 break;
16205
16206 default:
16207 gcc_unreachable ();
16208 }
16209 }
16210 else
16211 {
16212 switch (mode)
16213 {
16214 case I387_CW_TRUNC:
16215 /* round toward zero (truncate) */
16216 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16217 slot = SLOT_CW_TRUNC;
16218 break;
16219
16220 case I387_CW_FLOOR:
16221 /* round down toward -oo */
16222 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16223 slot = SLOT_CW_FLOOR;
16224 break;
16225
16226 case I387_CW_CEIL:
16227 /* round up toward +oo */
16228 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16229 slot = SLOT_CW_CEIL;
16230 break;
16231
16232 case I387_CW_MASK_PM:
16233 /* mask precision exception for nearbyint() */
16234 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16235 slot = SLOT_CW_MASK_PM;
16236 break;
16237
16238 default:
16239 gcc_unreachable ();
16240 }
16241 }
16242
16243 gcc_assert (slot < MAX_386_STACK_LOCALS);
16244
16245 new_mode = assign_386_stack_local (HImode, slot);
16246 emit_move_insn (new_mode, reg);
16247 }
16248
16249 /* Emit vzeroupper. */
16250
16251 void
16252 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16253 {
16254 int i;
16255
16256 /* Cancel automatic vzeroupper insertion if there are
16257 live call-saved SSE registers at the insertion point. */
16258
16259 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16260 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16261 return;
16262
16263 if (TARGET_64BIT)
16264 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16265 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16266 return;
16267
16268 emit_insn (gen_avx_vzeroupper ());
16269 }
16270
16271 /* Generate one or more insns to set ENTITY to MODE. */
16272
16273 void
16274 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16275 {
16276 switch (entity)
16277 {
16278 case AVX_U128:
16279 if (mode == AVX_U128_CLEAN)
16280 ix86_avx_emit_vzeroupper (regs_live);
16281 break;
16282 case I387_TRUNC:
16283 case I387_FLOOR:
16284 case I387_CEIL:
16285 case I387_MASK_PM:
16286 if (mode != I387_CW_ANY
16287 && mode != I387_CW_UNINITIALIZED)
16288 emit_i387_cw_initialization (mode);
16289 break;
16290 default:
16291 gcc_unreachable ();
16292 }
16293 }
16294
16295 /* Output code for INSN to convert a float to a signed int. OPERANDS
16296 are the insn operands. The output may be [HSD]Imode and the input
16297 operand may be [SDX]Fmode. */
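/* A typical emitted sequence, when the rounding mode must be changed, is
     fldcw    %3        (load the truncating control word)
     fistp%Z0 %0        (store the rounded integer and pop)
     fldcw    %2        (restore the original control word)
   as can be seen from the code below.  */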
16298
16299 const char *
16300 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16301 {
16302 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16303 int dimode_p = GET_MODE (operands[0]) == DImode;
16304 int round_mode = get_attr_i387_cw (insn);
16305
16306 /* Jump through a hoop or two for DImode, since the hardware has no
16307 non-popping instruction. We used to do this a different way, but
16308 that was somewhat fragile and broke with post-reload splitters. */
16309 if ((dimode_p || fisttp) && !stack_top_dies)
16310 output_asm_insn ("fld\t%y1", operands);
16311
16312 gcc_assert (STACK_TOP_P (operands[1]));
16313 gcc_assert (MEM_P (operands[0]));
16314 gcc_assert (GET_MODE (operands[1]) != TFmode);
16315
16316 if (fisttp)
16317 output_asm_insn ("fisttp%Z0\t%0", operands);
16318 else
16319 {
16320 if (round_mode != I387_CW_ANY)
16321 output_asm_insn ("fldcw\t%3", operands);
16322 if (stack_top_dies || dimode_p)
16323 output_asm_insn ("fistp%Z0\t%0", operands);
16324 else
16325 output_asm_insn ("fist%Z0\t%0", operands);
16326 if (round_mode != I387_CW_ANY)
16327 output_asm_insn ("fldcw\t%2", operands);
16328 }
16329
16330 return "";
16331 }
16332
16333 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16334 have the values zero or one, indicates the ffreep insn's operand
16335 from the OPERANDS array. */
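/* When the assembler does not understand the ffreep mnemonic, the fallback
   below emits the instruction's two opcode bytes directly: 0xDF 0xC0+i
   encodes ffreep %st(i), written here as a little-endian 16-bit word via
   ASM_SHORT.  */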
16336
16337 static const char *
16338 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16339 {
16340 if (TARGET_USE_FFREEP)
16341 #ifdef HAVE_AS_IX86_FFREEP
16342 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16343 #else
16344 {
16345 static char retval[32];
16346 int regno = REGNO (operands[opno]);
16347
16348 gcc_assert (STACK_REGNO_P (regno));
16349
16350 regno -= FIRST_STACK_REG;
16351
16352 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16353 return retval;
16354 }
16355 #endif
16356
16357 return opno ? "fstp\t%y1" : "fstp\t%y0";
16358 }
16359
16360
16361 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16362 should be used. UNORDERED_P is true when fucom should be used. */
16363
16364 const char *
16365 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16366 {
16367 int stack_top_dies;
16368 rtx cmp_op0, cmp_op1;
16369 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16370
16371 if (eflags_p)
16372 {
16373 cmp_op0 = operands[0];
16374 cmp_op1 = operands[1];
16375 }
16376 else
16377 {
16378 cmp_op0 = operands[1];
16379 cmp_op1 = operands[2];
16380 }
16381
16382 if (is_sse)
16383 {
16384 if (GET_MODE (operands[0]) == SFmode)
16385 if (unordered_p)
16386 return "%vucomiss\t{%1, %0|%0, %1}";
16387 else
16388 return "%vcomiss\t{%1, %0|%0, %1}";
16389 else
16390 if (unordered_p)
16391 return "%vucomisd\t{%1, %0|%0, %1}";
16392 else
16393 return "%vcomisd\t{%1, %0|%0, %1}";
16394 }
16395
16396 gcc_assert (STACK_TOP_P (cmp_op0));
16397
16398 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16399
16400 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16401 {
16402 if (stack_top_dies)
16403 {
16404 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16405 return output_387_ffreep (operands, 1);
16406 }
16407 else
16408 return "ftst\n\tfnstsw\t%0";
16409 }
16410
16411 if (STACK_REG_P (cmp_op1)
16412 && stack_top_dies
16413 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16414 && REGNO (cmp_op1) != FIRST_STACK_REG)
16415 {
16416 /* If the top of the 387 stack dies, and the other operand
16417 is also a stack register that dies, then this must be a
16418 `fcompp' float compare. */
16419
16420 if (eflags_p)
16421 {
16422 /* There is no double popping fcomi variant. Fortunately,
16423 eflags is immune from the fstp's cc clobbering. */
16424 if (unordered_p)
16425 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16426 else
16427 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16428 return output_387_ffreep (operands, 0);
16429 }
16430 else
16431 {
16432 if (unordered_p)
16433 return "fucompp\n\tfnstsw\t%0";
16434 else
16435 return "fcompp\n\tfnstsw\t%0";
16436 }
16437 }
16438 else
16439 {
16440 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16441
16442 static const char * const alt[16] =
16443 {
16444 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16445 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16446 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16447 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16448
16449 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16450 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16451 NULL,
16452 NULL,
16453
16454 "fcomi\t{%y1, %0|%0, %y1}",
16455 "fcomip\t{%y1, %0|%0, %y1}",
16456 "fucomi\t{%y1, %0|%0, %y1}",
16457 "fucomip\t{%y1, %0|%0, %y1}",
16458
16459 NULL,
16460 NULL,
16461 NULL,
16462 NULL
16463 };
16464
16465 int mask;
16466 const char *ret;
16467
16468 mask = eflags_p << 3;
16469 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16470 mask |= unordered_p << 1;
16471 mask |= stack_top_dies;
16472
16473 gcc_assert (mask < 16);
16474 ret = alt[mask];
16475 gcc_assert (ret);
16476
16477 return ret;
16478 }
16479 }
16480
16481 void
16482 ix86_output_addr_vec_elt (FILE *file, int value)
16483 {
16484 const char *directive = ASM_LONG;
16485
16486 #ifdef ASM_QUAD
16487 if (TARGET_LP64)
16488 directive = ASM_QUAD;
16489 #else
16490 gcc_assert (!TARGET_64BIT);
16491 #endif
16492
16493 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16494 }
16495
16496 void
16497 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16498 {
16499 const char *directive = ASM_LONG;
16500
16501 #ifdef ASM_QUAD
16502 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16503 directive = ASM_QUAD;
16504 #else
16505 gcc_assert (!TARGET_64BIT);
16506 #endif
16507 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16508 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16509 fprintf (file, "%s%s%d-%s%d\n",
16510 directive, LPREFIX, value, LPREFIX, rel);
16511 else if (HAVE_AS_GOTOFF_IN_DATA)
16512 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16513 #if TARGET_MACHO
16514 else if (TARGET_MACHO)
16515 {
16516 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16517 machopic_output_function_base_name (file);
16518 putc ('\n', file);
16519 }
16520 #endif
16521 else
16522 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16523 GOT_SYMBOL_NAME, LPREFIX, value);
16524 }
16525 \f
16526 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16527 for the target. */
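/* For example, clearing (reg:SI ax) normally emits "xorl %eax, %eax"
   (2 bytes) rather than "movl $0, %eax" (5 bytes); since xor clobbers
   the flags, the SET is wrapped in a PARALLEL with a flags clobber.  */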
16528
16529 void
16530 ix86_expand_clear (rtx dest)
16531 {
16532 rtx tmp;
16533
16534 /* We play register width games, which are only valid after reload. */
16535 gcc_assert (reload_completed);
16536
16537 /* Avoid HImode and its attendant prefix byte. */
16538 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16539 dest = gen_rtx_REG (SImode, REGNO (dest));
16540 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16541
16542 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16543 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
16544 {
16545 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16546 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16547 }
16548
16549 emit_insn (tmp);
16550 }
16551
16552 /* X is an unchanging MEM. If it is a constant pool reference, return
16553 the constant pool rtx, else NULL. */
16554
16555 rtx
16556 maybe_get_pool_constant (rtx x)
16557 {
16558 x = ix86_delegitimize_address (XEXP (x, 0));
16559
16560 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16561 return get_pool_constant (x);
16562
16563 return NULL_RTX;
16564 }
16565
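/* Expand a move in MODE from operands[1] to operands[0], legitimizing
   TLS, PIC and PE-COFF symbol references and forcing awkward constants
   into registers or memory as needed.  */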
16566 void
16567 ix86_expand_move (enum machine_mode mode, rtx operands[])
16568 {
16569 rtx op0, op1;
16570 enum tls_model model;
16571
16572 op0 = operands[0];
16573 op1 = operands[1];
16574
16575 if (GET_CODE (op1) == SYMBOL_REF)
16576 {
16577 rtx tmp;
16578
16579 model = SYMBOL_REF_TLS_MODEL (op1);
16580 if (model)
16581 {
16582 op1 = legitimize_tls_address (op1, model, true);
16583 op1 = force_operand (op1, op0);
16584 if (op1 == op0)
16585 return;
16586 op1 = convert_to_mode (mode, op1, 1);
16587 }
16588 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16589 op1 = tmp;
16590 }
16591 else if (GET_CODE (op1) == CONST
16592 && GET_CODE (XEXP (op1, 0)) == PLUS
16593 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16594 {
16595 rtx addend = XEXP (XEXP (op1, 0), 1);
16596 rtx symbol = XEXP (XEXP (op1, 0), 0);
16597 rtx tmp;
16598
16599 model = SYMBOL_REF_TLS_MODEL (symbol);
16600 if (model)
16601 tmp = legitimize_tls_address (symbol, model, true);
16602 else
16603 tmp = legitimize_pe_coff_symbol (symbol, true);
16604
16605 if (tmp)
16606 {
16607 tmp = force_operand (tmp, NULL);
16608 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16609 op0, 1, OPTAB_DIRECT);
16610 if (tmp == op0)
16611 return;
16612 op1 = convert_to_mode (mode, tmp, 1);
16613 }
16614 }
16615
16616 if ((flag_pic || MACHOPIC_INDIRECT)
16617 && symbolic_operand (op1, mode))
16618 {
16619 if (TARGET_MACHO && !TARGET_64BIT)
16620 {
16621 #if TARGET_MACHO
16622 /* dynamic-no-pic */
16623 if (MACHOPIC_INDIRECT)
16624 {
16625 rtx temp = ((reload_in_progress
16626 || ((op0 && REG_P (op0))
16627 && mode == Pmode))
16628 ? op0 : gen_reg_rtx (Pmode));
16629 op1 = machopic_indirect_data_reference (op1, temp);
16630 if (MACHOPIC_PURE)
16631 op1 = machopic_legitimize_pic_address (op1, mode,
16632 temp == op1 ? 0 : temp);
16633 }
16634 if (op0 != op1 && GET_CODE (op0) != MEM)
16635 {
16636 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16637 emit_insn (insn);
16638 return;
16639 }
16640 if (GET_CODE (op0) == MEM)
16641 op1 = force_reg (Pmode, op1);
16642 else
16643 {
16644 rtx temp = op0;
16645 if (GET_CODE (temp) != REG)
16646 temp = gen_reg_rtx (Pmode);
16647 temp = legitimize_pic_address (op1, temp);
16648 if (temp == op0)
16649 return;
16650 op1 = temp;
16651 }
16652 /* dynamic-no-pic */
16653 #endif
16654 }
16655 else
16656 {
16657 if (MEM_P (op0))
16658 op1 = force_reg (mode, op1);
16659 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16660 {
16661 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16662 op1 = legitimize_pic_address (op1, reg);
16663 if (op0 == op1)
16664 return;
16665 op1 = convert_to_mode (mode, op1, 1);
16666 }
16667 }
16668 }
16669 else
16670 {
16671 if (MEM_P (op0)
16672 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16673 || !push_operand (op0, mode))
16674 && MEM_P (op1))
16675 op1 = force_reg (mode, op1);
16676
16677 if (push_operand (op0, mode)
16678 && ! general_no_elim_operand (op1, mode))
16679 op1 = copy_to_mode_reg (mode, op1);
16680
16681 /* Force large constants in 64bit compilation into a register
16682 so they get CSEed. */
16683 if (can_create_pseudo_p ()
16684 && (mode == DImode) && TARGET_64BIT
16685 && immediate_operand (op1, mode)
16686 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16687 && !register_operand (op0, mode)
16688 && optimize)
16689 op1 = copy_to_mode_reg (mode, op1);
16690
16691 if (can_create_pseudo_p ()
16692 && FLOAT_MODE_P (mode)
16693 && GET_CODE (op1) == CONST_DOUBLE)
16694 {
16695 /* If we are loading a floating point constant to a register,
16696 force the value to memory now, since we'll get better code
16697 out the back end. */
16698
16699 op1 = validize_mem (force_const_mem (mode, op1));
16700 if (!register_operand (op0, mode))
16701 {
16702 rtx temp = gen_reg_rtx (mode);
16703 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16704 emit_move_insn (op0, temp);
16705 return;
16706 }
16707 }
16708 }
16709
16710 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16711 }
16712
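/* Expand a vector move in MODE.  Non-trivial constants are forced into
   the constant pool, and insufficiently aligned SSE operands are routed
   through ix86_expand_vector_move_misalign.  */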
16713 void
16714 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16715 {
16716 rtx op0 = operands[0], op1 = operands[1];
16717 unsigned int align = GET_MODE_ALIGNMENT (mode);
16718
16719 /* Force constants other than zero into memory. We do not know how
16720 the instructions used to build constants modify the upper 64 bits
16721 of the register; once we have that information we may be able
16722 to handle some of them more efficiently. */
16723 if (can_create_pseudo_p ()
16724 && register_operand (op0, mode)
16725 && (CONSTANT_P (op1)
16726 || (GET_CODE (op1) == SUBREG
16727 && CONSTANT_P (SUBREG_REG (op1))))
16728 && !standard_sse_constant_p (op1))
16729 op1 = validize_mem (force_const_mem (mode, op1));
16730
16731 /* We need to check memory alignment for SSE modes since attributes
16732 can make operands unaligned. */
16733 if (can_create_pseudo_p ()
16734 && SSE_REG_MODE_P (mode)
16735 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16736 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16737 {
16738 rtx tmp[2];
16739
16740 /* ix86_expand_vector_move_misalign() does not like constants ... */
16741 if (CONSTANT_P (op1)
16742 || (GET_CODE (op1) == SUBREG
16743 && CONSTANT_P (SUBREG_REG (op1))))
16744 op1 = validize_mem (force_const_mem (mode, op1));
16745
16746 /* ... nor both arguments in memory. */
16747 if (!register_operand (op0, mode)
16748 && !register_operand (op1, mode))
16749 op1 = force_reg (mode, op1);
16750
16751 tmp[0] = op0; tmp[1] = op1;
16752 ix86_expand_vector_move_misalign (mode, tmp);
16753 return;
16754 }
16755
16756 /* Make operand1 a register if it isn't already. */
16757 if (can_create_pseudo_p ()
16758 && !register_operand (op0, mode)
16759 && !register_operand (op1, mode))
16760 {
16761 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16762 return;
16763 }
16764
16765 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16766 }
16767
16768 /* Split 32-byte AVX unaligned load and store if needed. */
16769
16770 static void
16771 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16772 {
16773 rtx m;
16774 rtx (*extract) (rtx, rtx, rtx);
16775 rtx (*load_unaligned) (rtx, rtx);
16776 rtx (*store_unaligned) (rtx, rtx);
16777 enum machine_mode mode;
16778
16779 switch (GET_MODE (op0))
16780 {
16781 default:
16782 gcc_unreachable ();
16783 case V32QImode:
16784 extract = gen_avx_vextractf128v32qi;
16785 load_unaligned = gen_avx_loaddquv32qi;
16786 store_unaligned = gen_avx_storedquv32qi;
16787 mode = V16QImode;
16788 break;
16789 case V8SFmode:
16790 extract = gen_avx_vextractf128v8sf;
16791 load_unaligned = gen_avx_loadups256;
16792 store_unaligned = gen_avx_storeups256;
16793 mode = V4SFmode;
16794 break;
16795 case V4DFmode:
16796 extract = gen_avx_vextractf128v4df;
16797 load_unaligned = gen_avx_loadupd256;
16798 store_unaligned = gen_avx_storeupd256;
16799 mode = V2DFmode;
16800 break;
16801 }
16802
16803 if (MEM_P (op1))
16804 {
16805 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16806 {
16807 rtx r = gen_reg_rtx (mode);
16808 m = adjust_address (op1, mode, 0);
16809 emit_move_insn (r, m);
16810 m = adjust_address (op1, mode, 16);
16811 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16812 emit_move_insn (op0, r);
16813 }
16814 /* Normal *mov<mode>_internal pattern will handle
16815 unaligned loads just fine if misaligned_operand
16816 is true, and without the UNSPEC it can be combined
16817 with arithmetic instructions. */
16818 else if (misaligned_operand (op1, GET_MODE (op1)))
16819 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16820 else
16821 emit_insn (load_unaligned (op0, op1));
16822 }
16823 else if (MEM_P (op0))
16824 {
16825 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16826 {
16827 m = adjust_address (op0, mode, 0);
16828 emit_insn (extract (m, op1, const0_rtx));
16829 m = adjust_address (op0, mode, 16);
16830 emit_insn (extract (m, op1, const1_rtx));
16831 }
16832 else
16833 emit_insn (store_unaligned (op0, op1));
16834 }
16835 else
16836 gcc_unreachable ();
16837 }
16838
16839 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16840 straight to ix86_expand_vector_move. */
16841 /* Code generation for scalar reg-reg moves of single and double precision data:
16842 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16843 movaps reg, reg
16844 else
16845 movss reg, reg
16846 if (x86_sse_partial_reg_dependency == true)
16847 movapd reg, reg
16848 else
16849 movsd reg, reg
16850
16851 Code generation for scalar loads of double precision data:
16852 if (x86_sse_split_regs == true)
16853 movlpd mem, reg (gas syntax)
16854 else
16855 movsd mem, reg
16856
16857 Code generation for unaligned packed loads of single precision data
16858 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16859 if (x86_sse_unaligned_move_optimal)
16860 movups mem, reg
16861
16862 if (x86_sse_partial_reg_dependency == true)
16863 {
16864 xorps reg, reg
16865 movlps mem, reg
16866 movhps mem+8, reg
16867 }
16868 else
16869 {
16870 movlps mem, reg
16871 movhps mem+8, reg
16872 }
16873
16874 Code generation for unaligned packed loads of double precision data
16875 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16876 if (x86_sse_unaligned_move_optimal)
16877 movupd mem, reg
16878
16879 if (x86_sse_split_regs == true)
16880 {
16881 movlpd mem, reg
16882 movhpd mem+8, reg
16883 }
16884 else
16885 {
16886 movsd mem, reg
16887 movhpd mem+8, reg
16888 }
16889 */
16890
16891 void
16892 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
16893 {
16894 rtx op0, op1, orig_op0 = NULL_RTX, m;
16895 rtx (*load_unaligned) (rtx, rtx);
16896 rtx (*store_unaligned) (rtx, rtx);
16897
16898 op0 = operands[0];
16899 op1 = operands[1];
16900
16901 if (GET_MODE_SIZE (mode) == 64)
16902 {
16903 switch (GET_MODE_CLASS (mode))
16904 {
16905 case MODE_VECTOR_INT:
16906 case MODE_INT:
16907 if (GET_MODE (op0) != V16SImode)
16908 {
16909 if (!MEM_P (op0))
16910 {
16911 orig_op0 = op0;
16912 op0 = gen_reg_rtx (V16SImode);
16913 }
16914 else
16915 op0 = gen_lowpart (V16SImode, op0);
16916 }
16917 op1 = gen_lowpart (V16SImode, op1);
16918 /* FALLTHRU */
16919
16920 case MODE_VECTOR_FLOAT:
16921 switch (GET_MODE (op0))
16922 {
16923 default:
16924 gcc_unreachable ();
16925 case V16SImode:
16926 load_unaligned = gen_avx512f_loaddquv16si;
16927 store_unaligned = gen_avx512f_storedquv16si;
16928 break;
16929 case V16SFmode:
16930 load_unaligned = gen_avx512f_loadups512;
16931 store_unaligned = gen_avx512f_storeups512;
16932 break;
16933 case V8DFmode:
16934 load_unaligned = gen_avx512f_loadupd512;
16935 store_unaligned = gen_avx512f_storeupd512;
16936 break;
16937 }
16938
16939 if (MEM_P (op1))
16940 emit_insn (load_unaligned (op0, op1));
16941 else if (MEM_P (op0))
16942 emit_insn (store_unaligned (op0, op1));
16943 else
16944 gcc_unreachable ();
16945 if (orig_op0)
16946 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
16947 break;
16948
16949 default:
16950 gcc_unreachable ();
16951 }
16952
16953 return;
16954 }
16955
16956 if (TARGET_AVX
16957 && GET_MODE_SIZE (mode) == 32)
16958 {
16959 switch (GET_MODE_CLASS (mode))
16960 {
16961 case MODE_VECTOR_INT:
16962 case MODE_INT:
16963 if (GET_MODE (op0) != V32QImode)
16964 {
16965 if (!MEM_P (op0))
16966 {
16967 orig_op0 = op0;
16968 op0 = gen_reg_rtx (V32QImode);
16969 }
16970 else
16971 op0 = gen_lowpart (V32QImode, op0);
16972 }
16973 op1 = gen_lowpart (V32QImode, op1);
16974 /* FALLTHRU */
16975
16976 case MODE_VECTOR_FLOAT:
16977 ix86_avx256_split_vector_move_misalign (op0, op1);
16978 if (orig_op0)
16979 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
16980 break;
16981
16982 default:
16983 gcc_unreachable ();
16984 }
16985
16986 return;
16987 }
16988
16989 if (MEM_P (op1))
16990 {
16991 /* Normal *mov<mode>_internal pattern will handle
16992 unaligned loads just fine if misaligned_operand
16993 is true, and without the UNSPEC it can be combined
16994 with arithmetic instructions. */
16995 if (TARGET_AVX
16996 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
16997 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
16998 && misaligned_operand (op1, GET_MODE (op1)))
16999 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17000 /* ??? If we have typed data, then it would appear that using
17001 movdqu is the only way to get unaligned data loaded with
17002 integer type. */
17003 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17004 {
17005 if (GET_MODE (op0) != V16QImode)
17006 {
17007 orig_op0 = op0;
17008 op0 = gen_reg_rtx (V16QImode);
17009 }
17010 op1 = gen_lowpart (V16QImode, op1);
17011 /* We will eventually emit movups based on insn attributes. */
17012 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17013 if (orig_op0)
17014 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17015 }
17016 else if (TARGET_SSE2 && mode == V2DFmode)
17017 {
17018 rtx zero;
17019
17020 if (TARGET_AVX
17021 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17022 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17023 || optimize_insn_for_size_p ())
17024 {
17025 /* We will eventually emit movups based on insn attributes. */
17026 emit_insn (gen_sse2_loadupd (op0, op1));
17027 return;
17028 }
17029
17030 /* When SSE registers are split into halves, we can avoid
17031 writing to the top half twice. */
17032 if (TARGET_SSE_SPLIT_REGS)
17033 {
17034 emit_clobber (op0);
17035 zero = op0;
17036 }
17037 else
17038 {
17039 /* ??? Not sure about the best option for the Intel chips.
17040 The following would seem to satisfy; the register is
17041 entirely cleared, breaking the dependency chain. We
17042 then store to the upper half, with a dependency depth
17043 of one. A rumor has it that Intel recommends two movsd
17044 followed by an unpacklpd, but this is unconfirmed. And
17045 given that the dependency depth of the unpacklpd would
17046 still be one, I'm not sure why this would be better. */
17047 zero = CONST0_RTX (V2DFmode);
17048 }
17049
17050 m = adjust_address (op1, DFmode, 0);
17051 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17052 m = adjust_address (op1, DFmode, 8);
17053 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17054 }
17055 else
17056 {
17057 rtx t;
17058
17059 if (TARGET_AVX
17060 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17061 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17062 || optimize_insn_for_size_p ())
17063 {
17064 if (GET_MODE (op0) != V4SFmode)
17065 {
17066 orig_op0 = op0;
17067 op0 = gen_reg_rtx (V4SFmode);
17068 }
17069 op1 = gen_lowpart (V4SFmode, op1);
17070 emit_insn (gen_sse_loadups (op0, op1));
17071 if (orig_op0)
17072 emit_move_insn (orig_op0,
17073 gen_lowpart (GET_MODE (orig_op0), op0));
17074 return;
17075 }
17076
17077 if (mode != V4SFmode)
17078 t = gen_reg_rtx (V4SFmode);
17079 else
17080 t = op0;
17081
17082 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17083 emit_move_insn (t, CONST0_RTX (V4SFmode));
17084 else
17085 emit_clobber (t);
17086
17087 m = adjust_address (op1, V2SFmode, 0);
17088 emit_insn (gen_sse_loadlps (t, t, m));
17089 m = adjust_address (op1, V2SFmode, 8);
17090 emit_insn (gen_sse_loadhps (t, t, m));
17091 if (mode != V4SFmode)
17092 emit_move_insn (op0, gen_lowpart (mode, t));
17093 }
17094 }
17095 else if (MEM_P (op0))
17096 {
17097 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17098 {
17099 op0 = gen_lowpart (V16QImode, op0);
17100 op1 = gen_lowpart (V16QImode, op1);
17101 /* We will eventually emit movups based on insn attributes. */
17102 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17103 }
17104 else if (TARGET_SSE2 && mode == V2DFmode)
17105 {
17106 if (TARGET_AVX
17107 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17108 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17109 || optimize_insn_for_size_p ())
17110 /* We will eventually emit movups based on insn attributes. */
17111 emit_insn (gen_sse2_storeupd (op0, op1));
17112 else
17113 {
17114 m = adjust_address (op0, DFmode, 0);
17115 emit_insn (gen_sse2_storelpd (m, op1));
17116 m = adjust_address (op0, DFmode, 8);
17117 emit_insn (gen_sse2_storehpd (m, op1));
17118 }
17119 }
17120 else
17121 {
17122 if (mode != V4SFmode)
17123 op1 = gen_lowpart (V4SFmode, op1);
17124
17125 if (TARGET_AVX
17126 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17127 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17128 || optimize_insn_for_size_p ())
17129 {
17130 op0 = gen_lowpart (V4SFmode, op0);
17131 emit_insn (gen_sse_storeups (op0, op1));
17132 }
17133 else
17134 {
17135 m = adjust_address (op0, V2SFmode, 0);
17136 emit_insn (gen_sse_storelps (m, op1));
17137 m = adjust_address (op0, V2SFmode, 8);
17138 emit_insn (gen_sse_storehps (m, op1));
17139 }
17140 }
17141 }
17142 else
17143 gcc_unreachable ();
17144 }
17145
17146 /* Expand a push in MODE. This is some mode for which we do not support
17147 proper push instructions, at least from the registers that we expect
17148 the value to live in. */
17149
17150 void
17151 ix86_expand_push (enum machine_mode mode, rtx x)
17152 {
17153 rtx tmp;
17154
17155 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
17156 GEN_INT (-GET_MODE_SIZE (mode)),
17157 stack_pointer_rtx, 1, OPTAB_DIRECT);
17158 if (tmp != stack_pointer_rtx)
17159 emit_move_insn (stack_pointer_rtx, tmp);
17160
17161 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
17162
17163 /* When we push an operand onto the stack, it has to be aligned at least
17164 at the function argument boundary. However, since we don't have
17165 the argument type, we can't determine the actual argument
17166 boundary. */
17167 emit_move_insn (tmp, x);
17168 }
17169
17170 /* Helper function of ix86_fixup_binary_operands to canonicalize
17171 operand order. Returns true if the operands should be swapped. */
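/* For a commutative operation we prefer, in order: src1 matching the
   destination (so the two-address form needs no extra copy), immediates
   as the second source, and memory references as the second source.
   E.g. for dst = mem + dst we swap so that the insn becomes dst = dst + mem.  */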
17172
17173 static bool
17174 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17175 rtx operands[])
17176 {
17177 rtx dst = operands[0];
17178 rtx src1 = operands[1];
17179 rtx src2 = operands[2];
17180
17181 /* If the operation is not commutative, we can't do anything. */
17182 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17183 return false;
17184
17185 /* Highest priority is that src1 should match dst. */
17186 if (rtx_equal_p (dst, src1))
17187 return false;
17188 if (rtx_equal_p (dst, src2))
17189 return true;
17190
17191 /* Next highest priority is that immediate constants come second. */
17192 if (immediate_operand (src2, mode))
17193 return false;
17194 if (immediate_operand (src1, mode))
17195 return true;
17196
17197 /* Lowest priority is that memory references should come second. */
17198 if (MEM_P (src2))
17199 return false;
17200 if (MEM_P (src1))
17201 return true;
17202
17203 return false;
17204 }
17205
17206
17207 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17208 destination to use for the operation. If different from the true
17209 destination in operands[0], a copy operation will be required. */
17210
17211 rtx
17212 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17213 rtx operands[])
17214 {
17215 rtx dst = operands[0];
17216 rtx src1 = operands[1];
17217 rtx src2 = operands[2];
17218
17219 /* Canonicalize operand order. */
17220 if (ix86_swap_binary_operands_p (code, mode, operands))
17221 {
17222 rtx temp;
17223
17224 /* It is invalid to swap operands of different modes. */
17225 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17226
17227 temp = src1;
17228 src1 = src2;
17229 src2 = temp;
17230 }
17231
17232 /* Both source operands cannot be in memory. */
17233 if (MEM_P (src1) && MEM_P (src2))
17234 {
17235 /* Optimization: Only read from memory once. */
17236 if (rtx_equal_p (src1, src2))
17237 {
17238 src2 = force_reg (mode, src2);
17239 src1 = src2;
17240 }
17241 else if (rtx_equal_p (dst, src1))
17242 src2 = force_reg (mode, src2);
17243 else
17244 src1 = force_reg (mode, src1);
17245 }
17246
17247 /* If the destination is memory, and we do not have matching source
17248 operands, do things in registers. */
17249 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17250 dst = gen_reg_rtx (mode);
17251
17252 /* Source 1 cannot be a constant. */
17253 if (CONSTANT_P (src1))
17254 src1 = force_reg (mode, src1);
17255
17256 /* Source 1 cannot be a non-matching memory. */
17257 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17258 src1 = force_reg (mode, src1);
17259
17260 /* Improve address combine. */
17261 if (code == PLUS
17262 && GET_MODE_CLASS (mode) == MODE_INT
17263 && MEM_P (src2))
17264 src2 = force_reg (mode, src2);
17265
17266 operands[1] = src1;
17267 operands[2] = src2;
17268 return dst;
17269 }
17270
17271 /* Similarly, but assume that the destination has already been
17272 set up properly. */
17273
17274 void
17275 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17276 enum machine_mode mode, rtx operands[])
17277 {
17278 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17279 gcc_assert (dst == operands[0]);
17280 }
17281
17282 /* Attempt to expand a binary operator. Make the expansion closer to the
17283 actual machine than just general_operand, which would allow 3 separate
17284 memory references (one output, two input) in a single insn. */
17285
17286 void
17287 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17288 rtx operands[])
17289 {
17290 rtx src1, src2, dst, op, clob;
17291
17292 dst = ix86_fixup_binary_operands (code, mode, operands);
17293 src1 = operands[1];
17294 src2 = operands[2];
17295
17296 /* Emit the instruction. */
17297
17298 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17299 if (reload_in_progress)
17300 {
17301 /* Reload doesn't know about the flags register, and doesn't know that
17302 it doesn't want to clobber it. We can only do this with PLUS. */
17303 gcc_assert (code == PLUS);
17304 emit_insn (op);
17305 }
17306 else if (reload_completed
17307 && code == PLUS
17308 && !rtx_equal_p (dst, src1))
17309 {
17310 /* This is going to be an LEA; avoid splitting it later. */
17311 emit_insn (op);
17312 }
17313 else
17314 {
17315 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17316 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17317 }
17318
17319 /* Fix up the destination if needed. */
17320 if (dst != operands[0])
17321 emit_move_insn (operands[0], dst);
17322 }
17323
17324 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17325 the given OPERANDS. */
17326
17327 void
17328 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17329 rtx operands[])
17330 {
17331 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17332 if (GET_CODE (operands[1]) == SUBREG)
17333 {
17334 op1 = operands[1];
17335 op2 = operands[2];
17336 }
17337 else if (GET_CODE (operands[2]) == SUBREG)
17338 {
17339 op1 = operands[2];
17340 op2 = operands[1];
17341 }
17342 /* Optimize (__m128i) d | (__m128i) e and similar code,
17343 when d and e are float vectors, into a float vector logical
17344 insn. In C/C++ without using intrinsics there is no other way
17345 to express a vector logical operation on float vectors than
17346 to cast them temporarily to integer vectors. */
17347 if (op1
17348 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17349 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17350 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17351 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17352 && SUBREG_BYTE (op1) == 0
17353 && (GET_CODE (op2) == CONST_VECTOR
17354 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17355 && SUBREG_BYTE (op2) == 0))
17356 && can_create_pseudo_p ())
17357 {
17358 rtx dst;
17359 switch (GET_MODE (SUBREG_REG (op1)))
17360 {
17361 case V4SFmode:
17362 case V8SFmode:
17363 case V2DFmode:
17364 case V4DFmode:
17365 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17366 if (GET_CODE (op2) == CONST_VECTOR)
17367 {
17368 op2 = gen_lowpart (GET_MODE (dst), op2);
17369 op2 = force_reg (GET_MODE (dst), op2);
17370 }
17371 else
17372 {
17373 op1 = operands[1];
17374 op2 = SUBREG_REG (operands[2]);
17375 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17376 op2 = force_reg (GET_MODE (dst), op2);
17377 }
17378 op1 = SUBREG_REG (op1);
17379 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17380 op1 = force_reg (GET_MODE (dst), op1);
17381 emit_insn (gen_rtx_SET (VOIDmode, dst,
17382 gen_rtx_fmt_ee (code, GET_MODE (dst),
17383 op1, op2)));
17384 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17385 return;
17386 default:
17387 break;
17388 }
17389 }
17390 if (!nonimmediate_operand (operands[1], mode))
17391 operands[1] = force_reg (mode, operands[1]);
17392 if (!nonimmediate_operand (operands[2], mode))
17393 operands[2] = force_reg (mode, operands[2]);
17394 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17395 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17396 gen_rtx_fmt_ee (code, mode, operands[1],
17397 operands[2])));
17398 }
17399
17400 /* Return TRUE or FALSE depending on whether the binary operator meets the
17401 appropriate constraints. */
17402
17403 bool
17404 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17405 rtx operands[3])
17406 {
17407 rtx dst = operands[0];
17408 rtx src1 = operands[1];
17409 rtx src2 = operands[2];
17410
17411 /* Both source operands cannot be in memory. */
17412 if (MEM_P (src1) && MEM_P (src2))
17413 return false;
17414
17415 /* Canonicalize operand order for commutative operators. */
17416 if (ix86_swap_binary_operands_p (code, mode, operands))
17417 {
17418 rtx temp = src1;
17419 src1 = src2;
17420 src2 = temp;
17421 }
17422
17423 /* If the destination is memory, we must have a matching source operand. */
17424 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17425 return false;
17426
17427 /* Source 1 cannot be a constant. */
17428 if (CONSTANT_P (src1))
17429 return false;
17430
17431 /* Source 1 cannot be a non-matching memory. */
17432 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17433 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17434 return (code == AND
17435 && (mode == HImode
17436 || mode == SImode
17437 || (TARGET_64BIT && mode == DImode))
17438 && satisfies_constraint_L (src2));
17439
17440 return true;
17441 }
17442
17443 /* Attempt to expand a unary operator. Make the expansion closer to the
17444 actual machine than just general_operand, which would allow 2 separate
17445 memory references (one output, one input) in a single insn. */
17446
17447 void
17448 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17449 rtx operands[])
17450 {
17451 int matching_memory;
17452 rtx src, dst, op, clob;
17453
17454 dst = operands[0];
17455 src = operands[1];
17456
17457 /* If the destination is memory, and we do not have matching source
17458 operands, do things in registers. */
17459 matching_memory = 0;
17460 if (MEM_P (dst))
17461 {
17462 if (rtx_equal_p (dst, src))
17463 matching_memory = 1;
17464 else
17465 dst = gen_reg_rtx (mode);
17466 }
17467
17468 /* When source operand is memory, destination must match. */
17469 if (MEM_P (src) && !matching_memory)
17470 src = force_reg (mode, src);
17471
17472 /* Emit the instruction. */
17473
17474 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17475 if (reload_in_progress || code == NOT)
17476 {
17477 /* Reload doesn't know about the flags register, and doesn't know that
17478 it doesn't want to clobber it. */
17479 gcc_assert (code == NOT);
17480 emit_insn (op);
17481 }
17482 else
17483 {
17484 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17485 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17486 }
17487
17488 /* Fix up the destination if needed. */
17489 if (dst != operands[0])
17490 emit_move_insn (operands[0], dst);
17491 }
17492
17493 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17494 divisor are within the range [0-255]. */
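/* A rough sketch of the emitted sequence:

     scratch = dividend | divisor
     test    scratch, -0x100       (any bits above bit 7 set?)
     je      .Lqimode
     <full 32/64-bit division>
     jmp     .Lend
   .Lqimode:
     <8-bit divide; quotient in AL, remainder in AH>
   .Lend:
*/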
17495
17496 void
17497 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17498 bool signed_p)
17499 {
17500 rtx end_label, qimode_label;
17501 rtx insn, div, mod;
17502 rtx scratch, tmp0, tmp1, tmp2;
17503 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17504 rtx (*gen_zero_extend) (rtx, rtx);
17505 rtx (*gen_test_ccno_1) (rtx, rtx);
17506
17507 switch (mode)
17508 {
17509 case SImode:
17510 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17511 gen_test_ccno_1 = gen_testsi_ccno_1;
17512 gen_zero_extend = gen_zero_extendqisi2;
17513 break;
17514 case DImode:
17515 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17516 gen_test_ccno_1 = gen_testdi_ccno_1;
17517 gen_zero_extend = gen_zero_extendqidi2;
17518 break;
17519 default:
17520 gcc_unreachable ();
17521 }
17522
17523 end_label = gen_label_rtx ();
17524 qimode_label = gen_label_rtx ();
17525
17526 scratch = gen_reg_rtx (mode);
17527
17528 /* Use 8bit unsigned divmod if dividend and divisor are within
17529 the range [0-255]. */
17530 emit_move_insn (scratch, operands[2]);
17531 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17532 scratch, 1, OPTAB_DIRECT);
17533 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17534 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17535 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17536 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17537 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17538 pc_rtx);
17539 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17540 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17541 JUMP_LABEL (insn) = qimode_label;
17542
17543 /* Generate the original signed/unsigned divmod. */
17544 div = gen_divmod4_1 (operands[0], operands[1],
17545 operands[2], operands[3]);
17546 emit_insn (div);
17547
17548 /* Branch to the end. */
17549 emit_jump_insn (gen_jump (end_label));
17550 emit_barrier ();
17551
17552 /* Generate 8bit unsigned divide. */
17553 emit_label (qimode_label);
17554 /* Don't use operands[0] for result of 8bit divide since not all
17555 registers support QImode ZERO_EXTRACT. */
17556 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17557 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17558 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17559 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17560
17561 if (signed_p)
17562 {
17563 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17564 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17565 }
17566 else
17567 {
17568 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17569 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17570 }
17571
17572 /* Extract remainder from AH. */
17573 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17574 if (REG_P (operands[1]))
17575 insn = emit_move_insn (operands[1], tmp1);
17576 else
17577 {
17578 /* We need a new scratch register since the old one holds the
17579 result of the 8bit divide. */
17580 scratch = gen_reg_rtx (mode);
17581 emit_move_insn (scratch, tmp1);
17582 insn = emit_move_insn (operands[1], scratch);
17583 }
17584 set_unique_reg_note (insn, REG_EQUAL, mod);
17585
17586 /* Zero extend quotient from AL. */
17587 tmp1 = gen_lowpart (QImode, tmp0);
17588 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17589 set_unique_reg_note (insn, REG_EQUAL, div);
17590
17591 emit_label (end_label);
17592 }
17593
17594 /* Whether it is OK to emit CFI directives when emitting asm code. */
17595
17596 bool
17597 ix86_emit_cfi ()
17598 {
17599 return dwarf2out_do_cfi_asm ();
17600 }
17601
17602 #define LEA_MAX_STALL (3)
17603 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
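/* LEA_SEARCH_THRESHOLD bounds how far the distance functions below look
   around an insn, measured in half-cycles; it is derived from
   LEA_MAX_STALL, presumably the maximum AGU/ALU stall (in cycles) that
   the LEA splitting heuristics are willing to tolerate.  */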
17604
17605 /* Increase the given DISTANCE in half-cycles according to
17606 dependencies between the PREV and NEXT instructions.
17607 Add 1 half-cycle if there is no dependency and
17608 go to the next cycle if there is some dependency. */
17609
17610 static unsigned int
17611 increase_distance (rtx prev, rtx next, unsigned int distance)
17612 {
17613 df_ref *use_rec;
17614 df_ref *def_rec;
17615
17616 if (!prev || !next)
17617 return distance + (distance & 1) + 2;
17618
17619 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17620 return distance + 1;
17621
17622 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17623 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17624 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17625 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17626 return distance + (distance & 1) + 2;
17627
17628 return distance + 1;
17629 }
17630
17631 /* Check whether instruction INSN defines register number
17632 REGNO1 or REGNO2. */
17633
17634 static bool
17635 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17636 rtx insn)
17637 {
17638 df_ref *def_rec;
17639
17640 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17641 if (DF_REF_REG_DEF_P (*def_rec)
17642 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17643 && (regno1 == DF_REF_REGNO (*def_rec)
17644 || regno2 == DF_REF_REGNO (*def_rec)))
17645 {
17646 return true;
17647 }
17648
17649 return false;
17650 }
17651
17652 /* Check whether instruction INSN uses register number
17653 REGNO as part of an address expression. */
17654
17655 static bool
17656 insn_uses_reg_mem (unsigned int regno, rtx insn)
17657 {
17658 df_ref *use_rec;
17659
17660 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17661 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17662 return true;
17663
17664 return false;
17665 }
17666
17667 /* Search backward for a non-agu definition of register number REGNO1
17668 or register number REGNO2 in the basic block, starting from instruction
17669 START and going back to the head of the basic block or to instruction INSN.
17670 
17671 Set *FOUND to true if a definition was found
17672 and false otherwise.
17673 
17674 The distance in half-cycles between START and the found instruction (or
17675 the head of the BB) is added to DISTANCE and returned. */
17676
17677 static int
17678 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17679 rtx insn, int distance,
17680 rtx start, bool *found)
17681 {
17682 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17683 rtx prev = start;
17684 rtx next = NULL;
17685
17686 *found = false;
17687
17688 while (prev
17689 && prev != insn
17690 && distance < LEA_SEARCH_THRESHOLD)
17691 {
17692 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17693 {
17694 distance = increase_distance (prev, next, distance);
17695 if (insn_defines_reg (regno1, regno2, prev))
17696 {
17697 if (recog_memoized (prev) < 0
17698 || get_attr_type (prev) != TYPE_LEA)
17699 {
17700 *found = true;
17701 return distance;
17702 }
17703 }
17704
17705 next = prev;
17706 }
17707 if (prev == BB_HEAD (bb))
17708 break;
17709
17710 prev = PREV_INSN (prev);
17711 }
17712
17713 return distance;
17714 }
17715
17716 /* Search backward for non-agu definition of register number REGNO1
17717 or register number REGNO2 in INSN's basic block until
17718 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17719 2. Reach a neighbouring BB's boundary, or
17720 3. Reach agu definition.
17721 Returns the distance between the non-agu definition point and INSN.
17722 If no definition point, returns -1. */
17723
17724 static int
17725 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17726 rtx insn)
17727 {
17728 basic_block bb = BLOCK_FOR_INSN (insn);
17729 int distance = 0;
17730 bool found = false;
17731
17732 if (insn != BB_HEAD (bb))
17733 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17734 distance, PREV_INSN (insn),
17735 &found);
17736
17737 if (!found && distance < LEA_SEARCH_THRESHOLD)
17738 {
17739 edge e;
17740 edge_iterator ei;
17741 bool simple_loop = false;
17742
17743 FOR_EACH_EDGE (e, ei, bb->preds)
17744 if (e->src == bb)
17745 {
17746 simple_loop = true;
17747 break;
17748 }
17749
17750 if (simple_loop)
17751 distance = distance_non_agu_define_in_bb (regno1, regno2,
17752 insn, distance,
17753 BB_END (bb), &found);
17754 else
17755 {
17756 int shortest_dist = -1;
17757 bool found_in_bb = false;
17758
17759 FOR_EACH_EDGE (e, ei, bb->preds)
17760 {
17761 int bb_dist
17762 = distance_non_agu_define_in_bb (regno1, regno2,
17763 insn, distance,
17764 BB_END (e->src),
17765 &found_in_bb);
17766 if (found_in_bb)
17767 {
17768 if (shortest_dist < 0)
17769 shortest_dist = bb_dist;
17770 else if (bb_dist > 0)
17771 shortest_dist = MIN (bb_dist, shortest_dist);
17772
17773 found = true;
17774 }
17775 }
17776
17777 distance = shortest_dist;
17778 }
17779 }
17780
17781 /* get_attr_type may modify recog data. We want to make sure
17782 that recog data is valid for instruction INSN, on which
17783 distance_non_agu_define is called. INSN is unchanged here. */
17784 extract_insn_cached (insn);
17785
17786 if (!found)
17787 return -1;
17788
17789 return distance >> 1;
17790 }
17791
17792 /* Return the distance in half-cycles between INSN and the next
17793 insn that uses register number REGNO in a memory address, added
17794 to DISTANCE. Return -1 if REGNO is set.
17795
17796 Put true value into *FOUND if register usage was found and
17797 false otherwise.
17798 Put true value into *REDEFINED if register redefinition was
17799 found and false otherwise. */
17800
17801 static int
17802 distance_agu_use_in_bb (unsigned int regno,
17803 rtx insn, int distance, rtx start,
17804 bool *found, bool *redefined)
17805 {
17806 basic_block bb = NULL;
17807 rtx next = start;
17808 rtx prev = NULL;
17809
17810 *found = false;
17811 *redefined = false;
17812
17813 if (start != NULL_RTX)
17814 {
17815 bb = BLOCK_FOR_INSN (start);
17816 if (start != BB_HEAD (bb))
17817 /* If insn and start belong to the same bb, set prev to insn,
17818 so the call to increase_distance will increase the distance
17819 between insns by 1. */
17820 prev = insn;
17821 }
17822
17823 while (next
17824 && next != insn
17825 && distance < LEA_SEARCH_THRESHOLD)
17826 {
17827 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17828 {
17829 distance = increase_distance(prev, next, distance);
17830 if (insn_uses_reg_mem (regno, next))
17831 {
17832 /* Return DISTANCE if OP0 is used in memory
17833 address in NEXT. */
17834 *found = true;
17835 return distance;
17836 }
17837
17838 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17839 {
17840 /* Return -1 if OP0 is set in NEXT. */
17841 *redefined = true;
17842 return -1;
17843 }
17844
17845 prev = next;
17846 }
17847
17848 if (next == BB_END (bb))
17849 break;
17850
17851 next = NEXT_INSN (next);
17852 }
17853
17854 return distance;
17855 }
17856
17857 /* Return the distance between INSN and the next insn that uses
17858 register number REGNO0 in a memory address. Return -1 if no such
17859 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17860
17861 static int
17862 distance_agu_use (unsigned int regno0, rtx insn)
17863 {
17864 basic_block bb = BLOCK_FOR_INSN (insn);
17865 int distance = 0;
17866 bool found = false;
17867 bool redefined = false;
17868
17869 if (insn != BB_END (bb))
17870 distance = distance_agu_use_in_bb (regno0, insn, distance,
17871 NEXT_INSN (insn),
17872 &found, &redefined);
17873
17874 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
17875 {
17876 edge e;
17877 edge_iterator ei;
17878 bool simple_loop = false;
17879
17880 FOR_EACH_EDGE (e, ei, bb->succs)
17881 if (e->dest == bb)
17882 {
17883 simple_loop = true;
17884 break;
17885 }
17886
17887 if (simple_loop)
17888 distance = distance_agu_use_in_bb (regno0, insn,
17889 distance, BB_HEAD (bb),
17890 &found, &redefined);
17891 else
17892 {
17893 int shortest_dist = -1;
17894 bool found_in_bb = false;
17895 bool redefined_in_bb = false;
17896
17897 FOR_EACH_EDGE (e, ei, bb->succs)
17898 {
17899 int bb_dist
17900 = distance_agu_use_in_bb (regno0, insn,
17901 distance, BB_HEAD (e->dest),
17902 &found_in_bb, &redefined_in_bb);
17903 if (found_in_bb)
17904 {
17905 if (shortest_dist < 0)
17906 shortest_dist = bb_dist;
17907 else if (bb_dist > 0)
17908 shortest_dist = MIN (bb_dist, shortest_dist);
17909
17910 found = true;
17911 }
17912 }
17913
17914 distance = shortest_dist;
17915 }
17916 }
17917
17918 if (!found || redefined)
17919 return -1;
17920
17921 return distance >> 1;
17922 }
17923
17924 /* Define this macro to tune LEA priority vs ADD; it takes effect when
17925 there is a choice between LEA and ADD.
17926 Negative value: ADD is preferred over LEA
17927 Zero: Neutral
17928 Positive value: LEA is preferred over ADD */
17929 #define IX86_LEA_PRIORITY 0
17930
17931 /* Return true if using lea INSN has a performance advantage
17932 over a sequence of instructions whose latency is SPLIT_COST
17933 cycles higher than the lea latency. */
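/* Illustrative sketch of the decision (the numbers are hypothetical):
   with dist_define = 1 cycle, split_cost = 1 and dist_use = 3 cycles,
   the adjusted value 1 + 1 + IX86_LEA_PRIORITY = 2 is below 3, so the
   function returns false and callers split the lea; had dist_define
   been 2, the adjusted value 3 >= 3 would keep the lea. */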
17934
17935 static bool
17936 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
17937 unsigned int regno2, int split_cost, bool has_scale)
17938 {
17939 int dist_define, dist_use;
17940
17941 /* For Silvermont, if a 2-source or 3-source LEA is used for a
17942 non-destructive destination, or to take advantage of SCALE,
17943 then the use of LEA is justified. */
17944 if (ix86_tune == PROCESSOR_SILVERMONT)
17945 {
17946 if (has_scale)
17947 return true;
17948 if (split_cost < 1)
17949 return false;
17950 if (regno0 == regno1 || regno0 == regno2)
17951 return false;
17952 return true;
17953 }
17954
17955 dist_define = distance_non_agu_define (regno1, regno2, insn);
17956 dist_use = distance_agu_use (regno0, insn);
17957
17958 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
17959 {
17960 /* If there is no non-AGU operand definition, no AGU
17961 operand usage and the split cost is 0, then both the lea
17962 and non-lea variants have the same priority. Currently
17963 we prefer lea for 64-bit code and non-lea for 32-bit
17964 code. */
17965 if (dist_use < 0 && split_cost == 0)
17966 return TARGET_64BIT || IX86_LEA_PRIORITY;
17967 else
17968 return true;
17969 }
17970
17971 /* With a longer definition distance, lea is preferable.
17972 Here we adjust it to take into account the splitting cost and
17973 lea priority. */
17974 dist_define += split_cost + IX86_LEA_PRIORITY;
17975
17976 /* If there is no use in a memory address, then we just check
17977 that the split cost exceeds the AGU stall. */
17978 if (dist_use < 0)
17979 return dist_define > LEA_MAX_STALL;
17980
17981 /* If this insn has both a backward non-agu dependence and a forward
17982 agu dependence, the one with the shorter distance takes effect. */
17983 return dist_define >= dist_use;
17984 }
17985
17986 /* Return true if it is legal to clobber flags by INSN and
17987 false otherwise. */
17988
17989 static bool
17990 ix86_ok_to_clobber_flags (rtx insn)
17991 {
17992 basic_block bb = BLOCK_FOR_INSN (insn);
17993 df_ref *use;
17994 bitmap live;
17995
17996 while (insn)
17997 {
17998 if (NONDEBUG_INSN_P (insn))
17999 {
18000 for (use = DF_INSN_USES (insn); *use; use++)
18001 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18002 return false;
18003
18004 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18005 return true;
18006 }
18007
18008 if (insn == BB_END (bb))
18009 break;
18010
18011 insn = NEXT_INSN (insn);
18012 }
18013
18014 live = df_get_live_out(bb);
18015 return !REGNO_REG_SET_P (live, FLAGS_REG);
18016 }
18017
18018 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18019 move and add to avoid AGU stalls. */
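/* For example (a sketch; the registers are hypothetical), an add whose
   destination matches neither source and which would otherwise be
   emitted as

       lea    (%rsi,%rdx), %rdi

   is split into

       mov    %rsi, %rdi
       add    %rdx, %rdi

   when ix86_lea_outperforms reports no advantage for the lea form. */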
18020
18021 bool
18022 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18023 {
18024 unsigned int regno0, regno1, regno2;
18025
18026 /* Check if we need to optimize. */
18027 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18028 return false;
18029
18030 /* Check it is correct to split here. */
18031 if (!ix86_ok_to_clobber_flags(insn))
18032 return false;
18033
18034 regno0 = true_regnum (operands[0]);
18035 regno1 = true_regnum (operands[1]);
18036 regno2 = true_regnum (operands[2]);
18037
18038 /* We need to split only adds with a non-destructive
18039 destination operand. */
18040 if (regno0 == regno1 || regno0 == regno2)
18041 return false;
18042 else
18043 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18044 }
18045
18046 /* Return true if we should emit lea instruction instead of mov
18047 instruction. */
18048
18049 bool
18050 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18051 {
18052 unsigned int regno0, regno1;
18053
18054 /* Check if we need to optimize. */
18055 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18056 return false;
18057
18058 /* Use lea for reg to reg moves only. */
18059 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18060 return false;
18061
18062 regno0 = true_regnum (operands[0]);
18063 regno1 = true_regnum (operands[1]);
18064
18065 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18066 }
18067
18068 /* Return true if we need to split lea into a sequence of
18069 instructions to avoid AGU stalls. */
18070
18071 bool
18072 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18073 {
18074 unsigned int regno0, regno1, regno2;
18075 int split_cost;
18076 struct ix86_address parts;
18077 int ok;
18078
18079 /* Check if we need to optimize. */
18080 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18081 return false;
18082
18083 /* Check it is correct to split here. */
18084 if (!ix86_ok_to_clobber_flags(insn))
18085 return false;
18086
18087 ok = ix86_decompose_address (operands[1], &parts);
18088 gcc_assert (ok);
18089
18090 /* There should be at least two components in the address. */
18091 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18092 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18093 return false;
18094
18095 /* We should not split into an add if a non-legitimate pic
18096 operand is used as the displacement. */
18097 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18098 return false;
18099
18100 regno0 = true_regnum (operands[0]);
18101 regno1 = INVALID_REGNUM;
18102 regno2 = INVALID_REGNUM;
18103
18104 if (parts.base)
18105 regno1 = true_regnum (parts.base);
18106 if (parts.index)
18107 regno2 = true_regnum (parts.index);
18108
18109 split_cost = 0;
18110
18111 /* Compute how many cycles we will add to the execution time
18112 if we split the lea into a sequence of instructions. */
18113 if (parts.base || parts.index)
18114 {
18115 /* Have to use a mov instruction if the non-destructive
18116 destination form is used. */
18117 if (regno1 != regno0 && regno2 != regno0)
18118 split_cost += 1;
18119
18120 /* Have to add index to base if both exist. */
18121 if (parts.base && parts.index)
18122 split_cost += 1;
18123
18124 /* Have to use shift and adds if scale is 2 or greater. */
18125 if (parts.scale > 1)
18126 {
18127 if (regno0 != regno1)
18128 split_cost += 1;
18129 else if (regno2 == regno0)
18130 split_cost += 4;
18131 else
18132 split_cost += parts.scale;
18133 }
18134
18135 /* Have to use an add instruction with an immediate if
18136 disp is nonzero. */
18137 if (parts.disp && parts.disp != const0_rtx)
18138 split_cost += 1;
18139
18140 /* Subtract the price of lea. */
18141 split_cost -= 1;
18142 }
18143
18144 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18145 parts.scale > 1);
18146 }
18147
18148 /* Emit the x86 binary operation CODE in mode MODE, where the first
18149 operand matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
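/* For instance, ix86_emit_binop (PLUS, SImode, dst, src) emits

       (parallel [(set dst (plus:SI dst src))
                  (clobber (reg:CC FLAGS_REG))])

   i.e. the two-address form with the flags clobber that the ordinary
   arithmetic patterns expect. */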
18150
18151 static void
18152 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18153 rtx dst, rtx src)
18154 {
18155 rtx op, clob;
18156
18157 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18158 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18159
18160 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18161 }
18162
18163 /* Return true if the REGNO1 definition is the one nearest to INSN. */
18164
18165 static bool
18166 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18167 {
18168 rtx prev = insn;
18169 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18170
18171 if (insn == start)
18172 return false;
18173 while (prev && prev != start)
18174 {
18175 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18176 {
18177 prev = PREV_INSN (prev);
18178 continue;
18179 }
18180 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18181 return true;
18182 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18183 return false;
18184 prev = PREV_INSN (prev);
18185 }
18186
18187 /* None of the regs is defined in the bb. */
18188 return false;
18189 }
18190
18191 /* Split a lea instruction into a sequence of instructions
18192 which are executed on the ALU to avoid AGU stalls.
18193 It is assumed that we are allowed to clobber the flags register
18194 at the lea position. */
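/* As an illustrative sketch (registers and constants are hypothetical),
   a scaled address such as

       lea    0x8(%rbx,%rcx,4), %rax

   may become the ALU sequence

       mov    %rcx, %rax
       shl    $2, %rax
       add    %rbx, %rax
       add    $0x8, %rax

   The exact sequence depends on the scale and on which operands alias
   the destination; see the cases below. */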
18195
18196 void
18197 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18198 {
18199 unsigned int regno0, regno1, regno2;
18200 struct ix86_address parts;
18201 rtx target, tmp;
18202 int ok, adds;
18203
18204 ok = ix86_decompose_address (operands[1], &parts);
18205 gcc_assert (ok);
18206
18207 target = gen_lowpart (mode, operands[0]);
18208
18209 regno0 = true_regnum (target);
18210 regno1 = INVALID_REGNUM;
18211 regno2 = INVALID_REGNUM;
18212
18213 if (parts.base)
18214 {
18215 parts.base = gen_lowpart (mode, parts.base);
18216 regno1 = true_regnum (parts.base);
18217 }
18218
18219 if (parts.index)
18220 {
18221 parts.index = gen_lowpart (mode, parts.index);
18222 regno2 = true_regnum (parts.index);
18223 }
18224
18225 if (parts.disp)
18226 parts.disp = gen_lowpart (mode, parts.disp);
18227
18228 if (parts.scale > 1)
18229 {
18230 /* Case r1 = r1 + ... */
18231 if (regno1 == regno0)
18232 {
18233 /* If we have a case r1 = r1 + C * r1 then we
18234 would have to use multiplication, which is very
18235 expensive. Assume the cost model is wrong if we
18236 have such a case here. */
18237 gcc_assert (regno2 != regno0);
18238
18239 for (adds = parts.scale; adds > 0; adds--)
18240 ix86_emit_binop (PLUS, mode, target, parts.index);
18241 }
18242 else
18243 {
18244 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18245 if (regno0 != regno2)
18246 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18247
18248 /* Use shift for scaling. */
18249 ix86_emit_binop (ASHIFT, mode, target,
18250 GEN_INT (exact_log2 (parts.scale)));
18251
18252 if (parts.base)
18253 ix86_emit_binop (PLUS, mode, target, parts.base);
18254
18255 if (parts.disp && parts.disp != const0_rtx)
18256 ix86_emit_binop (PLUS, mode, target, parts.disp);
18257 }
18258 }
18259 else if (!parts.base && !parts.index)
18260 {
18261 gcc_assert(parts.disp);
18262 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18263 }
18264 else
18265 {
18266 if (!parts.base)
18267 {
18268 if (regno0 != regno2)
18269 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18270 }
18271 else if (!parts.index)
18272 {
18273 if (regno0 != regno1)
18274 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18275 }
18276 else
18277 {
18278 if (regno0 == regno1)
18279 tmp = parts.index;
18280 else if (regno0 == regno2)
18281 tmp = parts.base;
18282 else
18283 {
18284 rtx tmp1;
18285
18286 /* Find better operand for SET instruction, depending
18287 on which definition is farther from the insn. */
18288 if (find_nearest_reg_def (insn, regno1, regno2))
18289 tmp = parts.index, tmp1 = parts.base;
18290 else
18291 tmp = parts.base, tmp1 = parts.index;
18292
18293 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18294
18295 if (parts.disp && parts.disp != const0_rtx)
18296 ix86_emit_binop (PLUS, mode, target, parts.disp);
18297
18298 ix86_emit_binop (PLUS, mode, target, tmp1);
18299 return;
18300 }
18301
18302 ix86_emit_binop (PLUS, mode, target, tmp);
18303 }
18304
18305 if (parts.disp && parts.disp != const0_rtx)
18306 ix86_emit_binop (PLUS, mode, target, parts.disp);
18307 }
18308 }
18309
18310 /* Return true if it is ok to optimize an ADD operation to a LEA
18311 operation to avoid flag register consumption. For most processors,
18312 ADD is faster than LEA. For processors like BONNELL, if the
18313 destination register of the LEA holds an actual address which will be
18314 used soon, LEA is better; otherwise ADD is better. */
18315
18316 bool
18317 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18318 {
18319 unsigned int regno0 = true_regnum (operands[0]);
18320 unsigned int regno1 = true_regnum (operands[1]);
18321 unsigned int regno2 = true_regnum (operands[2]);
18322
18323 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18324 if (regno0 != regno1 && regno0 != regno2)
18325 return true;
18326
18327 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18328 return false;
18329
18330 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18331 }
18332
18333 /* Return true if destination reg of SET_BODY is shift count of
18334 USE_BODY. */
18335
18336 static bool
18337 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18338 {
18339 rtx set_dest;
18340 rtx shift_rtx;
18341 int i;
18342
18343 /* Retrieve destination of SET_BODY. */
18344 switch (GET_CODE (set_body))
18345 {
18346 case SET:
18347 set_dest = SET_DEST (set_body);
18348 if (!set_dest || !REG_P (set_dest))
18349 return false;
18350 break;
18351 case PARALLEL:
18352 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18353 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18354 use_body))
18355 return true;
18356 default:
18357 return false;
18358 break;
18359 }
18360
18361 /* Retrieve shift count of USE_BODY. */
18362 switch (GET_CODE (use_body))
18363 {
18364 case SET:
18365 shift_rtx = XEXP (use_body, 1);
18366 break;
18367 case PARALLEL:
18368 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18369 if (ix86_dep_by_shift_count_body (set_body,
18370 XVECEXP (use_body, 0, i)))
18371 return true;
18372 default:
18373 return false;
18374 break;
18375 }
18376
18377 if (shift_rtx
18378 && (GET_CODE (shift_rtx) == ASHIFT
18379 || GET_CODE (shift_rtx) == LSHIFTRT
18380 || GET_CODE (shift_rtx) == ASHIFTRT
18381 || GET_CODE (shift_rtx) == ROTATE
18382 || GET_CODE (shift_rtx) == ROTATERT))
18383 {
18384 rtx shift_count = XEXP (shift_rtx, 1);
18385
18386 /* Return true if shift count is dest of SET_BODY. */
18387 if (REG_P (shift_count))
18388 {
18389 /* Add this check since it can be invoked before register
18390 allocation by the pre-reload scheduler. */
18391 if (reload_completed
18392 && true_regnum (set_dest) == true_regnum (shift_count))
18393 return true;
18394 else if (REGNO(set_dest) == REGNO(shift_count))
18395 return true;
18396 }
18397 }
18398
18399 return false;
18400 }
18401
18402 /* Return true if destination reg of SET_INSN is shift count of
18403 USE_INSN. */
18404
18405 bool
18406 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18407 {
18408 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18409 PATTERN (use_insn));
18410 }
18411
18412 /* Return TRUE or FALSE depending on whether the unary operator meets the
18413 appropriate constraints. */
18414
18415 bool
18416 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18417 enum machine_mode mode ATTRIBUTE_UNUSED,
18418 rtx operands[2])
18419 {
18420 /* If one of operands is memory, source and destination must match. */
18421 if ((MEM_P (operands[0])
18422 || MEM_P (operands[1]))
18423 && ! rtx_equal_p (operands[0], operands[1]))
18424 return false;
18425 return true;
18426 }
18427
18428 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18429 are ok, keeping in mind the possible movddup alternative. */
18430
18431 bool
18432 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18433 {
18434 if (MEM_P (operands[0]))
18435 return rtx_equal_p (operands[0], operands[1 + high]);
18436 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18437 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18438 return true;
18439 }
18440
18441 /* Post-reload splitter for converting an SFmode or DFmode value in an
18442 SSE register into an unsigned SImode value. */
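/* Outline of the scheme used below, per element, for an input x:
     if (x < 0x1.0p31)  result = (int) x;
     else               result = (int) (x - 0x1.0p31) ^ 0x80000000;
   The comparison mask keeps this branchless: LARGE ends up holding the
   mask, ZERO_OR_TWO31 holds either 0.0 or 2**31, and the final xor
   restores the high bit for the large inputs. */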
18443
18444 void
18445 ix86_split_convert_uns_si_sse (rtx operands[])
18446 {
18447 enum machine_mode vecmode;
18448 rtx value, large, zero_or_two31, input, two31, x;
18449
18450 large = operands[1];
18451 zero_or_two31 = operands[2];
18452 input = operands[3];
18453 two31 = operands[4];
18454 vecmode = GET_MODE (large);
18455 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18456
18457 /* Load up the value into the low element. We must ensure that the other
18458 elements are valid floats -- zero is the easiest such value. */
18459 if (MEM_P (input))
18460 {
18461 if (vecmode == V4SFmode)
18462 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18463 else
18464 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18465 }
18466 else
18467 {
18468 input = gen_rtx_REG (vecmode, REGNO (input));
18469 emit_move_insn (value, CONST0_RTX (vecmode));
18470 if (vecmode == V4SFmode)
18471 emit_insn (gen_sse_movss (value, value, input));
18472 else
18473 emit_insn (gen_sse2_movsd (value, value, input));
18474 }
18475
18476 emit_move_insn (large, two31);
18477 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18478
18479 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18480 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18481
18482 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18483 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18484
18485 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18486 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18487
18488 large = gen_rtx_REG (V4SImode, REGNO (large));
18489 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18490
18491 x = gen_rtx_REG (V4SImode, REGNO (value));
18492 if (vecmode == V4SFmode)
18493 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18494 else
18495 emit_insn (gen_sse2_cvttpd2dq (x, value));
18496 value = x;
18497
18498 emit_insn (gen_xorv4si3 (value, value, large));
18499 }
18500
18501 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18502 Expects the 64-bit DImode to be supplied in a pair of integral
18503 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18504 -mfpmath=sse, !optimize_size only. */
18505
18506 void
18507 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18508 {
18509 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18510 rtx int_xmm, fp_xmm;
18511 rtx biases, exponents;
18512 rtx x;
18513
18514 int_xmm = gen_reg_rtx (V4SImode);
18515 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18516 emit_insn (gen_movdi_to_sse (int_xmm, input));
18517 else if (TARGET_SSE_SPLIT_REGS)
18518 {
18519 emit_clobber (int_xmm);
18520 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18521 }
18522 else
18523 {
18524 x = gen_reg_rtx (V2DImode);
18525 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18526 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18527 }
18528
18529 x = gen_rtx_CONST_VECTOR (V4SImode,
18530 gen_rtvec (4, GEN_INT (0x43300000UL),
18531 GEN_INT (0x45300000UL),
18532 const0_rtx, const0_rtx));
18533 exponents = validize_mem (force_const_mem (V4SImode, x));
18534
18535 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18536 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18537
18538 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18539 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18540 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18541 (0x1.0p84 + double(fp_value_hi_xmm)).
18542 Note these exponents differ by 32. */
18543
18544 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18545
18546 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18547 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18548 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18549 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18550 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18551 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18552 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18553 biases = validize_mem (force_const_mem (V2DFmode, biases));
18554 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18555
18556 /* Add the upper and lower DFmode values together. */
18557 if (TARGET_SSE3)
18558 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18559 else
18560 {
18561 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18562 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18563 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18564 }
18565
18566 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18567 }
18568
18569 /* Not used, but eases macroization of patterns. */
18570 void
18571 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18572 rtx input ATTRIBUTE_UNUSED)
18573 {
18574 gcc_unreachable ();
18575 }
18576
18577 /* Convert an unsigned SImode value into a DFmode. Only currently used
18578 for SSE, but applicable anywhere. */
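/* The trick, sketched: adding -2**31 (which merely toggles the sign
   bit) turns the unsigned input into a signed value biased by -2**31,
   and the signed conversion is then corrected by adding 2**31.0.
   E.g. the input 0xffffffff becomes 0x7fffffff = 2147483647, converts
   to 2147483647.0, and 2147483647.0 + 2147483648.0 = 4294967295.0. */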
18579
18580 void
18581 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18582 {
18583 REAL_VALUE_TYPE TWO31r;
18584 rtx x, fp;
18585
18586 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18587 NULL, 1, OPTAB_DIRECT);
18588
18589 fp = gen_reg_rtx (DFmode);
18590 emit_insn (gen_floatsidf2 (fp, x));
18591
18592 real_ldexp (&TWO31r, &dconst1, 31);
18593 x = const_double_from_real_value (TWO31r, DFmode);
18594
18595 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18596 if (x != target)
18597 emit_move_insn (target, x);
18598 }
18599
18600 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18601 32-bit mode; otherwise we have a direct convert instruction. */
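/* A sketch of the split used below: for a signed 64-bit value
   v = hi * 2**32 + lo, with HI signed and LO unsigned, compute
   (double) hi * 0x1.0p32 + (double) lo, converting LO via
   ix86_expand_convert_uns_sidf_sse; only the final addition can
   round. */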
18602
18603 void
18604 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18605 {
18606 REAL_VALUE_TYPE TWO32r;
18607 rtx fp_lo, fp_hi, x;
18608
18609 fp_lo = gen_reg_rtx (DFmode);
18610 fp_hi = gen_reg_rtx (DFmode);
18611
18612 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18613
18614 real_ldexp (&TWO32r, &dconst1, 32);
18615 x = const_double_from_real_value (TWO32r, DFmode);
18616 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18617
18618 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18619
18620 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18621 0, OPTAB_DIRECT);
18622 if (x != target)
18623 emit_move_insn (target, x);
18624 }
18625
18626 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18627 For x86_32, -mfpmath=sse, !optimize_size only. */
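/* The approach, sketched: split the input into 16-bit halves, convert
   each half exactly (both fit in SFmode), and recombine as
   hi * 65536.0f + lo. E.g. 0x12345678 is handled as
   (float) 0x1234 * 65536.0f + (float) 0x5678. */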
18628 void
18629 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18630 {
18631 REAL_VALUE_TYPE ONE16r;
18632 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18633
18634 real_ldexp (&ONE16r, &dconst1, 16);
18635 x = const_double_from_real_value (ONE16r, SFmode);
18636 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18637 NULL, 0, OPTAB_DIRECT);
18638 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18639 NULL, 0, OPTAB_DIRECT);
18640 fp_hi = gen_reg_rtx (SFmode);
18641 fp_lo = gen_reg_rtx (SFmode);
18642 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18643 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18644 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18645 0, OPTAB_DIRECT);
18646 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18647 0, OPTAB_DIRECT);
18648 if (!rtx_equal_p (target, fp_hi))
18649 emit_move_insn (target, fp_hi);
18650 }
18651
18652 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18653 a vector of unsigned ints VAL to a vector of floats TARGET. */
18654
18655 void
18656 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18657 {
18658 rtx tmp[8];
18659 REAL_VALUE_TYPE TWO16r;
18660 enum machine_mode intmode = GET_MODE (val);
18661 enum machine_mode fltmode = GET_MODE (target);
18662 rtx (*cvt) (rtx, rtx);
18663
18664 if (intmode == V4SImode)
18665 cvt = gen_floatv4siv4sf2;
18666 else
18667 cvt = gen_floatv8siv8sf2;
18668 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18669 tmp[0] = force_reg (intmode, tmp[0]);
18670 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18671 OPTAB_DIRECT);
18672 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18673 NULL_RTX, 1, OPTAB_DIRECT);
18674 tmp[3] = gen_reg_rtx (fltmode);
18675 emit_insn (cvt (tmp[3], tmp[1]));
18676 tmp[4] = gen_reg_rtx (fltmode);
18677 emit_insn (cvt (tmp[4], tmp[2]));
18678 real_ldexp (&TWO16r, &dconst1, 16);
18679 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18680 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18681 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18682 OPTAB_DIRECT);
18683 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18684 OPTAB_DIRECT);
18685 if (tmp[7] != target)
18686 emit_move_insn (target, tmp[7]);
18687 }
18688
18689 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18690 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18691 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18692 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18693
18694 rtx
18695 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18696 {
18697 REAL_VALUE_TYPE TWO31r;
18698 rtx two31r, tmp[4];
18699 enum machine_mode mode = GET_MODE (val);
18700 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18701 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18702 rtx (*cmp) (rtx, rtx, rtx, rtx);
18703 int i;
18704
18705 for (i = 0; i < 3; i++)
18706 tmp[i] = gen_reg_rtx (mode);
18707 real_ldexp (&TWO31r, &dconst1, 31);
18708 two31r = const_double_from_real_value (TWO31r, scalarmode);
18709 two31r = ix86_build_const_vector (mode, 1, two31r);
18710 two31r = force_reg (mode, two31r);
18711 switch (mode)
18712 {
18713 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18714 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18715 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18716 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18717 default: gcc_unreachable ();
18718 }
18719 tmp[3] = gen_rtx_LE (mode, two31r, val);
18720 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18721 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18722 0, OPTAB_DIRECT);
18723 if (intmode == V4SImode || TARGET_AVX2)
18724 *xorp = expand_simple_binop (intmode, ASHIFT,
18725 gen_lowpart (intmode, tmp[0]),
18726 GEN_INT (31), NULL_RTX, 0,
18727 OPTAB_DIRECT);
18728 else
18729 {
18730 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18731 two31 = ix86_build_const_vector (intmode, 1, two31);
18732 *xorp = expand_simple_binop (intmode, AND,
18733 gen_lowpart (intmode, tmp[0]),
18734 two31, NULL_RTX, 0,
18735 OPTAB_DIRECT);
18736 }
18737 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18738 0, OPTAB_DIRECT);
18739 }
18740
18741 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18742 then replicate the value for all elements of the vector
18743 register. */
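/* For example, ix86_build_const_vector (V4SImode, true, GEN_INT (0xffff))
   yields the CONST_VECTOR { 0xffff, 0xffff, 0xffff, 0xffff } used by
   ix86_expand_vector_convert_uns_vsivsf above, while VECT == false keeps
   only element 0 equal to VALUE and zeroes the rest. */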
18744
18745 rtx
18746 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18747 {
18748 int i, n_elt;
18749 rtvec v;
18750 enum machine_mode scalar_mode;
18751
18752 switch (mode)
18753 {
18754 case V64QImode:
18755 case V32QImode:
18756 case V16QImode:
18757 case V32HImode:
18758 case V16HImode:
18759 case V8HImode:
18760 case V16SImode:
18761 case V8SImode:
18762 case V4SImode:
18763 case V8DImode:
18764 case V4DImode:
18765 case V2DImode:
18766 gcc_assert (vect);
18767 case V16SFmode:
18768 case V8SFmode:
18769 case V4SFmode:
18770 case V8DFmode:
18771 case V4DFmode:
18772 case V2DFmode:
18773 n_elt = GET_MODE_NUNITS (mode);
18774 v = rtvec_alloc (n_elt);
18775 scalar_mode = GET_MODE_INNER (mode);
18776
18777 RTVEC_ELT (v, 0) = value;
18778
18779 for (i = 1; i < n_elt; ++i)
18780 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18781
18782 return gen_rtx_CONST_VECTOR (mode, v);
18783
18784 default:
18785 gcc_unreachable ();
18786 }
18787 }
18788
18789 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18790 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18791 for an SSE register. If VECT is true, then replicate the mask for
18792 all elements of the vector register. If INVERT is true, then create
18793 a mask excluding the sign bit. */
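/* For instance, for V4SFmode with VECT true this builds the vector
   { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } viewed as floats;
   with INVERT true each element is instead 0x7fffffff, the mask that
   clears the sign bit (as used for ABS below). */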
18794
18795 rtx
18796 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18797 {
18798 enum machine_mode vec_mode, imode;
18799 HOST_WIDE_INT hi, lo;
18800 int shift = 63;
18801 rtx v;
18802 rtx mask;
18803
18804 /* Find the sign bit, sign extended to 2*HWI. */
18805 switch (mode)
18806 {
18807 case V16SImode:
18808 case V16SFmode:
18809 case V8SImode:
18810 case V4SImode:
18811 case V8SFmode:
18812 case V4SFmode:
18813 vec_mode = mode;
18814 mode = GET_MODE_INNER (mode);
18815 imode = SImode;
18816 lo = 0x80000000, hi = lo < 0;
18817 break;
18818
18819 case V8DImode:
18820 case V4DImode:
18821 case V2DImode:
18822 case V8DFmode:
18823 case V4DFmode:
18824 case V2DFmode:
18825 vec_mode = mode;
18826 mode = GET_MODE_INNER (mode);
18827 imode = DImode;
18828 if (HOST_BITS_PER_WIDE_INT >= 64)
18829 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18830 else
18831 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18832 break;
18833
18834 case TImode:
18835 case TFmode:
18836 vec_mode = VOIDmode;
18837 if (HOST_BITS_PER_WIDE_INT >= 64)
18838 {
18839 imode = TImode;
18840 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18841 }
18842 else
18843 {
18844 rtvec vec;
18845
18846 imode = DImode;
18847 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18848
18849 if (invert)
18850 {
18851 lo = ~lo, hi = ~hi;
18852 v = constm1_rtx;
18853 }
18854 else
18855 v = const0_rtx;
18856
18857 mask = immed_double_const (lo, hi, imode);
18858
18859 vec = gen_rtvec (2, v, mask);
18860 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
18861 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
18862
18863 return v;
18864 }
18865 break;
18866
18867 default:
18868 gcc_unreachable ();
18869 }
18870
18871 if (invert)
18872 lo = ~lo, hi = ~hi;
18873
18874 /* Force this value into the low part of a fp vector constant. */
18875 mask = immed_double_const (lo, hi, imode);
18876 mask = gen_lowpart (mode, mask);
18877
18878 if (vec_mode == VOIDmode)
18879 return force_reg (mode, mask);
18880
18881 v = ix86_build_const_vector (vec_mode, vect, mask);
18882 return force_reg (vec_mode, v);
18883 }
18884
18885 /* Generate code for floating point ABS or NEG. */
18886
18887 void
18888 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
18889 rtx operands[])
18890 {
18891 rtx mask, set, dst, src;
18892 bool use_sse = false;
18893 bool vector_mode = VECTOR_MODE_P (mode);
18894 enum machine_mode vmode = mode;
18895
18896 if (vector_mode)
18897 use_sse = true;
18898 else if (mode == TFmode)
18899 use_sse = true;
18900 else if (TARGET_SSE_MATH)
18901 {
18902 use_sse = SSE_FLOAT_MODE_P (mode);
18903 if (mode == SFmode)
18904 vmode = V4SFmode;
18905 else if (mode == DFmode)
18906 vmode = V2DFmode;
18907 }
18908
18909 /* NEG and ABS performed with SSE use bitwise mask operations.
18910 Create the appropriate mask now. */
18911 if (use_sse)
18912 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
18913 else
18914 mask = NULL_RTX;
18915
18916 dst = operands[0];
18917 src = operands[1];
18918
18919 set = gen_rtx_fmt_e (code, mode, src);
18920 set = gen_rtx_SET (VOIDmode, dst, set);
18921
18922 if (mask)
18923 {
18924 rtx use, clob;
18925 rtvec par;
18926
18927 use = gen_rtx_USE (VOIDmode, mask);
18928 if (vector_mode)
18929 par = gen_rtvec (2, set, use);
18930 else
18931 {
18932 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18933 par = gen_rtvec (3, set, use, clob);
18934 }
18935 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
18936 }
18937 else
18938 emit_insn (set);
18939 }
18940
18941 /* Expand a copysign operation. Special case operand 0 being a constant. */
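/* In outline, with MASK the sign-bit mask built by
   ix86_build_signbit_mask, the result is

       (op0 & ~mask) | (op1 & mask)

   i.e. the magnitude of operands[1] combined with the sign of
   operands[2]; the split routines below carry out roughly these bit
   operations. */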
18942
18943 void
18944 ix86_expand_copysign (rtx operands[])
18945 {
18946 enum machine_mode mode, vmode;
18947 rtx dest, op0, op1, mask, nmask;
18948
18949 dest = operands[0];
18950 op0 = operands[1];
18951 op1 = operands[2];
18952
18953 mode = GET_MODE (dest);
18954
18955 if (mode == SFmode)
18956 vmode = V4SFmode;
18957 else if (mode == DFmode)
18958 vmode = V2DFmode;
18959 else
18960 vmode = mode;
18961
18962 if (GET_CODE (op0) == CONST_DOUBLE)
18963 {
18964 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
18965
18966 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
18967 op0 = simplify_unary_operation (ABS, mode, op0, mode);
18968
18969 if (mode == SFmode || mode == DFmode)
18970 {
18971 if (op0 == CONST0_RTX (mode))
18972 op0 = CONST0_RTX (vmode);
18973 else
18974 {
18975 rtx v = ix86_build_const_vector (vmode, false, op0);
18976
18977 op0 = force_reg (vmode, v);
18978 }
18979 }
18980 else if (op0 != CONST0_RTX (mode))
18981 op0 = force_reg (mode, op0);
18982
18983 mask = ix86_build_signbit_mask (vmode, 0, 0);
18984
18985 if (mode == SFmode)
18986 copysign_insn = gen_copysignsf3_const;
18987 else if (mode == DFmode)
18988 copysign_insn = gen_copysigndf3_const;
18989 else
18990 copysign_insn = gen_copysigntf3_const;
18991
18992 emit_insn (copysign_insn (dest, op0, op1, mask));
18993 }
18994 else
18995 {
18996 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
18997
18998 nmask = ix86_build_signbit_mask (vmode, 0, 1);
18999 mask = ix86_build_signbit_mask (vmode, 0, 0);
19000
19001 if (mode == SFmode)
19002 copysign_insn = gen_copysignsf3_var;
19003 else if (mode == DFmode)
19004 copysign_insn = gen_copysigndf3_var;
19005 else
19006 copysign_insn = gen_copysigntf3_var;
19007
19008 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19009 }
19010 }
19011
19012 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19013 be a constant, and so has already been expanded into a vector constant. */
19014
19015 void
19016 ix86_split_copysign_const (rtx operands[])
19017 {
19018 enum machine_mode mode, vmode;
19019 rtx dest, op0, mask, x;
19020
19021 dest = operands[0];
19022 op0 = operands[1];
19023 mask = operands[3];
19024
19025 mode = GET_MODE (dest);
19026 vmode = GET_MODE (mask);
19027
19028 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19029 x = gen_rtx_AND (vmode, dest, mask);
19030 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19031
19032 if (op0 != CONST0_RTX (vmode))
19033 {
19034 x = gen_rtx_IOR (vmode, dest, op0);
19035 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19036 }
19037 }
19038
19039 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19040 so we have to do two masks. */
19041
19042 void
19043 ix86_split_copysign_var (rtx operands[])
19044 {
19045 enum machine_mode mode, vmode;
19046 rtx dest, scratch, op0, op1, mask, nmask, x;
19047
19048 dest = operands[0];
19049 scratch = operands[1];
19050 op0 = operands[2];
19051 op1 = operands[3];
19052 nmask = operands[4];
19053 mask = operands[5];
19054
19055 mode = GET_MODE (dest);
19056 vmode = GET_MODE (mask);
19057
19058 if (rtx_equal_p (op0, op1))
19059 {
19060 /* Shouldn't happen often (it's useless, obviously), but when it does
19061 we'd generate incorrect code if we continue below. */
19062 emit_move_insn (dest, op0);
19063 return;
19064 }
19065
19066 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19067 {
19068 gcc_assert (REGNO (op1) == REGNO (scratch));
19069
19070 x = gen_rtx_AND (vmode, scratch, mask);
19071 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19072
19073 dest = mask;
19074 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19075 x = gen_rtx_NOT (vmode, dest);
19076 x = gen_rtx_AND (vmode, x, op0);
19077 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19078 }
19079 else
19080 {
19081 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19082 {
19083 x = gen_rtx_AND (vmode, scratch, mask);
19084 }
19085 else /* alternative 2,4 */
19086 {
19087 gcc_assert (REGNO (mask) == REGNO (scratch));
19088 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19089 x = gen_rtx_AND (vmode, scratch, op1);
19090 }
19091 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19092
19093 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19094 {
19095 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19096 x = gen_rtx_AND (vmode, dest, nmask);
19097 }
19098 else /* alternative 3,4 */
19099 {
19100 gcc_assert (REGNO (nmask) == REGNO (dest));
19101 dest = nmask;
19102 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19103 x = gen_rtx_AND (vmode, dest, op0);
19104 }
19105 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19106 }
19107
19108 x = gen_rtx_IOR (vmode, dest, scratch);
19109 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19110 }
19111
19112 /* Return TRUE or FALSE depending on whether the first SET in INSN
19113 has source and destination with matching CC modes, and that the
19114 CC mode is at least as constrained as REQ_MODE. */
19115
19116 bool
19117 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19118 {
19119 rtx set;
19120 enum machine_mode set_mode;
19121
19122 set = PATTERN (insn);
19123 if (GET_CODE (set) == PARALLEL)
19124 set = XVECEXP (set, 0, 0);
19125 gcc_assert (GET_CODE (set) == SET);
19126 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19127
19128 set_mode = GET_MODE (SET_DEST (set));
19129 switch (set_mode)
19130 {
19131 case CCNOmode:
19132 if (req_mode != CCNOmode
19133 && (req_mode != CCmode
19134 || XEXP (SET_SRC (set), 1) != const0_rtx))
19135 return false;
19136 break;
19137 case CCmode:
19138 if (req_mode == CCGCmode)
19139 return false;
19140 /* FALLTHRU */
19141 case CCGCmode:
19142 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19143 return false;
19144 /* FALLTHRU */
19145 case CCGOCmode:
19146 if (req_mode == CCZmode)
19147 return false;
19148 /* FALLTHRU */
19149 case CCZmode:
19150 break;
19151
19152 case CCAmode:
19153 case CCCmode:
19154 case CCOmode:
19155 case CCSmode:
19156 if (set_mode != req_mode)
19157 return false;
19158 break;
19159
19160 default:
19161 gcc_unreachable ();
19162 }
19163
19164 return GET_MODE (SET_SRC (set)) == set_mode;
19165 }
19166
19167 /* Generate insn patterns to do an integer compare of OPERANDS. */
19168
19169 static rtx
19170 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19171 {
19172 enum machine_mode cmpmode;
19173 rtx tmp, flags;
19174
19175 cmpmode = SELECT_CC_MODE (code, op0, op1);
19176 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19177
19178 /* This is very simple, but making the interface the same as in the
19179 FP case makes the rest of the code easier. */
19180 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19181 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19182
19183 /* Return the test that should be put into the flags user, i.e.
19184 the bcc, scc, or cmov instruction. */
19185 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19186 }
19187
19188 /* Figure out whether to use ordered or unordered fp comparisons.
19189 Return the appropriate mode to use. */
19190
19191 enum machine_mode
19192 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19193 {
19194 /* ??? In order to make all comparisons reversible, we do all comparisons
19195 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19196 all forms of trapping and nontrapping comparisons, we can make inequality
19197 comparisons trapping again, since it results in better code when using
19198 FCOM based compares. */
19199 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19200 }
19201
19202 enum machine_mode
19203 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19204 {
19205 enum machine_mode mode = GET_MODE (op0);
19206
19207 if (SCALAR_FLOAT_MODE_P (mode))
19208 {
19209 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19210 return ix86_fp_compare_mode (code);
19211 }
19212
19213 switch (code)
19214 {
19215 /* Only zero flag is needed. */
19216 case EQ: /* ZF=0 */
19217 case NE: /* ZF!=0 */
19218 return CCZmode;
19219 /* Codes needing carry flag. */
19220 case GEU: /* CF=0 */
19221 case LTU: /* CF=1 */
19222 /* Detect overflow checks. They need just the carry flag. */
19223 if (GET_CODE (op0) == PLUS
19224 && rtx_equal_p (op1, XEXP (op0, 0)))
19225 return CCCmode;
19226 else
19227 return CCmode;
19228 case GTU: /* CF=0 & ZF=0 */
19229 case LEU: /* CF=1 | ZF=1 */
19230 return CCmode;
19231 /* Codes possibly doable only with sign flag when
19232 comparing against zero. */
19233 case GE: /* SF=OF or SF=0 */
19234 case LT: /* SF<>OF or SF=1 */
19235 if (op1 == const0_rtx)
19236 return CCGOCmode;
19237 else
19238 /* For other cases Carry flag is not required. */
19239 return CCGCmode;
19240 /* Codes doable only with the sign flag when comparing
19241 against zero, but we miss the jump instruction for it
19242 so we need to use relational tests against the overflow
19243 flag, which thus needs to be zero. */
19244 case GT: /* ZF=0 & SF=OF */
19245 case LE: /* ZF=1 | SF<>OF */
19246 if (op1 == const0_rtx)
19247 return CCNOmode;
19248 else
19249 return CCGCmode;
19250 /* The strcmp pattern does (use flags), and combine may ask us for the
19251 proper mode. */
19252 case USE:
19253 return CCmode;
19254 default:
19255 gcc_unreachable ();
19256 }
19257 }
19258
19259 /* Return the fixed registers used for condition codes. */
19260
19261 static bool
19262 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19263 {
19264 *p1 = FLAGS_REG;
19265 *p2 = FPSR_REG;
19266 return true;
19267 }
19268
19269 /* If two condition code modes are compatible, return a condition code
19270 mode which is compatible with both. Otherwise, return
19271 VOIDmode. */
19272
19273 static enum machine_mode
19274 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19275 {
19276 if (m1 == m2)
19277 return m1;
19278
19279 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19280 return VOIDmode;
19281
19282 if ((m1 == CCGCmode && m2 == CCGOCmode)
19283 || (m1 == CCGOCmode && m2 == CCGCmode))
19284 return CCGCmode;
19285
19286 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19287 return m2;
19288 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19289 return m1;
19290
19291 switch (m1)
19292 {
19293 default:
19294 gcc_unreachable ();
19295
19296 case CCmode:
19297 case CCGCmode:
19298 case CCGOCmode:
19299 case CCNOmode:
19300 case CCAmode:
19301 case CCCmode:
19302 case CCOmode:
19303 case CCSmode:
19304 case CCZmode:
19305 switch (m2)
19306 {
19307 default:
19308 return VOIDmode;
19309
19310 case CCmode:
19311 case CCGCmode:
19312 case CCGOCmode:
19313 case CCNOmode:
19314 case CCAmode:
19315 case CCCmode:
19316 case CCOmode:
19317 case CCSmode:
19318 case CCZmode:
19319 return CCmode;
19320 }
19321
19322 case CCFPmode:
19323 case CCFPUmode:
19324 /* These are only compatible with themselves, which we already
19325 checked above. */
19326 return VOIDmode;
19327 }
19328 }
19329
19330
19331 /* Return a comparison we can do that is equivalent to
19332 swap_condition (code) apart possibly from orderedness.
19333 But, never change orderedness if TARGET_IEEE_FP, returning
19334 UNKNOWN in that case if necessary. */
19335
19336 static enum rtx_code
19337 ix86_fp_swap_condition (enum rtx_code code)
19338 {
19339 switch (code)
19340 {
19341 case GT: /* GTU - CF=0 & ZF=0 */
19342 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19343 case GE: /* GEU - CF=0 */
19344 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19345 case UNLT: /* LTU - CF=1 */
19346 return TARGET_IEEE_FP ? UNKNOWN : GT;
19347 case UNLE: /* LEU - CF=1 | ZF=1 */
19348 return TARGET_IEEE_FP ? UNKNOWN : GE;
19349 default:
19350 return swap_condition (code);
19351 }
19352 }
19353
19354 /* Return cost of comparison CODE using the best strategy for performance.
19355 All of the following functions use the number of instructions as the cost metric.
19356 In the future this should be tweaked to compute bytes for optimize_size and
19357 take into account performance of various instructions on various CPUs. */
19358
19359 static int
19360 ix86_fp_comparison_cost (enum rtx_code code)
19361 {
19362 int arith_cost;
19363
19364 /* The cost of code using bit-twiddling on %ah. */
19365 switch (code)
19366 {
19367 case UNLE:
19368 case UNLT:
19369 case LTGT:
19370 case GT:
19371 case GE:
19372 case UNORDERED:
19373 case ORDERED:
19374 case UNEQ:
19375 arith_cost = 4;
19376 break;
19377 case LT:
19378 case NE:
19379 case EQ:
19380 case UNGE:
19381 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19382 break;
19383 case LE:
19384 case UNGT:
19385 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19386 break;
19387 default:
19388 gcc_unreachable ();
19389 }
19390
19391 switch (ix86_fp_comparison_strategy (code))
19392 {
19393 case IX86_FPCMP_COMI:
19394 return arith_cost > 4 ? 3 : 2;
19395 case IX86_FPCMP_SAHF:
19396 return arith_cost > 4 ? 4 : 3;
19397 default:
19398 return arith_cost;
19399 }
19400 }
19401
19402 /* Return the strategy to use for floating-point comparisons. We assume that
19403 fcomi is always preferable where available, since that is also true when
19404 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19405
19406 enum ix86_fpcmp_strategy
19407 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19408 {
19409 /* Do fcomi/sahf based test when profitable. */
19410
19411 if (TARGET_CMOVE)
19412 return IX86_FPCMP_COMI;
19413
19414 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19415 return IX86_FPCMP_SAHF;
19416
19417 return IX86_FPCMP_ARITH;
19418 }
19419
19420 /* Swap, force into registers, or otherwise massage the two operands
19421 to a fp comparison. The operands are updated in place; the new
19422 comparison code is returned. */
19423
19424 static enum rtx_code
19425 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19426 {
19427 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19428 rtx op0 = *pop0, op1 = *pop1;
19429 enum machine_mode op_mode = GET_MODE (op0);
19430 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19431
19432 /* All of the unordered compare instructions only work on registers.
19433 The same is true of the fcomi compare instructions. The XFmode
19434 compare instructions require registers except when comparing
19435 against zero or when converting operand 1 from fixed point to
19436 floating point. */
19437
19438 if (!is_sse
19439 && (fpcmp_mode == CCFPUmode
19440 || (op_mode == XFmode
19441 && ! (standard_80387_constant_p (op0) == 1
19442 || standard_80387_constant_p (op1) == 1)
19443 && GET_CODE (op1) != FLOAT)
19444 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19445 {
19446 op0 = force_reg (op_mode, op0);
19447 op1 = force_reg (op_mode, op1);
19448 }
19449 else
19450 {
19451 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19452 things around if they appear profitable, otherwise force op0
19453 into a register. */
19454
19455 if (standard_80387_constant_p (op0) == 0
19456 || (MEM_P (op0)
19457 && ! (standard_80387_constant_p (op1) == 0
19458 || MEM_P (op1))))
19459 {
19460 enum rtx_code new_code = ix86_fp_swap_condition (code);
19461 if (new_code != UNKNOWN)
19462 {
19463 rtx tmp;
19464 tmp = op0, op0 = op1, op1 = tmp;
19465 code = new_code;
19466 }
19467 }
19468
19469 if (!REG_P (op0))
19470 op0 = force_reg (op_mode, op0);
19471
19472 if (CONSTANT_P (op1))
19473 {
19474 int tmp = standard_80387_constant_p (op1);
19475 if (tmp == 0)
19476 op1 = validize_mem (force_const_mem (op_mode, op1));
19477 else if (tmp == 1)
19478 {
19479 if (TARGET_CMOVE)
19480 op1 = force_reg (op_mode, op1);
19481 }
19482 else
19483 op1 = force_reg (op_mode, op1);
19484 }
19485 }
19486
19487 /* Try to rearrange the comparison to make it cheaper. */
19488 if (ix86_fp_comparison_cost (code)
19489 > ix86_fp_comparison_cost (swap_condition (code))
19490 && (REG_P (op1) || can_create_pseudo_p ()))
19491 {
19492 rtx tmp;
19493 tmp = op0, op0 = op1, op1 = tmp;
19494 code = swap_condition (code);
19495 if (!REG_P (op0))
19496 op0 = force_reg (op_mode, op0);
19497 }
19498
19499 *pop0 = op0;
19500 *pop1 = op1;
19501 return code;
19502 }
19503
19504 /* Convert comparison codes we use to represent FP comparison to integer
19505 code that will result in proper branch. Return UNKNOWN if no such code
19506 is available. */
19507
19508 enum rtx_code
19509 ix86_fp_compare_code_to_integer (enum rtx_code code)
19510 {
19511 switch (code)
19512 {
19513 case GT:
19514 return GTU;
19515 case GE:
19516 return GEU;
19517 case ORDERED:
19518 case UNORDERED:
19519 return code;
19520 break;
19521 case UNEQ:
19522 return EQ;
19523 break;
19524 case UNLT:
19525 return LTU;
19526 break;
19527 case UNLE:
19528 return LEU;
19529 break;
19530 case LTGT:
19531 return NE;
19532 break;
19533 default:
19534 return UNKNOWN;
19535 }
19536 }
19537
19538 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19539
19540 static rtx
19541 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19542 {
19543 enum machine_mode fpcmp_mode, intcmp_mode;
19544 rtx tmp, tmp2;
19545
19546 fpcmp_mode = ix86_fp_compare_mode (code);
19547 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19548
19549 /* Do fcomi/sahf based test when profitable. */
19550 switch (ix86_fp_comparison_strategy (code))
19551 {
19552 case IX86_FPCMP_COMI:
19553 intcmp_mode = fpcmp_mode;
19554 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19555 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19556 tmp);
19557 emit_insn (tmp);
19558 break;
19559
19560 case IX86_FPCMP_SAHF:
19561 intcmp_mode = fpcmp_mode;
19562 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19563 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19564 tmp);
19565
19566 if (!scratch)
19567 scratch = gen_reg_rtx (HImode);
19568 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19569 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19570 break;
19571
19572 case IX86_FPCMP_ARITH:
19573 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19574 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19575 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19576 if (!scratch)
19577 scratch = gen_reg_rtx (HImode);
19578 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19579
19580 /* In the unordered case, we have to check C2 for NaN's, which
19581 doesn't happen to work out to anything nice combination-wise.
19582 So do some bit twiddling on the value we've got in AH to come
19583 up with an appropriate set of condition codes. */
19584
19585 intcmp_mode = CCNOmode;
19586 switch (code)
19587 {
19588 case GT:
19589 case UNGT:
19590 if (code == GT || !TARGET_IEEE_FP)
19591 {
19592 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19593 code = EQ;
19594 }
19595 else
19596 {
19597 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19598 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19599 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19600 intcmp_mode = CCmode;
19601 code = GEU;
19602 }
19603 break;
19604 case LT:
19605 case UNLT:
19606 if (code == LT && TARGET_IEEE_FP)
19607 {
19608 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19609 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19610 intcmp_mode = CCmode;
19611 code = EQ;
19612 }
19613 else
19614 {
19615 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19616 code = NE;
19617 }
19618 break;
19619 case GE:
19620 case UNGE:
19621 if (code == GE || !TARGET_IEEE_FP)
19622 {
19623 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19624 code = EQ;
19625 }
19626 else
19627 {
19628 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19629 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19630 code = NE;
19631 }
19632 break;
19633 case LE:
19634 case UNLE:
19635 if (code == LE && TARGET_IEEE_FP)
19636 {
19637 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19638 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19639 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19640 intcmp_mode = CCmode;
19641 code = LTU;
19642 }
19643 else
19644 {
19645 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19646 code = NE;
19647 }
19648 break;
19649 case EQ:
19650 case UNEQ:
19651 if (code == EQ && TARGET_IEEE_FP)
19652 {
19653 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19654 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19655 intcmp_mode = CCmode;
19656 code = EQ;
19657 }
19658 else
19659 {
19660 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19661 code = NE;
19662 }
19663 break;
19664 case NE:
19665 case LTGT:
19666 if (code == NE && TARGET_IEEE_FP)
19667 {
19668 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19669 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19670 GEN_INT (0x40)));
19671 code = NE;
19672 }
19673 else
19674 {
19675 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19676 code = EQ;
19677 }
19678 break;
19679
19680 case UNORDERED:
19681 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19682 code = NE;
19683 break;
19684 case ORDERED:
19685 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19686 code = EQ;
19687 break;
19688
19689 default:
19690 gcc_unreachable ();
19691 }
19692 break;
19693
19694 default:
19695 gcc_unreachable ();
19696 }
19697
19698 /* Return the test that should be put into the flags user, i.e.
19699 the bcc, scc, or cmov instruction. */
19700 return gen_rtx_fmt_ee (code, VOIDmode,
19701 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19702 const0_rtx);
19703 }
19704
19705 static rtx
19706 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19707 {
19708 rtx ret;
19709
19710 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19711 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19712
19713 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19714 {
19715 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19716 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19717 }
19718 else
19719 ret = ix86_expand_int_compare (code, op0, op1);
19720
19721 return ret;
19722 }
19723
19724 void
19725 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19726 {
19727 enum machine_mode mode = GET_MODE (op0);
19728 rtx tmp;
19729
19730 switch (mode)
19731 {
19732 case SFmode:
19733 case DFmode:
19734 case XFmode:
19735 case QImode:
19736 case HImode:
19737 case SImode:
19738 simple:
19739 tmp = ix86_expand_compare (code, op0, op1);
19740 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19741 gen_rtx_LABEL_REF (VOIDmode, label),
19742 pc_rtx);
19743 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19744 return;
19745
19746 case DImode:
19747 if (TARGET_64BIT)
19748 goto simple;
19749 case TImode:
19750 /* Expand a double-word (DImode or TImode) branch into multiple compare+branch. */
19751 {
19752 rtx lo[2], hi[2], label2;
19753 enum rtx_code code1, code2, code3;
19754 enum machine_mode submode;
19755
19756 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19757 {
19758 tmp = op0, op0 = op1, op1 = tmp;
19759 code = swap_condition (code);
19760 }
19761
19762 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19763 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19764
19765 submode = mode == DImode ? SImode : DImode;
19766
19767 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19768 avoid two branches. This costs one extra insn, so disable when
19769 optimizing for size. */
19770
19771 if ((code == EQ || code == NE)
19772 && (!optimize_insn_for_size_p ()
19773 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19774 {
19775 rtx xor0, xor1;
19776
19777 xor1 = hi[0];
19778 if (hi[1] != const0_rtx)
19779 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19780 NULL_RTX, 0, OPTAB_WIDEN);
19781
19782 xor0 = lo[0];
19783 if (lo[1] != const0_rtx)
19784 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19785 NULL_RTX, 0, OPTAB_WIDEN);
19786
19787 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19788 NULL_RTX, 0, OPTAB_WIDEN);
19789
19790 ix86_expand_branch (code, tmp, const0_rtx, label);
19791 return;
19792 }
19793
19794 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19795 op1 is a constant and the low word is zero, then we can just
19796 examine the high word. Similarly for low word -1 and
19797 less-or-equal-than or greater-than. */
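/* For example, on ia32 the unsigned DImode test a < (5 << 32) has a zero
   low word in the constant, so it reduces to the single word test
   hi(a) < 5. */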
19798
19799 if (CONST_INT_P (hi[1]))
19800 switch (code)
19801 {
19802 case LT: case LTU: case GE: case GEU:
19803 if (lo[1] == const0_rtx)
19804 {
19805 ix86_expand_branch (code, hi[0], hi[1], label);
19806 return;
19807 }
19808 break;
19809 case LE: case LEU: case GT: case GTU:
19810 if (lo[1] == constm1_rtx)
19811 {
19812 ix86_expand_branch (code, hi[0], hi[1], label);
19813 return;
19814 }
19815 break;
19816 default:
19817 break;
19818 }
19819
19820 /* Otherwise, we need two or three jumps. */
19821
19822 label2 = gen_label_rtx ();
19823
19824 code1 = code;
19825 code2 = swap_condition (code);
19826 code3 = unsigned_condition (code);
19827
19828 switch (code)
19829 {
19830 case LT: case GT: case LTU: case GTU:
19831 break;
19832
19833 case LE: code1 = LT; code2 = GT; break;
19834 case GE: code1 = GT; code2 = LT; break;
19835 case LEU: code1 = LTU; code2 = GTU; break;
19836 case GEU: code1 = GTU; code2 = LTU; break;
19837
19838 case EQ: code1 = UNKNOWN; code2 = NE; break;
19839 case NE: code2 = UNKNOWN; break;
19840
19841 default:
19842 gcc_unreachable ();
19843 }
19844
19845 /*
19846 * a < b =>
19847 * if (hi(a) < hi(b)) goto true;
19848 * if (hi(a) > hi(b)) goto false;
19849 * if (lo(a) < lo(b)) goto true;
19850 * false:
19851 */
19852
19853 if (code1 != UNKNOWN)
19854 ix86_expand_branch (code1, hi[0], hi[1], label);
19855 if (code2 != UNKNOWN)
19856 ix86_expand_branch (code2, hi[0], hi[1], label2);
19857
19858 ix86_expand_branch (code3, lo[0], lo[1], label);
19859
19860 if (code2 != UNKNOWN)
19861 emit_label (label2);
19862 return;
19863 }
19864
19865 default:
19866 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
19867 goto simple;
19868 }
19869 }
19870
19871 /* Split branch based on floating point condition. */
19872 void
19873 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
19874 rtx target1, rtx target2, rtx tmp, rtx pushed)
19875 {
19876 rtx condition;
19877 rtx i;
19878
19879 if (target2 != pc_rtx)
19880 {
19881 rtx tmp = target2;
19882 code = reverse_condition_maybe_unordered (code);
19883 target2 = target1;
19884 target1 = tmp;
19885 }
19886
19887 condition = ix86_expand_fp_compare (code, op1, op2,
19888 tmp);
19889
19890 /* Remove pushed operand from stack. */
19891 if (pushed)
19892 ix86_free_from_memory (GET_MODE (pushed));
19893
19894 i = emit_jump_insn (gen_rtx_SET
19895 (VOIDmode, pc_rtx,
19896 gen_rtx_IF_THEN_ELSE (VOIDmode,
19897 condition, target1, target2)));
19898 if (split_branch_probability >= 0)
19899 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
19900 }
19901
19902 void
19903 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
19904 {
19905 rtx ret;
19906
19907 gcc_assert (GET_MODE (dest) == QImode);
19908
19909 ret = ix86_expand_compare (code, op0, op1);
19910 PUT_MODE (ret, QImode);
19911 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
19912 }
19913
19914 /* Expand a comparison that sets or clears the carry flag. Return true when
19915 successful and store the resulting comparison in *POP. */
19916 static bool
19917 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
19918 {
19919 enum machine_mode mode =
19920 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
19921
19922 /* Do not handle double-mode compares; they go through a special path. */
19923 if (mode == (TARGET_64BIT ? TImode : DImode))
19924 return false;
19925
19926 if (SCALAR_FLOAT_MODE_P (mode))
19927 {
19928 rtx compare_op, compare_seq;
19929
19930 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19931
19932 /* Shortcut: the following common codes never translate
19933 into carry flag compares. */
19934 if (code == EQ || code == NE || code == UNEQ || code == LTGT
19935 || code == ORDERED || code == UNORDERED)
19936 return false;
19937
19938 /* These comparisons require the zero flag; swap the operands so they don't. */
19939 if ((code == GT || code == UNLE || code == LE || code == UNGT)
19940 && !TARGET_IEEE_FP)
19941 {
19942 rtx tmp = op0;
19943 op0 = op1;
19944 op1 = tmp;
19945 code = swap_condition (code);
19946 }
19947
19948 /* Try to expand the comparison and verify that we end up with
19949 a carry flag based comparison. This fails only when we decide
19950 to expand the comparison using arithmetic, which is not a
19951 common scenario. */
19952 start_sequence ();
19953 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19954 compare_seq = get_insns ();
19955 end_sequence ();
19956
19957 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
19958 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
19959 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
19960 else
19961 code = GET_CODE (compare_op);
19962
19963 if (code != LTU && code != GEU)
19964 return false;
19965
19966 emit_insn (compare_seq);
19967 *pop = compare_op;
19968 return true;
19969 }
19970
19971 if (!INTEGRAL_MODE_P (mode))
19972 return false;
19973
19974 switch (code)
19975 {
19976 case LTU:
19977 case GEU:
19978 break;
19979
19980 /* Convert a == 0 into (unsigned) a < 1, and a != 0 into (unsigned) a >= 1. */
19981 case EQ:
19982 case NE:
19983 if (op1 != const0_rtx)
19984 return false;
19985 op1 = const1_rtx;
19986 code = (code == EQ ? LTU : GEU);
19987 break;
19988
19989 /* Convert a > b into b < a or a >= b + 1, and a <= b into b >= a or a < b + 1. */
19990 case GTU:
19991 case LEU:
19992 if (CONST_INT_P (op1))
19993 {
19994 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
19995 /* Bail out on overflow. We still can swap operands but that
19996 would force loading of the constant into register. */
19997 if (op1 == const0_rtx
19998 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
19999 return false;
20000 code = (code == GTU ? GEU : LTU);
20001 }
20002 else
20003 {
20004 rtx tmp = op1;
20005 op1 = op0;
20006 op0 = tmp;
20007 code = (code == GTU ? LTU : GEU);
20008 }
20009 break;
20010
20011 /* Convert a>=0 into (unsigned)a<0x80000000. */
20012 case LT:
20013 case GE:
20014 if (mode == DImode || op1 != const0_rtx)
20015 return false;
20016 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20017 code = (code == LT ? GEU : LTU);
20018 break;
20019 case LE:
20020 case GT:
20021 if (mode == DImode || op1 != constm1_rtx)
20022 return false;
20023 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20024 code = (code == LE ? GEU : LTU);
20025 break;
20026
20027 default:
20028 return false;
20029 }
20030 /* Swapping operands may cause constant to appear as first operand. */
20031 if (!nonimmediate_operand (op0, VOIDmode))
20032 {
20033 if (!can_create_pseudo_p ())
20034 return false;
20035 op0 = force_reg (mode, op0);
20036 }
20037 *pop = ix86_expand_compare (code, op0, op1);
20038 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20039 return true;
20040 }
20041
20042 bool
20043 ix86_expand_int_movcc (rtx operands[])
20044 {
20045 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20046 rtx compare_seq, compare_op;
20047 enum machine_mode mode = GET_MODE (operands[0]);
20048 bool sign_bit_compare_p = false;
20049 rtx op0 = XEXP (operands[1], 0);
20050 rtx op1 = XEXP (operands[1], 1);
20051
20052 if (GET_MODE (op0) == TImode
20053 || (GET_MODE (op0) == DImode
20054 && !TARGET_64BIT))
20055 return false;
20056
20057 start_sequence ();
20058 compare_op = ix86_expand_compare (code, op0, op1);
20059 compare_seq = get_insns ();
20060 end_sequence ();
20061
20062 compare_code = GET_CODE (compare_op);
20063
20064 if ((op1 == const0_rtx && (code == GE || code == LT))
20065 || (op1 == constm1_rtx && (code == GT || code == LE)))
20066 sign_bit_compare_p = true;
20067
20068 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20069 HImode insns, we'd be swallowed in word prefix ops. */
20070
20071 if ((mode != HImode || TARGET_FAST_PREFIX)
20072 && (mode != (TARGET_64BIT ? TImode : DImode))
20073 && CONST_INT_P (operands[2])
20074 && CONST_INT_P (operands[3]))
20075 {
20076 rtx out = operands[0];
20077 HOST_WIDE_INT ct = INTVAL (operands[2]);
20078 HOST_WIDE_INT cf = INTVAL (operands[3]);
20079 HOST_WIDE_INT diff;
20080
20081 diff = ct - cf;
20082 /* Sign bit compares are better done using shifts than by using
20083 sbb. */
20084 if (sign_bit_compare_p
20085 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20086 {
20087 /* Detect overlap between destination and compare sources. */
20088 rtx tmp = out;
20089
20090 if (!sign_bit_compare_p)
20091 {
20092 rtx flags;
20093 bool fpcmp = false;
20094
20095 compare_code = GET_CODE (compare_op);
20096
20097 flags = XEXP (compare_op, 0);
20098
20099 if (GET_MODE (flags) == CCFPmode
20100 || GET_MODE (flags) == CCFPUmode)
20101 {
20102 fpcmp = true;
20103 compare_code
20104 = ix86_fp_compare_code_to_integer (compare_code);
20105 }
20106
20107 /* To simplify rest of code, restrict to the GEU case. */
20108 if (compare_code == LTU)
20109 {
20110 HOST_WIDE_INT tmp = ct;
20111 ct = cf;
20112 cf = tmp;
20113 compare_code = reverse_condition (compare_code);
20114 code = reverse_condition (code);
20115 }
20116 else
20117 {
20118 if (fpcmp)
20119 PUT_CODE (compare_op,
20120 reverse_condition_maybe_unordered
20121 (GET_CODE (compare_op)));
20122 else
20123 PUT_CODE (compare_op,
20124 reverse_condition (GET_CODE (compare_op)));
20125 }
20126 diff = ct - cf;
20127
20128 if (reg_overlap_mentioned_p (out, op0)
20129 || reg_overlap_mentioned_p (out, op1))
20130 tmp = gen_reg_rtx (mode);
20131
20132 if (mode == DImode)
20133 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20134 else
20135 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20136 flags, compare_op));
20137 }
20138 else
20139 {
20140 if (code == GT || code == GE)
20141 code = reverse_condition (code);
20142 else
20143 {
20144 HOST_WIDE_INT tmp = ct;
20145 ct = cf;
20146 cf = tmp;
20147 diff = ct - cf;
20148 }
20149 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20150 }
20151
20152 if (diff == 1)
20153 {
20154 /*
20155 * cmpl op0,op1
20156 * sbbl dest,dest
20157 * [addl dest, ct]
20158 *
20159 * Size 5 - 8.
20160 */
20161 if (ct)
20162 tmp = expand_simple_binop (mode, PLUS,
20163 tmp, GEN_INT (ct),
20164 copy_rtx (tmp), 1, OPTAB_DIRECT);
20165 }
20166 else if (cf == -1)
20167 {
20168 /*
20169 * cmpl op0,op1
20170 * sbbl dest,dest
20171 * orl $ct, dest
20172 *
20173 * Size 8.
20174 */
20175 tmp = expand_simple_binop (mode, IOR,
20176 tmp, GEN_INT (ct),
20177 copy_rtx (tmp), 1, OPTAB_DIRECT);
20178 }
20179 else if (diff == -1 && ct)
20180 {
20181 /*
20182 * cmpl op0,op1
20183 * sbbl dest,dest
20184 * notl dest
20185 * [addl dest, cf]
20186 *
20187 * Size 8 - 11.
20188 */
20189 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20190 if (cf)
20191 tmp = expand_simple_binop (mode, PLUS,
20192 copy_rtx (tmp), GEN_INT (cf),
20193 copy_rtx (tmp), 1, OPTAB_DIRECT);
20194 }
20195 else
20196 {
20197 /*
20198 * cmpl op0,op1
20199 * sbbl dest,dest
20200 * [notl dest]
20201 * andl cf - ct, dest
20202 * [addl dest, ct]
20203 *
20204 * Size 8 - 11.
20205 */
20206
20207 if (cf == 0)
20208 {
20209 cf = ct;
20210 ct = 0;
20211 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20212 }
20213
20214 tmp = expand_simple_binop (mode, AND,
20215 copy_rtx (tmp),
20216 gen_int_mode (cf - ct, mode),
20217 copy_rtx (tmp), 1, OPTAB_DIRECT);
20218 if (ct)
20219 tmp = expand_simple_binop (mode, PLUS,
20220 copy_rtx (tmp), GEN_INT (ct),
20221 copy_rtx (tmp), 1, OPTAB_DIRECT);
20222 }
20223
20224 if (!rtx_equal_p (tmp, out))
20225 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20226
20227 return true;
20228 }
20229
20230 if (diff < 0)
20231 {
20232 enum machine_mode cmp_mode = GET_MODE (op0);
20233
20234 HOST_WIDE_INT tmp;
20235 tmp = ct, ct = cf, cf = tmp;
20236 diff = -diff;
20237
20238 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20239 {
20240 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20241
20242 /* We may be reversing an unordered compare to a normal compare, which
20243 is not valid in general (we may convert a non-trapping condition
20244 into a trapping one); however, on i386 we currently emit all
20245 comparisons unordered. */
20246 compare_code = reverse_condition_maybe_unordered (compare_code);
20247 code = reverse_condition_maybe_unordered (code);
20248 }
20249 else
20250 {
20251 compare_code = reverse_condition (compare_code);
20252 code = reverse_condition (code);
20253 }
20254 }
20255
20256 compare_code = UNKNOWN;
20257 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20258 && CONST_INT_P (op1))
20259 {
20260 if (op1 == const0_rtx
20261 && (code == LT || code == GE))
20262 compare_code = code;
20263 else if (op1 == constm1_rtx)
20264 {
20265 if (code == LE)
20266 compare_code = LT;
20267 else if (code == GT)
20268 compare_code = GE;
20269 }
20270 }
20271
20272 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20273 if (compare_code != UNKNOWN
20274 && GET_MODE (op0) == GET_MODE (out)
20275 && (cf == -1 || ct == -1))
20276 {
20277 /* If the lea code below could be used, only optimize
20278 when it results in a 2 insn sequence. */
20279
20280 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20281 || diff == 3 || diff == 5 || diff == 9)
20282 || (compare_code == LT && ct == -1)
20283 || (compare_code == GE && cf == -1))
20284 {
20285 /*
20286 * notl op1 (if necessary)
20287 * sarl $31, op1
20288 * orl cf, op1
20289 */
20290 if (ct != -1)
20291 {
20292 cf = ct;
20293 ct = -1;
20294 code = reverse_condition (code);
20295 }
20296
20297 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20298
20299 out = expand_simple_binop (mode, IOR,
20300 out, GEN_INT (cf),
20301 out, 1, OPTAB_DIRECT);
20302 if (out != operands[0])
20303 emit_move_insn (operands[0], out);
20304
20305 return true;
20306 }
20307 }
20308
20309
20310 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20311 || diff == 3 || diff == 5 || diff == 9)
20312 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20313 && (mode != DImode
20314 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20315 {
20316 /*
20317 * xorl dest,dest
20318 * cmpl op1,op2
20319 * setcc dest
20320 * lea cf(dest*(ct-cf)),dest
20321 *
20322 * Size 14.
20323 *
20324 * This also catches the degenerate setcc-only case.
20325 */
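/* For example, with ct = 5 and cf = 2 (diff = 3) this emits a setcc
   producing 0 or 1 followed by lea 2(dest,dest,2),dest, giving 5 when
   the condition holds and 2 otherwise. */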
20326
20327 rtx tmp;
20328 int nops;
20329
20330 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20331
20332 nops = 0;
20333 /* On x86_64 the lea instruction operates on Pmode, so we need
20334 the arithmetic done in the proper mode to match. */
20335 if (diff == 1)
20336 tmp = copy_rtx (out);
20337 else
20338 {
20339 rtx out1;
20340 out1 = copy_rtx (out);
20341 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20342 nops++;
20343 if (diff & 1)
20344 {
20345 tmp = gen_rtx_PLUS (mode, tmp, out1);
20346 nops++;
20347 }
20348 }
20349 if (cf != 0)
20350 {
20351 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20352 nops++;
20353 }
20354 if (!rtx_equal_p (tmp, out))
20355 {
20356 if (nops == 1)
20357 out = force_operand (tmp, copy_rtx (out));
20358 else
20359 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20360 }
20361 if (!rtx_equal_p (out, operands[0]))
20362 emit_move_insn (operands[0], copy_rtx (out));
20363
20364 return true;
20365 }
20366
20367 /*
20368 * General case: Jumpful:
20369 * xorl dest,dest cmpl op1, op2
20370 * cmpl op1, op2 movl ct, dest
20371 * setcc dest jcc 1f
20372 * decl dest movl cf, dest
20373 * andl (cf-ct),dest 1:
20374 * addl ct,dest
20375 *
20376 * Size 20. Size 14.
20377 *
20378 * This is reasonably steep, but branch mispredict costs are
20379 * high on modern cpus, so consider failing only if optimizing
20380 * for space.
20381 */
20382
20383 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20384 && BRANCH_COST (optimize_insn_for_speed_p (),
20385 false) >= 2)
20386 {
20387 if (cf == 0)
20388 {
20389 enum machine_mode cmp_mode = GET_MODE (op0);
20390
20391 cf = ct;
20392 ct = 0;
20393
20394 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20395 {
20396 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20397
20398 /* We may be reversing an unordered compare to a normal compare,
20399 which is not valid in general (we may convert a non-trapping
20400 condition into a trapping one); however, on i386 we currently
20401 emit all comparisons unordered. */
20402 code = reverse_condition_maybe_unordered (code);
20403 }
20404 else
20405 {
20406 code = reverse_condition (code);
20407 if (compare_code != UNKNOWN)
20408 compare_code = reverse_condition (compare_code);
20409 }
20410 }
20411
20412 if (compare_code != UNKNOWN)
20413 {
20414 /* notl op1 (if needed)
20415 sarl $31, op1
20416 andl (cf-ct), op1
20417 addl ct, op1
20418
20419 For x < 0 (resp. x <= -1) there will be no notl,
20420 so if possible swap the constants to get rid of the
20421 complement.
20422 True/false will be -1/0 while code below (store flag
20423 followed by decrement) is 0/-1, so the constants need
20424 to be exchanged once more. */
20425
20426 if (compare_code == GE || !cf)
20427 {
20428 code = reverse_condition (code);
20429 compare_code = LT;
20430 }
20431 else
20432 {
20433 HOST_WIDE_INT tmp = cf;
20434 cf = ct;
20435 ct = tmp;
20436 }
20437
20438 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20439 }
20440 else
20441 {
20442 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20443
20444 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20445 constm1_rtx,
20446 copy_rtx (out), 1, OPTAB_DIRECT);
20447 }
20448
20449 out = expand_simple_binop (mode, AND, copy_rtx (out),
20450 gen_int_mode (cf - ct, mode),
20451 copy_rtx (out), 1, OPTAB_DIRECT);
20452 if (ct)
20453 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20454 copy_rtx (out), 1, OPTAB_DIRECT);
20455 if (!rtx_equal_p (out, operands[0]))
20456 emit_move_insn (operands[0], copy_rtx (out));
20457
20458 return true;
20459 }
20460 }
20461
20462 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20463 {
20464 /* Try a few things more with specific constants and a variable. */
20465
20466 optab op;
20467 rtx var, orig_out, out, tmp;
20468
20469 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20470 return false;
20471
20472 /* If one of the two operands is an interesting constant, recurse to load a
20473 0/-1 constant mask and then mask the variable in with a logical operation. */
20474
20475 if (CONST_INT_P (operands[2]))
20476 {
20477 var = operands[3];
20478 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20479 operands[3] = constm1_rtx, op = and_optab;
20480 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20481 operands[3] = const0_rtx, op = ior_optab;
20482 else
20483 return false;
20484 }
20485 else if (CONST_INT_P (operands[3]))
20486 {
20487 var = operands[2];
20488 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20489 operands[2] = constm1_rtx, op = and_optab;
20490 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
20491 operands[2] = const0_rtx, op = ior_optab;
20492 else
20493 return false;
20494 }
20495 else
20496 return false;
20497
20498 orig_out = operands[0];
20499 tmp = gen_reg_rtx (mode);
20500 operands[0] = tmp;
20501
20502 /* Recurse to get the constant loaded. */
20503 if (ix86_expand_int_movcc (operands) == 0)
20504 return false;
20505
20506 /* Mask in the interesting variable. */
20507 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20508 OPTAB_WIDEN);
20509 if (!rtx_equal_p (out, orig_out))
20510 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20511
20512 return true;
20513 }
20514
20515 /*
20516 * For comparison with above,
20517 *
20518 * movl cf,dest
20519 * movl ct,tmp
20520 * cmpl op1,op2
20521 * cmovcc tmp,dest
20522 *
20523 * Size 15.
20524 */
20525
20526 if (! nonimmediate_operand (operands[2], mode))
20527 operands[2] = force_reg (mode, operands[2]);
20528 if (! nonimmediate_operand (operands[3], mode))
20529 operands[3] = force_reg (mode, operands[3]);
20530
20531 if (! register_operand (operands[2], VOIDmode)
20532 && (mode == QImode
20533 || ! register_operand (operands[3], VOIDmode)))
20534 operands[2] = force_reg (mode, operands[2]);
20535
20536 if (mode == QImode
20537 && ! register_operand (operands[3], VOIDmode))
20538 operands[3] = force_reg (mode, operands[3]);
20539
20540 emit_insn (compare_seq);
20541 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20542 gen_rtx_IF_THEN_ELSE (mode,
20543 compare_op, operands[2],
20544 operands[3])));
20545 return true;
20546 }
20547
20548 /* Swap, force into registers, or otherwise massage the two operands
20549 to an sse comparison with a mask result. Thus we differ a bit from
20550 ix86_prepare_fp_compare_args which expects to produce a flags result.
20551
20552 The DEST operand exists to help determine whether to commute commutative
20553 operators. The POP0/POP1 operands are updated in place. The new
20554 comparison code is returned, or UNKNOWN if not implementable. */
20555
20556 static enum rtx_code
20557 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20558 rtx *pop0, rtx *pop1)
20559 {
20560 rtx tmp;
20561
20562 switch (code)
20563 {
20564 case LTGT:
20565 case UNEQ:
20566 /* AVX supports all the needed comparisons. */
20567 if (TARGET_AVX)
20568 break;
20569 /* We have no LTGT as an operator. We could implement it with
20570 NE & ORDERED, but this requires an extra temporary. It's
20571 not clear that it's worth it. */
20572 return UNKNOWN;
20573
20574 case LT:
20575 case LE:
20576 case UNGT:
20577 case UNGE:
20578 /* These are supported directly. */
20579 break;
20580
20581 case EQ:
20582 case NE:
20583 case UNORDERED:
20584 case ORDERED:
20585 /* AVX has 3 operand comparisons, no need to swap anything. */
20586 if (TARGET_AVX)
20587 break;
20588 /* For commutative operators, try to canonicalize the destination
20589 operand to be first in the comparison - this helps reload to
20590 avoid extra moves. */
20591 if (!dest || !rtx_equal_p (dest, *pop1))
20592 break;
20593 /* FALLTHRU */
20594
20595 case GE:
20596 case GT:
20597 case UNLE:
20598 case UNLT:
20599 /* These are not supported directly before AVX, and furthermore
20600 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20601 comparison operands to transform into something that is
20602 supported. */
20603 tmp = *pop0;
20604 *pop0 = *pop1;
20605 *pop1 = tmp;
20606 code = swap_condition (code);
20607 break;
20608
20609 default:
20610 gcc_unreachable ();
20611 }
20612
20613 return code;
20614 }
20615
20616 /* Detect conditional moves that exactly match min/max operational
20617 semantics. Note that this is IEEE safe, as long as we don't
20618 interchange the operands.
20619
20620 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20621 and TRUE if the operation is successful and instructions are emitted. */
20622
20623 static bool
20624 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20625 rtx cmp_op1, rtx if_true, rtx if_false)
20626 {
20627 enum machine_mode mode;
20628 bool is_min;
20629 rtx tmp;
20630
20631 if (code == LT)
20632 ;
20633 else if (code == UNGE)
20634 {
20635 tmp = if_true;
20636 if_true = if_false;
20637 if_false = tmp;
20638 }
20639 else
20640 return false;
20641
20642 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20643 is_min = true;
20644 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20645 is_min = false;
20646 else
20647 return false;
20648
20649 mode = GET_MODE (dest);
20650
20651 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20652 but MODE may be a vector mode and thus not appropriate. */
20653 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20654 {
20655 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20656 rtvec v;
20657
20658 if_true = force_reg (mode, if_true);
20659 v = gen_rtvec (2, if_true, if_false);
20660 tmp = gen_rtx_UNSPEC (mode, v, u);
20661 }
20662 else
20663 {
20664 code = is_min ? SMIN : SMAX;
20665 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20666 }
20667
20668 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20669 return true;
20670 }
20671
20672 /* Expand an sse vector comparison. Return the register with the result. */
20673
20674 static rtx
20675 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20676 rtx op_true, rtx op_false)
20677 {
20678 enum machine_mode mode = GET_MODE (dest);
20679 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20680
20681 /* In the general case the comparison result can have a different type than the operands. */
20682 enum machine_mode cmp_mode;
20683
20684 /* In AVX512F the result of comparison is an integer mask. */
20685 bool maskcmp = false;
20686 rtx x;
20687
20688 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20689 {
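/* E.g. a V16SImode comparison yields an HImode mask, one bit per element. */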
20690 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20691 gcc_assert (cmp_mode != BLKmode);
20692
20693 maskcmp = true;
20694 }
20695 else
20696 cmp_mode = cmp_ops_mode;
20697
20698
20699 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20700 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20701 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20702
20703 if (optimize
20704 || reg_overlap_mentioned_p (dest, op_true)
20705 || reg_overlap_mentioned_p (dest, op_false))
20706 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20707
20708 /* Integer-mode compare patterns are expressed as unspecs only for AVX512F, so use the explicit generators here. */
20709 if (maskcmp && (code == GT || code == EQ))
20710 {
20711 rtx (*gen)(rtx, rtx, rtx);
20712
20713 switch (cmp_ops_mode)
20714 {
20715 case V16SImode:
20716 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20717 break;
20718 case V8DImode:
20719 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20720 break;
20721 default:
20722 gen = NULL;
20723 }
20724
20725 if (gen)
20726 {
20727 emit_insn (gen (dest, cmp_op0, cmp_op1));
20728 return dest;
20729 }
20730 }
20731 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20732
20733 if (cmp_mode != mode && !maskcmp)
20734 {
20735 x = force_reg (cmp_ops_mode, x);
20736 convert_move (dest, x, false);
20737 }
20738 else
20739 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20740
20741 return dest;
20742 }
20743
20744 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20745 operations. This is used for both scalar and vector conditional moves. */
20746
20747 static void
20748 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20749 {
20750 enum machine_mode mode = GET_MODE (dest);
20751 enum machine_mode cmpmode = GET_MODE (cmp);
20752
20753 /* In AVX512F the result of comparison is an integer mask. */
20754 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20755
20756 rtx t2, t3, x;
20757
20758 if (vector_all_ones_operand (op_true, mode)
20759 && rtx_equal_p (op_false, CONST0_RTX (mode))
20760 && !maskcmp)
20761 {
20762 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20763 }
20764 else if (op_false == CONST0_RTX (mode)
20765 && !maskcmp)
20766 {
20767 op_true = force_reg (mode, op_true);
20768 x = gen_rtx_AND (mode, cmp, op_true);
20769 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20770 }
20771 else if (op_true == CONST0_RTX (mode)
20772 && !maskcmp)
20773 {
20774 op_false = force_reg (mode, op_false);
20775 x = gen_rtx_NOT (mode, cmp);
20776 x = gen_rtx_AND (mode, x, op_false);
20777 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20778 }
20779 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20780 && !maskcmp)
20781 {
20782 op_false = force_reg (mode, op_false);
20783 x = gen_rtx_IOR (mode, cmp, op_false);
20784 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20785 }
20786 else if (TARGET_XOP
20787 && !maskcmp)
20788 {
20789 op_true = force_reg (mode, op_true);
20790
20791 if (!nonimmediate_operand (op_false, mode))
20792 op_false = force_reg (mode, op_false);
20793
20794 emit_insn (gen_rtx_SET (mode, dest,
20795 gen_rtx_IF_THEN_ELSE (mode, cmp,
20796 op_true,
20797 op_false)));
20798 }
20799 else
20800 {
20801 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20802 rtx d = dest;
20803
20804 if (!nonimmediate_operand (op_true, mode))
20805 op_true = force_reg (mode, op_true);
20806
20807 op_false = force_reg (mode, op_false);
20808
20809 switch (mode)
20810 {
20811 case V4SFmode:
20812 if (TARGET_SSE4_1)
20813 gen = gen_sse4_1_blendvps;
20814 break;
20815 case V2DFmode:
20816 if (TARGET_SSE4_1)
20817 gen = gen_sse4_1_blendvpd;
20818 break;
20819 case V16QImode:
20820 case V8HImode:
20821 case V4SImode:
20822 case V2DImode:
20823 if (TARGET_SSE4_1)
20824 {
20825 gen = gen_sse4_1_pblendvb;
20826 if (mode != V16QImode)
20827 d = gen_reg_rtx (V16QImode);
20828 op_false = gen_lowpart (V16QImode, op_false);
20829 op_true = gen_lowpart (V16QImode, op_true);
20830 cmp = gen_lowpart (V16QImode, cmp);
20831 }
20832 break;
20833 case V8SFmode:
20834 if (TARGET_AVX)
20835 gen = gen_avx_blendvps256;
20836 break;
20837 case V4DFmode:
20838 if (TARGET_AVX)
20839 gen = gen_avx_blendvpd256;
20840 break;
20841 case V32QImode:
20842 case V16HImode:
20843 case V8SImode:
20844 case V4DImode:
20845 if (TARGET_AVX2)
20846 {
20847 gen = gen_avx2_pblendvb;
20848 if (mode != V32QImode)
20849 d = gen_reg_rtx (V32QImode);
20850 op_false = gen_lowpart (V32QImode, op_false);
20851 op_true = gen_lowpart (V32QImode, op_true);
20852 cmp = gen_lowpart (V32QImode, cmp);
20853 }
20854 break;
20855
20856 case V16SImode:
20857 gen = gen_avx512f_blendmv16si;
20858 break;
20859 case V8DImode:
20860 gen = gen_avx512f_blendmv8di;
20861 break;
20862 case V8DFmode:
20863 gen = gen_avx512f_blendmv8df;
20864 break;
20865 case V16SFmode:
20866 gen = gen_avx512f_blendmv16sf;
20867 break;
20868
20869 default:
20870 break;
20871 }
20872
20873 if (gen != NULL)
20874 {
20875 emit_insn (gen (d, op_false, op_true, cmp));
20876 if (d != dest)
20877 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
20878 }
20879 else
20880 {
20881 op_true = force_reg (mode, op_true);
20882
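/* No blend instruction is available here, so open-code the select as
   dest = (op_true & cmp) | (op_false & ~cmp). */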
20883 t2 = gen_reg_rtx (mode);
20884 if (optimize)
20885 t3 = gen_reg_rtx (mode);
20886 else
20887 t3 = dest;
20888
20889 x = gen_rtx_AND (mode, op_true, cmp);
20890 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
20891
20892 x = gen_rtx_NOT (mode, cmp);
20893 x = gen_rtx_AND (mode, x, op_false);
20894 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
20895
20896 x = gen_rtx_IOR (mode, t3, t2);
20897 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20898 }
20899 }
20900 }
20901
20902 /* Expand a floating-point conditional move. Return true if successful. */
20903
20904 bool
20905 ix86_expand_fp_movcc (rtx operands[])
20906 {
20907 enum machine_mode mode = GET_MODE (operands[0]);
20908 enum rtx_code code = GET_CODE (operands[1]);
20909 rtx tmp, compare_op;
20910 rtx op0 = XEXP (operands[1], 0);
20911 rtx op1 = XEXP (operands[1], 1);
20912
20913 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20914 {
20915 enum machine_mode cmode;
20916
20917 /* Since we've no cmove for sse registers, don't force bad register
20918 allocation just to gain access to it. Deny movcc when the
20919 comparison mode doesn't match the move mode. */
20920 cmode = GET_MODE (op0);
20921 if (cmode == VOIDmode)
20922 cmode = GET_MODE (op1);
20923 if (cmode != mode)
20924 return false;
20925
20926 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
20927 if (code == UNKNOWN)
20928 return false;
20929
20930 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
20931 operands[2], operands[3]))
20932 return true;
20933
20934 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
20935 operands[2], operands[3]);
20936 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
20937 return true;
20938 }
20939
20940 if (GET_MODE (op0) == TImode
20941 || (GET_MODE (op0) == DImode
20942 && !TARGET_64BIT))
20943 return false;
20944
20945 /* The floating point conditional move instructions don't directly
20946 support conditions resulting from a signed integer comparison. */
20947
20948 compare_op = ix86_expand_compare (code, op0, op1);
20949 if (!fcmov_comparison_operator (compare_op, VOIDmode))
20950 {
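/* The condition is not directly representable for fcmov, so compute it
   into a QImode register with setcc and let fcmov test that register
   against zero instead. */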
20951 tmp = gen_reg_rtx (QImode);
20952 ix86_expand_setcc (tmp, code, op0, op1);
20953
20954 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
20955 }
20956
20957 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20958 gen_rtx_IF_THEN_ELSE (mode, compare_op,
20959 operands[2], operands[3])));
20960
20961 return true;
20962 }
20963
20964 /* Expand a floating-point vector conditional move; a vcond operation
20965 rather than a movcc operation. */
20966
20967 bool
20968 ix86_expand_fp_vcond (rtx operands[])
20969 {
20970 enum rtx_code code = GET_CODE (operands[3]);
20971 rtx cmp;
20972
20973 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
20974 &operands[4], &operands[5]);
20975 if (code == UNKNOWN)
20976 {
20977 rtx temp;
20978 switch (GET_CODE (operands[3]))
20979 {
20980 case LTGT:
20981 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
20982 operands[5], operands[0], operands[0]);
20983 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
20984 operands[5], operands[1], operands[2]);
20985 code = AND;
20986 break;
20987 case UNEQ:
20988 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
20989 operands[5], operands[0], operands[0]);
20990 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
20991 operands[5], operands[1], operands[2]);
20992 code = IOR;
20993 break;
20994 default:
20995 gcc_unreachable ();
20996 }
20997 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
20998 OPTAB_DIRECT);
20999 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21000 return true;
21001 }
21002
21003 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21004 operands[5], operands[1], operands[2]))
21005 return true;
21006
21007 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21008 operands[1], operands[2]);
21009 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21010 return true;
21011 }
21012
21013 /* Expand a signed/unsigned integral vector conditional move. */
21014
21015 bool
21016 ix86_expand_int_vcond (rtx operands[])
21017 {
21018 enum machine_mode data_mode = GET_MODE (operands[0]);
21019 enum machine_mode mode = GET_MODE (operands[4]);
21020 enum rtx_code code = GET_CODE (operands[3]);
21021 bool negate = false;
21022 rtx x, cop0, cop1;
21023
21024 cop0 = operands[4];
21025 cop1 = operands[5];
21026
21027 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21028 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21029 if ((code == LT || code == GE)
21030 && data_mode == mode
21031 && cop1 == CONST0_RTX (mode)
21032 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21033 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21034 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21035 && (GET_MODE_SIZE (data_mode) == 16
21036 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21037 {
21038 rtx negop = operands[2 - (code == LT)];
21039 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21040 if (negop == CONST1_RTX (data_mode))
21041 {
21042 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21043 operands[0], 1, OPTAB_DIRECT);
21044 if (res != operands[0])
21045 emit_move_insn (operands[0], res);
21046 return true;
21047 }
21048 else if (GET_MODE_INNER (data_mode) != DImode
21049 && vector_all_ones_operand (negop, data_mode))
21050 {
21051 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21052 operands[0], 0, OPTAB_DIRECT);
21053 if (res != operands[0])
21054 emit_move_insn (operands[0], res);
21055 return true;
21056 }
21057 }
21058
21059 if (!nonimmediate_operand (cop1, mode))
21060 cop1 = force_reg (mode, cop1);
21061 if (!general_operand (operands[1], data_mode))
21062 operands[1] = force_reg (data_mode, operands[1]);
21063 if (!general_operand (operands[2], data_mode))
21064 operands[2] = force_reg (data_mode, operands[2]);
21065
21066 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21067 if (TARGET_XOP
21068 && (mode == V16QImode || mode == V8HImode
21069 || mode == V4SImode || mode == V2DImode))
21070 ;
21071 else
21072 {
21073 /* Canonicalize the comparison to EQ, GT, GTU. */
21074 switch (code)
21075 {
21076 case EQ:
21077 case GT:
21078 case GTU:
21079 break;
21080
21081 case NE:
21082 case LE:
21083 case LEU:
21084 code = reverse_condition (code);
21085 negate = true;
21086 break;
21087
21088 case GE:
21089 case GEU:
21090 code = reverse_condition (code);
21091 negate = true;
21092 /* FALLTHRU */
21093
21094 case LT:
21095 case LTU:
21096 code = swap_condition (code);
21097 x = cop0, cop0 = cop1, cop1 = x;
21098 break;
21099
21100 default:
21101 gcc_unreachable ();
21102 }
21103
21104 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21105 if (mode == V2DImode)
21106 {
21107 switch (code)
21108 {
21109 case EQ:
21110 /* SSE4.1 supports EQ. */
21111 if (!TARGET_SSE4_1)
21112 return false;
21113 break;
21114
21115 case GT:
21116 case GTU:
21117 /* SSE4.2 supports GT/GTU. */
21118 if (!TARGET_SSE4_2)
21119 return false;
21120 break;
21121
21122 default:
21123 gcc_unreachable ();
21124 }
21125 }
21126
21127 /* Unsigned parallel compare is not supported by the hardware.
21128 Play some tricks to turn this into a signed comparison or a
21129 comparison against 0. */
21130 if (code == GTU)
21131 {
21132 cop0 = force_reg (mode, cop0);
21133
21134 switch (mode)
21135 {
21136 case V16SImode:
21137 case V8DImode:
21138 case V8SImode:
21139 case V4DImode:
21140 case V4SImode:
21141 case V2DImode:
21142 {
21143 rtx t1, t2, mask;
21144 rtx (*gen_sub3) (rtx, rtx, rtx);
21145
21146 switch (mode)
21147 {
21148 case V16SImode: gen_sub3 = gen_subv16si3; break;
21149 case V8DImode: gen_sub3 = gen_subv8di3; break;
21150 case V8SImode: gen_sub3 = gen_subv8si3; break;
21151 case V4DImode: gen_sub3 = gen_subv4di3; break;
21152 case V4SImode: gen_sub3 = gen_subv4si3; break;
21153 case V2DImode: gen_sub3 = gen_subv2di3; break;
21154 default:
21155 gcc_unreachable ();
21156 }
21157 /* Subtract (-(INT MAX) - 1) from both operands to make
21158 them signed. */
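/* E.g. for SImode elements the mask is 0x80000000 per lane, and
   (a - 0x80000000) >s (b - 0x80000000) holds exactly when a >u b. */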
21159 mask = ix86_build_signbit_mask (mode, true, false);
21160 t1 = gen_reg_rtx (mode);
21161 emit_insn (gen_sub3 (t1, cop0, mask));
21162
21163 t2 = gen_reg_rtx (mode);
21164 emit_insn (gen_sub3 (t2, cop1, mask));
21165
21166 cop0 = t1;
21167 cop1 = t2;
21168 code = GT;
21169 }
21170 break;
21171
21172 case V32QImode:
21173 case V16HImode:
21174 case V16QImode:
21175 case V8HImode:
21176 /* Perform a parallel unsigned saturating subtraction. */
21177 x = gen_reg_rtx (mode);
21178 emit_insn (gen_rtx_SET (VOIDmode, x,
21179 gen_rtx_US_MINUS (mode, cop0, cop1)));
21180
21181 cop0 = x;
21182 cop1 = CONST0_RTX (mode);
21183 code = EQ;
21184 negate = !negate;
21185 break;
21186
21187 default:
21188 gcc_unreachable ();
21189 }
21190 }
21191 }
21192
21193 /* Allow the comparison to be done in one mode, but the movcc to
21194 happen in another mode. */
21195 if (data_mode == mode)
21196 {
21197 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21198 operands[1+negate], operands[2-negate]);
21199 }
21200 else
21201 {
21202 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21203 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21204 operands[1+negate], operands[2-negate]);
21205 if (GET_MODE (x) == mode)
21206 x = gen_lowpart (data_mode, x);
21207 }
21208
21209 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21210 operands[2-negate]);
21211 return true;
21212 }
21213
21214 static bool
21215 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21216 {
21217 enum machine_mode mode = GET_MODE (op0);
21218 switch (mode)
21219 {
21220 case V16SImode:
21221 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21222 force_reg (V16SImode, mask),
21223 op1));
21224 return true;
21225 case V16SFmode:
21226 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21227 force_reg (V16SImode, mask),
21228 op1));
21229 return true;
21230 case V8DImode:
21231 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21232 force_reg (V8DImode, mask), op1));
21233 return true;
21234 case V8DFmode:
21235 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21236 force_reg (V8DImode, mask), op1));
21237 return true;
21238 default:
21239 return false;
21240 }
21241 }
21242
21243 /* Expand a variable vector permutation. */
21244
21245 void
21246 ix86_expand_vec_perm (rtx operands[])
21247 {
21248 rtx target = operands[0];
21249 rtx op0 = operands[1];
21250 rtx op1 = operands[2];
21251 rtx mask = operands[3];
21252 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21253 enum machine_mode mode = GET_MODE (op0);
21254 enum machine_mode maskmode = GET_MODE (mask);
21255 int w, e, i;
21256 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21257
21258 /* Number of elements in the vector. */
21259 w = GET_MODE_NUNITS (mode);
21260 e = GET_MODE_UNIT_SIZE (mode);
21261 gcc_assert (w <= 64);
21262
21263 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21264 return;
21265
21266 if (TARGET_AVX2)
21267 {
21268 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21269 {
21270 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21271 a constant shuffle operand. With a tiny bit of effort we can
21272 use VPERMD instead. A re-interpretation stall for V4DFmode is
21273 unfortunate but there's no avoiding it.
21274 Similarly, for V16HImode we don't have instructions for variable
21275 shuffling, while for V32QImode we can use vpshufb; vpshufb; vpermq;
21276 vpor after preparing suitable masks. */
21277
21278 if (mode == V16HImode)
21279 {
21280 maskmode = mode = V32QImode;
21281 w = 32;
21282 e = 1;
21283 }
21284 else
21285 {
21286 maskmode = mode = V8SImode;
21287 w = 8;
21288 e = 4;
21289 }
21290 t1 = gen_reg_rtx (maskmode);
21291
21292 /* Replicate the low bits of the V4DImode mask into V8SImode:
21293 mask = { A B C D }
21294 t1 = { A A B B C C D D }. */
21295 for (i = 0; i < w / 2; ++i)
21296 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21297 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21298 vt = force_reg (maskmode, vt);
21299 mask = gen_lowpart (maskmode, mask);
21300 if (maskmode == V8SImode)
21301 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21302 else
21303 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21304
21305 /* Multiply the shuffle indices by two. */
21306 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21307 OPTAB_DIRECT);
21308
21309 /* Add one to the odd shuffle indices:
21310 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21311 for (i = 0; i < w / 2; ++i)
21312 {
21313 vec[i * 2] = const0_rtx;
21314 vec[i * 2 + 1] = const1_rtx;
21315 }
21316 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21317 vt = validize_mem (force_const_mem (maskmode, vt));
21318 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21319 OPTAB_DIRECT);
21320
21321 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21322 operands[3] = mask = t1;
21323 target = gen_reg_rtx (mode);
21324 op0 = gen_lowpart (mode, op0);
21325 op1 = gen_lowpart (mode, op1);
21326 }
21327
21328 switch (mode)
21329 {
21330 case V8SImode:
21331 /* The VPERMD and VPERMPS instructions already properly ignore
21332 the high bits of the shuffle elements. No need for us to
21333 perform an AND ourselves. */
21334 if (one_operand_shuffle)
21335 {
21336 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21337 if (target != operands[0])
21338 emit_move_insn (operands[0],
21339 gen_lowpart (GET_MODE (operands[0]), target));
21340 }
21341 else
21342 {
21343 t1 = gen_reg_rtx (V8SImode);
21344 t2 = gen_reg_rtx (V8SImode);
21345 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21346 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21347 goto merge_two;
21348 }
21349 return;
21350
21351 case V8SFmode:
21352 mask = gen_lowpart (V8SFmode, mask);
21353 if (one_operand_shuffle)
21354 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21355 else
21356 {
21357 t1 = gen_reg_rtx (V8SFmode);
21358 t2 = gen_reg_rtx (V8SFmode);
21359 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21360 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21361 goto merge_two;
21362 }
21363 return;
21364
21365 case V4SImode:
21366 /* By combining the two 128-bit input vectors into one 256-bit
21367 input vector, we can use VPERMD and VPERMPS for the full
21368 two-operand shuffle. */
21369 t1 = gen_reg_rtx (V8SImode);
21370 t2 = gen_reg_rtx (V8SImode);
21371 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21372 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21373 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21374 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21375 return;
21376
21377 case V4SFmode:
21378 t1 = gen_reg_rtx (V8SFmode);
21379 t2 = gen_reg_rtx (V8SImode);
21380 mask = gen_lowpart (V4SImode, mask);
21381 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21382 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21383 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21384 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21385 return;
21386
21387 case V32QImode:
21388 t1 = gen_reg_rtx (V32QImode);
21389 t2 = gen_reg_rtx (V32QImode);
21390 t3 = gen_reg_rtx (V32QImode);
21391 vt2 = GEN_INT (128);
21392 for (i = 0; i < 32; i++)
21393 vec[i] = vt2;
21394 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21395 vt = force_reg (V32QImode, vt);
21396 for (i = 0; i < 32; i++)
21397 vec[i] = i < 16 ? vt2 : const0_rtx;
21398 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21399 vt2 = force_reg (V32QImode, vt2);
21400 /* From mask create two adjusted masks, which contain the same
21401 bits as mask in the low 7 bits of each vector element.
21402 The first mask will have the most significant bit clear
21403 if it requests an element from the same 128-bit lane
21404 and the MSB set if it requests an element from the other 128-bit lane.
21405 The second mask will have the opposite values of the MSB,
21406 and additionally will have its 128-bit lanes swapped.
21407 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21408 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21409 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21410 stands for other 12 bytes. */
21411 /* The bit that says whether an element is from the same lane or the
21412 other lane is bit 4, so shift it up by 3 to the MSB position. */
21413 t5 = gen_reg_rtx (V4DImode);
21414 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21415 GEN_INT (3)));
21416 /* Clear MSB bits from the mask just in case it had them set. */
21417 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21418 /* After this t1 will have MSB set for elements from other lane. */
21419 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21420 /* Clear bits other than MSB. */
21421 emit_insn (gen_andv32qi3 (t1, t1, vt));
21422 /* Or in the lower bits from mask into t3. */
21423 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21424 /* And invert MSB bits in t1, so MSB is set for elements from the same
21425 lane. */
21426 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21427 /* Swap 128-bit lanes in t3. */
21428 t6 = gen_reg_rtx (V4DImode);
21429 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21430 const2_rtx, GEN_INT (3),
21431 const0_rtx, const1_rtx));
21432 /* And or in the lower bits from mask into t1. */
21433 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21434 if (one_operand_shuffle)
21435 {
21436 /* Each of these shuffles will put 0s in places where
21437 element from the other 128-bit lane is needed, otherwise
21438 will shuffle in the requested value. */
21439 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21440 gen_lowpart (V32QImode, t6)));
21441 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21442 /* For t3 the 128-bit lanes are swapped again. */
21443 t7 = gen_reg_rtx (V4DImode);
21444 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21445 const2_rtx, GEN_INT (3),
21446 const0_rtx, const1_rtx));
21447 /* And oring both together leads to the result. */
21448 emit_insn (gen_iorv32qi3 (target, t1,
21449 gen_lowpart (V32QImode, t7)));
21450 if (target != operands[0])
21451 emit_move_insn (operands[0],
21452 gen_lowpart (GET_MODE (operands[0]), target));
21453 return;
21454 }
21455
21456 t4 = gen_reg_rtx (V32QImode);
21457 /* Similar to the one_operand_shuffle code above, just
21458 repeated twice, once for each operand. The merge_two:
21459 code will merge the two results together. */
21460 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21461 gen_lowpart (V32QImode, t6)));
21462 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21463 gen_lowpart (V32QImode, t6)));
21464 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21465 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21466 t7 = gen_reg_rtx (V4DImode);
21467 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21468 const2_rtx, GEN_INT (3),
21469 const0_rtx, const1_rtx));
21470 t8 = gen_reg_rtx (V4DImode);
21471 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21472 const2_rtx, GEN_INT (3),
21473 const0_rtx, const1_rtx));
21474 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21475 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21476 t1 = t4;
21477 t2 = t3;
21478 goto merge_two;
21479
21480 default:
21481 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21482 break;
21483 }
21484 }
21485
21486 if (TARGET_XOP)
21487 {
21488 /* The XOP VPPERM insn supports three inputs. By ignoring the
21489 one_operand_shuffle special case, we avoid creating another
21490 set of constant vectors in memory. */
21491 one_operand_shuffle = false;
21492
21493 /* mask = mask & {2*w-1, ...} */
21494 vt = GEN_INT (2*w - 1);
21495 }
21496 else
21497 {
21498 /* mask = mask & {w-1, ...} */
21499 vt = GEN_INT (w - 1);
21500 }
21501
21502 for (i = 0; i < w; i++)
21503 vec[i] = vt;
21504 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21505 mask = expand_simple_binop (maskmode, AND, mask, vt,
21506 NULL_RTX, 0, OPTAB_DIRECT);
21507
21508 /* For non-QImode operations, convert the word permutation control
21509 into a byte permutation control. */
21510 if (mode != V16QImode)
21511 {
21512 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21513 GEN_INT (exact_log2 (e)),
21514 NULL_RTX, 0, OPTAB_DIRECT);
21515
21516 /* Convert mask to vector of chars. */
21517 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21518
21519 /* Replicate each of the input bytes into byte positions:
21520 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21521 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21522 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21523 for (i = 0; i < 16; ++i)
21524 vec[i] = GEN_INT (i/e * e);
21525 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21526 vt = validize_mem (force_const_mem (V16QImode, vt));
21527 if (TARGET_XOP)
21528 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21529 else
21530 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21531
21532 /* Convert it into the byte positions by doing
21533 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21534 for (i = 0; i < 16; ++i)
21535 vec[i] = GEN_INT (i % e);
21536 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21537 vt = validize_mem (force_const_mem (V16QImode, vt));
21538 emit_insn (gen_addv16qi3 (mask, mask, vt));
21539 }
21540
21541 /* The actual shuffle operations all operate on V16QImode. */
21542 op0 = gen_lowpart (V16QImode, op0);
21543 op1 = gen_lowpart (V16QImode, op1);
21544
21545 if (TARGET_XOP)
21546 {
21547 if (GET_MODE (target) != V16QImode)
21548 target = gen_reg_rtx (V16QImode);
21549 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21550 if (target != operands[0])
21551 emit_move_insn (operands[0],
21552 gen_lowpart (GET_MODE (operands[0]), target));
21553 }
21554 else if (one_operand_shuffle)
21555 {
21556 if (GET_MODE (target) != V16QImode)
21557 target = gen_reg_rtx (V16QImode);
21558 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21559 if (target != operands[0])
21560 emit_move_insn (operands[0],
21561 gen_lowpart (GET_MODE (operands[0]), target));
21562 }
21563 else
21564 {
21565 rtx xops[6];
21566 bool ok;
21567
21568 /* Shuffle the two input vectors independently. */
21569 t1 = gen_reg_rtx (V16QImode);
21570 t2 = gen_reg_rtx (V16QImode);
21571 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21572 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21573
21574 merge_two:
21575 /* Then merge them together. The key is whether any given control
21576 element contained a bit set that indicates the second word. */
21577 mask = operands[3];
21578 vt = GEN_INT (w);
21579 if (maskmode == V2DImode && !TARGET_SSE4_1)
21580 {
21581 	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
21582 	     more shuffle to convert the V2DI input mask into a V4SI
21583 	     input mask.  At that point the masking that expand_int_vcond
21584 	     performs will work as desired.  */
21585 rtx t3 = gen_reg_rtx (V4SImode);
21586 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21587 const0_rtx, const0_rtx,
21588 const2_rtx, const2_rtx));
21589 mask = t3;
21590 maskmode = V4SImode;
21591 e = w = 4;
21592 }
21593
21594 for (i = 0; i < w; i++)
21595 vec[i] = vt;
21596 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21597 vt = force_reg (maskmode, vt);
21598 mask = expand_simple_binop (maskmode, AND, mask, vt,
21599 NULL_RTX, 0, OPTAB_DIRECT);
21600
21601 if (GET_MODE (target) != mode)
21602 target = gen_reg_rtx (mode);
21603 xops[0] = target;
21604 xops[1] = gen_lowpart (mode, t2);
21605 xops[2] = gen_lowpart (mode, t1);
21606 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21607 xops[4] = mask;
21608 xops[5] = vt;
21609 ok = ix86_expand_int_vcond (xops);
21610 gcc_assert (ok);
21611 if (target != operands[0])
21612 emit_move_insn (operands[0],
21613 gen_lowpart (GET_MODE (operands[0]), target));
21614 }
21615 }
21616
21617 /* Unpack SRC into DEST, the next wider integer vector type.  UNSIGNED_P is
21618    true if we should do zero extension, else sign extension.  HIGH_P is
21619    true if we want the N/2 high elements, else the low elements.  */
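/* For example (illustration only): a V8HImode SRC is unpacked into a
   V4SImode DEST; with HIGH_P false the low four elements are widened,
   with HIGH_P true the high four.  */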
21620
21621 void
21622 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21623 {
21624 enum machine_mode imode = GET_MODE (src);
21625 rtx tmp;
21626
21627 if (TARGET_SSE4_1)
21628 {
21629 rtx (*unpack)(rtx, rtx);
21630 rtx (*extract)(rtx, rtx) = NULL;
21631 enum machine_mode halfmode = BLKmode;
21632
21633 switch (imode)
21634 {
21635 case V32QImode:
21636 if (unsigned_p)
21637 unpack = gen_avx2_zero_extendv16qiv16hi2;
21638 else
21639 unpack = gen_avx2_sign_extendv16qiv16hi2;
21640 halfmode = V16QImode;
21641 extract
21642 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21643 break;
21644 case V32HImode:
21645 if (unsigned_p)
21646 unpack = gen_avx512f_zero_extendv16hiv16si2;
21647 else
21648 unpack = gen_avx512f_sign_extendv16hiv16si2;
21649 halfmode = V16HImode;
21650 extract
21651 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21652 break;
21653 case V16HImode:
21654 if (unsigned_p)
21655 unpack = gen_avx2_zero_extendv8hiv8si2;
21656 else
21657 unpack = gen_avx2_sign_extendv8hiv8si2;
21658 halfmode = V8HImode;
21659 extract
21660 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21661 break;
21662 case V16SImode:
21663 if (unsigned_p)
21664 unpack = gen_avx512f_zero_extendv8siv8di2;
21665 else
21666 unpack = gen_avx512f_sign_extendv8siv8di2;
21667 halfmode = V8SImode;
21668 extract
21669 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21670 break;
21671 case V8SImode:
21672 if (unsigned_p)
21673 unpack = gen_avx2_zero_extendv4siv4di2;
21674 else
21675 unpack = gen_avx2_sign_extendv4siv4di2;
21676 halfmode = V4SImode;
21677 extract
21678 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21679 break;
21680 case V16QImode:
21681 if (unsigned_p)
21682 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21683 else
21684 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21685 break;
21686 case V8HImode:
21687 if (unsigned_p)
21688 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21689 else
21690 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21691 break;
21692 case V4SImode:
21693 if (unsigned_p)
21694 unpack = gen_sse4_1_zero_extendv2siv2di2;
21695 else
21696 unpack = gen_sse4_1_sign_extendv2siv2di2;
21697 break;
21698 default:
21699 gcc_unreachable ();
21700 }
21701
21702 if (GET_MODE_SIZE (imode) >= 32)
21703 {
21704 tmp = gen_reg_rtx (halfmode);
21705 emit_insn (extract (tmp, src));
21706 }
21707 else if (high_p)
21708 {
21709 /* Shift higher 8 bytes to lower 8 bytes. */
21710 tmp = gen_reg_rtx (V1TImode);
21711 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21712 GEN_INT (64)));
21713 tmp = gen_lowpart (imode, tmp);
21714 }
21715 else
21716 tmp = src;
21717
21718 emit_insn (unpack (dest, tmp));
21719 }
21720 else
21721 {
21722 rtx (*unpack)(rtx, rtx, rtx);
21723
21724 switch (imode)
21725 {
21726 case V16QImode:
21727 if (high_p)
21728 unpack = gen_vec_interleave_highv16qi;
21729 else
21730 unpack = gen_vec_interleave_lowv16qi;
21731 break;
21732 case V8HImode:
21733 if (high_p)
21734 unpack = gen_vec_interleave_highv8hi;
21735 else
21736 unpack = gen_vec_interleave_lowv8hi;
21737 break;
21738 case V4SImode:
21739 if (high_p)
21740 unpack = gen_vec_interleave_highv4si;
21741 else
21742 unpack = gen_vec_interleave_lowv4si;
21743 break;
21744 default:
21745 gcc_unreachable ();
21746 }
21747
21748 if (unsigned_p)
21749 tmp = force_reg (imode, CONST0_RTX (imode));
21750 else
21751 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21752 src, pc_rtx, pc_rtx);
21753
21754 rtx tmp2 = gen_reg_rtx (imode);
21755 emit_insn (unpack (tmp2, src, tmp));
21756 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21757 }
21758 }
21759
21760 /* Expand conditional increment or decrement using adc/sbb instructions.
21761    The default case using setcc followed by the conditional move can be
21762    done by generic code.  */
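/* Illustrative sketch (not the literal RTL): for something like
     if (a < b) x++;
   an unsigned comparison leaves the result in the carry flag, so the
   conditional increment becomes
     cmp a, b
     adc x, 0
   and a conditional decrement uses sbb in the same way.  */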
21763 bool
21764 ix86_expand_int_addcc (rtx operands[])
21765 {
21766 enum rtx_code code = GET_CODE (operands[1]);
21767 rtx flags;
21768 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21769 rtx compare_op;
21770 rtx val = const0_rtx;
21771 bool fpcmp = false;
21772 enum machine_mode mode;
21773 rtx op0 = XEXP (operands[1], 0);
21774 rtx op1 = XEXP (operands[1], 1);
21775
21776 if (operands[3] != const1_rtx
21777 && operands[3] != constm1_rtx)
21778 return false;
21779 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21780 return false;
21781 code = GET_CODE (compare_op);
21782
21783 flags = XEXP (compare_op, 0);
21784
21785 if (GET_MODE (flags) == CCFPmode
21786 || GET_MODE (flags) == CCFPUmode)
21787 {
21788 fpcmp = true;
21789 code = ix86_fp_compare_code_to_integer (code);
21790 }
21791
21792 if (code != LTU)
21793 {
21794 val = constm1_rtx;
21795 if (fpcmp)
21796 PUT_CODE (compare_op,
21797 reverse_condition_maybe_unordered
21798 (GET_CODE (compare_op)));
21799 else
21800 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21801 }
21802
21803 mode = GET_MODE (operands[0]);
21804
21805 /* Construct either adc or sbb insn. */
21806 if ((code == LTU) == (operands[3] == constm1_rtx))
21807 {
21808 switch (mode)
21809 {
21810 case QImode:
21811 insn = gen_subqi3_carry;
21812 break;
21813 case HImode:
21814 insn = gen_subhi3_carry;
21815 break;
21816 case SImode:
21817 insn = gen_subsi3_carry;
21818 break;
21819 case DImode:
21820 insn = gen_subdi3_carry;
21821 break;
21822 default:
21823 gcc_unreachable ();
21824 }
21825 }
21826 else
21827 {
21828 switch (mode)
21829 {
21830 case QImode:
21831 insn = gen_addqi3_carry;
21832 break;
21833 case HImode:
21834 insn = gen_addhi3_carry;
21835 break;
21836 case SImode:
21837 insn = gen_addsi3_carry;
21838 break;
21839 case DImode:
21840 insn = gen_adddi3_carry;
21841 break;
21842 default:
21843 gcc_unreachable ();
21844 }
21845 }
21846 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21847
21848 return true;
21849 }
21850
21851
21852 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
21853    but works for floating point parameters and non-offsettable memories.
21854    For pushes, it returns just stack offsets; the values will be saved
21855    in the right order.  At most four parts are generated.  */
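/* For instance, on a 32-bit target a DFmode operand is returned as two
   SImode parts and an XFmode operand as three.  */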
21856
21857 static int
21858 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
21859 {
21860 int size;
21861
21862 if (!TARGET_64BIT)
21863 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
21864 else
21865 size = (GET_MODE_SIZE (mode) + 4) / 8;
21866
21867 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
21868 gcc_assert (size >= 2 && size <= 4);
21869
21870   /* Optimize constant pool reference to immediates.  This is used by fp
21871      moves, which force all constants to memory to allow combining.  */
21872 if (MEM_P (operand) && MEM_READONLY_P (operand))
21873 {
21874 rtx tmp = maybe_get_pool_constant (operand);
21875 if (tmp)
21876 operand = tmp;
21877 }
21878
21879 if (MEM_P (operand) && !offsettable_memref_p (operand))
21880 {
21881       /* The only non-offsettable memories we handle are pushes.  */
21882 int ok = push_operand (operand, VOIDmode);
21883
21884 gcc_assert (ok);
21885
21886 operand = copy_rtx (operand);
21887 PUT_MODE (operand, word_mode);
21888 parts[0] = parts[1] = parts[2] = parts[3] = operand;
21889 return size;
21890 }
21891
21892 if (GET_CODE (operand) == CONST_VECTOR)
21893 {
21894 enum machine_mode imode = int_mode_for_mode (mode);
21895 /* Caution: if we looked through a constant pool memory above,
21896 the operand may actually have a different mode now. That's
21897 ok, since we want to pun this all the way back to an integer. */
21898 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
21899 gcc_assert (operand != NULL);
21900 mode = imode;
21901 }
21902
21903 if (!TARGET_64BIT)
21904 {
21905 if (mode == DImode)
21906 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
21907 else
21908 {
21909 int i;
21910
21911 if (REG_P (operand))
21912 {
21913 gcc_assert (reload_completed);
21914 for (i = 0; i < size; i++)
21915 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
21916 }
21917 else if (offsettable_memref_p (operand))
21918 {
21919 operand = adjust_address (operand, SImode, 0);
21920 parts[0] = operand;
21921 for (i = 1; i < size; i++)
21922 parts[i] = adjust_address (operand, SImode, 4 * i);
21923 }
21924 else if (GET_CODE (operand) == CONST_DOUBLE)
21925 {
21926 REAL_VALUE_TYPE r;
21927 long l[4];
21928
21929 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
21930 switch (mode)
21931 {
21932 case TFmode:
21933 real_to_target (l, &r, mode);
21934 parts[3] = gen_int_mode (l[3], SImode);
21935 parts[2] = gen_int_mode (l[2], SImode);
21936 break;
21937 case XFmode:
21938 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
21939 long double may not be 80-bit. */
21940 real_to_target (l, &r, mode);
21941 parts[2] = gen_int_mode (l[2], SImode);
21942 break;
21943 case DFmode:
21944 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
21945 break;
21946 default:
21947 gcc_unreachable ();
21948 }
21949 parts[1] = gen_int_mode (l[1], SImode);
21950 parts[0] = gen_int_mode (l[0], SImode);
21951 }
21952 else
21953 gcc_unreachable ();
21954 }
21955 }
21956 else
21957 {
21958 if (mode == TImode)
21959 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
21960 if (mode == XFmode || mode == TFmode)
21961 {
21962 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
21963 if (REG_P (operand))
21964 {
21965 gcc_assert (reload_completed);
21966 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
21967 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
21968 }
21969 else if (offsettable_memref_p (operand))
21970 {
21971 operand = adjust_address (operand, DImode, 0);
21972 parts[0] = operand;
21973 parts[1] = adjust_address (operand, upper_mode, 8);
21974 }
21975 else if (GET_CODE (operand) == CONST_DOUBLE)
21976 {
21977 REAL_VALUE_TYPE r;
21978 long l[4];
21979
21980 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
21981 real_to_target (l, &r, mode);
21982
21983 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21984 if (HOST_BITS_PER_WIDE_INT >= 64)
21985 parts[0]
21986 = gen_int_mode
21987 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
21988 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
21989 DImode);
21990 else
21991 parts[0] = immed_double_const (l[0], l[1], DImode);
21992
21993 if (upper_mode == SImode)
21994 parts[1] = gen_int_mode (l[2], SImode);
21995 else if (HOST_BITS_PER_WIDE_INT >= 64)
21996 parts[1]
21997 = gen_int_mode
21998 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
21999 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22000 DImode);
22001 else
22002 parts[1] = immed_double_const (l[2], l[3], DImode);
22003 }
22004 else
22005 gcc_unreachable ();
22006 }
22007 }
22008
22009 return size;
22010 }
22011
22012 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22013    All required insns are emitted here; the split destination parts are
22014    placed in operands 2 and up and the corresponding source parts in
22015    operands 6 and up, in the correct order.  */
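/* Illustrative sketch: a DImode move on a 32-bit target is split into two
   SImode moves, emitted in an order chosen below so that no source part is
   overwritten before it has been read.  */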
22016
22017 void
22018 ix86_split_long_move (rtx operands[])
22019 {
22020 rtx part[2][4];
22021 int nparts, i, j;
22022 int push = 0;
22023 int collisions = 0;
22024 enum machine_mode mode = GET_MODE (operands[0]);
22025 bool collisionparts[4];
22026
22027   /* The DFmode expanders may ask us to move a double.
22028      For a 64-bit target this is a single move.  By hiding that fact
22029      here we simplify the i386.md splitters.  */
22030 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22031 {
22032       /* Optimize constant pool reference to immediates.  This is used by
22033 	 fp moves, which force all constants to memory to allow combining.  */
22034
22035 if (MEM_P (operands[1])
22036 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22037 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22038 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22039 if (push_operand (operands[0], VOIDmode))
22040 {
22041 operands[0] = copy_rtx (operands[0]);
22042 PUT_MODE (operands[0], word_mode);
22043 }
22044 else
22045 operands[0] = gen_lowpart (DImode, operands[0]);
22046 operands[1] = gen_lowpart (DImode, operands[1]);
22047 emit_move_insn (operands[0], operands[1]);
22048 return;
22049 }
22050
22051   /* The only non-offsettable memory we handle is a push.  */
22052 if (push_operand (operands[0], VOIDmode))
22053 push = 1;
22054 else
22055 gcc_assert (!MEM_P (operands[0])
22056 || offsettable_memref_p (operands[0]));
22057
22058 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22059 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22060
22061   /* When emitting a push, take care of source operands on the stack.  */
22062 if (push && MEM_P (operands[1])
22063 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22064 {
22065 rtx src_base = XEXP (part[1][nparts - 1], 0);
22066
22067 /* Compensate for the stack decrement by 4. */
22068 if (!TARGET_64BIT && nparts == 3
22069 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22070 src_base = plus_constant (Pmode, src_base, 4);
22071
22072       /* src_base refers to the stack pointer and is
22073 	 automatically decremented by the emitted pushes.  */
22074 for (i = 0; i < nparts; i++)
22075 part[1][i] = change_address (part[1][i],
22076 GET_MODE (part[1][i]), src_base);
22077 }
22078
22079   /* We need to do the copy in the right order in case an address register
22080      of the source overlaps the destination.  */
22081 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22082 {
22083 rtx tmp;
22084
22085 for (i = 0; i < nparts; i++)
22086 {
22087 collisionparts[i]
22088 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22089 if (collisionparts[i])
22090 collisions++;
22091 }
22092
22093 /* Collision in the middle part can be handled by reordering. */
22094 if (collisions == 1 && nparts == 3 && collisionparts [1])
22095 {
22096 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22097 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22098 }
22099 else if (collisions == 1
22100 && nparts == 4
22101 && (collisionparts [1] || collisionparts [2]))
22102 {
22103 if (collisionparts [1])
22104 {
22105 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22106 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22107 }
22108 else
22109 {
22110 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22111 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22112 }
22113 }
22114
22115 /* If there are more collisions, we can't handle it by reordering.
22116 Do an lea to the last part and use only one colliding move. */
22117 else if (collisions > 1)
22118 {
22119 rtx base;
22120
22121 collisions = 1;
22122
22123 base = part[0][nparts - 1];
22124
22125 /* Handle the case when the last part isn't valid for lea.
22126 Happens in 64-bit mode storing the 12-byte XFmode. */
22127 if (GET_MODE (base) != Pmode)
22128 base = gen_rtx_REG (Pmode, REGNO (base));
22129
22130 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22131 part[1][0] = replace_equiv_address (part[1][0], base);
22132 for (i = 1; i < nparts; i++)
22133 {
22134 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22135 part[1][i] = replace_equiv_address (part[1][i], tmp);
22136 }
22137 }
22138 }
22139
22140 if (push)
22141 {
22142 if (!TARGET_64BIT)
22143 {
22144 if (nparts == 3)
22145 {
22146 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22147 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22148 stack_pointer_rtx, GEN_INT (-4)));
22149 emit_move_insn (part[0][2], part[1][2]);
22150 }
22151 else if (nparts == 4)
22152 {
22153 emit_move_insn (part[0][3], part[1][3]);
22154 emit_move_insn (part[0][2], part[1][2]);
22155 }
22156 }
22157 else
22158 {
22159 	  /* In 64-bit mode we don't have a 32-bit push available.  If this is a
22160 	     register, that is OK - we just use the larger counterpart.  We also
22161 	     retype memory - this comes from an attempt to avoid a REX prefix
22162 	     when moving the second half of a TFmode value.  */
22163 if (GET_MODE (part[1][1]) == SImode)
22164 {
22165 switch (GET_CODE (part[1][1]))
22166 {
22167 case MEM:
22168 part[1][1] = adjust_address (part[1][1], DImode, 0);
22169 break;
22170
22171 case REG:
22172 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22173 break;
22174
22175 default:
22176 gcc_unreachable ();
22177 }
22178
22179 if (GET_MODE (part[1][0]) == SImode)
22180 part[1][0] = part[1][1];
22181 }
22182 }
22183 emit_move_insn (part[0][1], part[1][1]);
22184 emit_move_insn (part[0][0], part[1][0]);
22185 return;
22186 }
22187
22188   /* Choose the correct order so as not to overwrite the source before it is copied.  */
22189 if ((REG_P (part[0][0])
22190 && REG_P (part[1][1])
22191 && (REGNO (part[0][0]) == REGNO (part[1][1])
22192 || (nparts == 3
22193 && REGNO (part[0][0]) == REGNO (part[1][2]))
22194 || (nparts == 4
22195 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22196 || (collisions > 0
22197 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22198 {
22199 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22200 {
22201 operands[2 + i] = part[0][j];
22202 operands[6 + i] = part[1][j];
22203 }
22204 }
22205 else
22206 {
22207 for (i = 0; i < nparts; i++)
22208 {
22209 operands[2 + i] = part[0][i];
22210 operands[6 + i] = part[1][i];
22211 }
22212 }
22213
22214 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22215 if (optimize_insn_for_size_p ())
22216 {
22217 for (j = 0; j < nparts - 1; j++)
22218 if (CONST_INT_P (operands[6 + j])
22219 && operands[6 + j] != const0_rtx
22220 && REG_P (operands[2 + j]))
22221 for (i = j; i < nparts - 1; i++)
22222 if (CONST_INT_P (operands[7 + i])
22223 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22224 operands[7 + i] = operands[2 + j];
22225 }
22226
22227 for (i = 0; i < nparts; i++)
22228 emit_move_insn (operands[2 + i], operands[6 + i]);
22229
22230 return;
22231 }
22232
22233 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22234 left shift by a constant, either using a single shift or
22235 a sequence of add instructions. */
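/* E.g. a left shift by 1 is emitted as a single add of the register to
   itself; small counts may likewise use repeated adds when that is cheaper
   than one shift instruction.  */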
22236
22237 static void
22238 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22239 {
22240 rtx (*insn)(rtx, rtx, rtx);
22241
22242 if (count == 1
22243 || (count * ix86_cost->add <= ix86_cost->shift_const
22244 && !optimize_insn_for_size_p ()))
22245 {
22246 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22247 while (count-- > 0)
22248 emit_insn (insn (operand, operand, operand));
22249 }
22250 else
22251 {
22252 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22253 emit_insn (insn (operand, operand, GEN_INT (count)));
22254 }
22255 }
22256
22257 void
22258 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22259 {
22260 rtx (*gen_ashl3)(rtx, rtx, rtx);
22261 rtx (*gen_shld)(rtx, rtx, rtx);
22262 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22263
22264 rtx low[2], high[2];
22265 int count;
22266
22267 if (CONST_INT_P (operands[2]))
22268 {
22269 split_double_mode (mode, operands, 2, low, high);
22270 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22271
22272 if (count >= half_width)
22273 {
22274 emit_move_insn (high[0], low[1]);
22275 emit_move_insn (low[0], const0_rtx);
22276
22277 if (count > half_width)
22278 ix86_expand_ashl_const (high[0], count - half_width, mode);
22279 }
22280 else
22281 {
22282 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22283
22284 if (!rtx_equal_p (operands[0], operands[1]))
22285 emit_move_insn (operands[0], operands[1]);
22286
22287 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22288 ix86_expand_ashl_const (low[0], count, mode);
22289 }
22290 return;
22291 }
22292
22293 split_double_mode (mode, operands, 1, low, high);
22294
22295 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22296
22297 if (operands[1] == const1_rtx)
22298 {
22299       /* Assuming we've chosen QImode-capable registers, 1 << N
22300 	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
22301 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22302 {
22303 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22304
22305 ix86_expand_clear (low[0]);
22306 ix86_expand_clear (high[0]);
22307 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22308
22309 d = gen_lowpart (QImode, low[0]);
22310 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22311 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22312 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22313
22314 d = gen_lowpart (QImode, high[0]);
22315 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22316 s = gen_rtx_NE (QImode, flags, const0_rtx);
22317 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22318 }
22319
22320 /* Otherwise, we can get the same results by manually performing
22321 a bit extract operation on bit 5/6, and then performing the two
22322 shifts. The two methods of getting 0/1 into low/high are exactly
22323 the same size. Avoiding the shift in the bit extract case helps
22324 pentium4 a bit; no one else seems to care much either way. */
22325 else
22326 {
22327 enum machine_mode half_mode;
22328 rtx (*gen_lshr3)(rtx, rtx, rtx);
22329 rtx (*gen_and3)(rtx, rtx, rtx);
22330 rtx (*gen_xor3)(rtx, rtx, rtx);
22331 HOST_WIDE_INT bits;
22332 rtx x;
22333
22334 if (mode == DImode)
22335 {
22336 half_mode = SImode;
22337 gen_lshr3 = gen_lshrsi3;
22338 gen_and3 = gen_andsi3;
22339 gen_xor3 = gen_xorsi3;
22340 bits = 5;
22341 }
22342 else
22343 {
22344 half_mode = DImode;
22345 gen_lshr3 = gen_lshrdi3;
22346 gen_and3 = gen_anddi3;
22347 gen_xor3 = gen_xordi3;
22348 bits = 6;
22349 }
22350
22351 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22352 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22353 else
22354 x = gen_lowpart (half_mode, operands[2]);
22355 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22356
22357 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22358 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22359 emit_move_insn (low[0], high[0]);
22360 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22361 }
22362
22363 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22364 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22365 return;
22366 }
22367
22368 if (operands[1] == constm1_rtx)
22369 {
22370 /* For -1 << N, we can avoid the shld instruction, because we
22371 know that we're shifting 0...31/63 ones into a -1. */
22372 emit_move_insn (low[0], constm1_rtx);
22373 if (optimize_insn_for_size_p ())
22374 emit_move_insn (high[0], low[0]);
22375 else
22376 emit_move_insn (high[0], constm1_rtx);
22377 }
22378 else
22379 {
22380 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22381
22382 if (!rtx_equal_p (operands[0], operands[1]))
22383 emit_move_insn (operands[0], operands[1]);
22384
22385 split_double_mode (mode, operands, 1, low, high);
22386 emit_insn (gen_shld (high[0], low[0], operands[2]));
22387 }
22388
22389 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22390
22391 if (TARGET_CMOVE && scratch)
22392 {
22393 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22394 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22395
22396 ix86_expand_clear (scratch);
22397 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22398 }
22399 else
22400 {
22401 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22402 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22403
22404 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22405 }
22406 }
22407
22408 void
22409 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22410 {
22411 rtx (*gen_ashr3)(rtx, rtx, rtx)
22412 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22413 rtx (*gen_shrd)(rtx, rtx, rtx);
22414 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22415
22416 rtx low[2], high[2];
22417 int count;
22418
22419 if (CONST_INT_P (operands[2]))
22420 {
22421 split_double_mode (mode, operands, 2, low, high);
22422 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22423
22424 if (count == GET_MODE_BITSIZE (mode) - 1)
22425 {
22426 emit_move_insn (high[0], high[1]);
22427 emit_insn (gen_ashr3 (high[0], high[0],
22428 GEN_INT (half_width - 1)));
22429 emit_move_insn (low[0], high[0]);
22430
22431 }
22432 else if (count >= half_width)
22433 {
22434 emit_move_insn (low[0], high[1]);
22435 emit_move_insn (high[0], low[0]);
22436 emit_insn (gen_ashr3 (high[0], high[0],
22437 GEN_INT (half_width - 1)));
22438
22439 if (count > half_width)
22440 emit_insn (gen_ashr3 (low[0], low[0],
22441 GEN_INT (count - half_width)));
22442 }
22443 else
22444 {
22445 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22446
22447 if (!rtx_equal_p (operands[0], operands[1]))
22448 emit_move_insn (operands[0], operands[1]);
22449
22450 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22451 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22452 }
22453 }
22454 else
22455 {
22456 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22457
22458 if (!rtx_equal_p (operands[0], operands[1]))
22459 emit_move_insn (operands[0], operands[1]);
22460
22461 split_double_mode (mode, operands, 1, low, high);
22462
22463 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22464 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22465
22466 if (TARGET_CMOVE && scratch)
22467 {
22468 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22469 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22470
22471 emit_move_insn (scratch, high[0]);
22472 emit_insn (gen_ashr3 (scratch, scratch,
22473 GEN_INT (half_width - 1)));
22474 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22475 scratch));
22476 }
22477 else
22478 {
22479 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22480 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22481
22482 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22483 }
22484 }
22485 }
22486
22487 void
22488 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22489 {
22490 rtx (*gen_lshr3)(rtx, rtx, rtx)
22491 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22492 rtx (*gen_shrd)(rtx, rtx, rtx);
22493 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22494
22495 rtx low[2], high[2];
22496 int count;
22497
22498 if (CONST_INT_P (operands[2]))
22499 {
22500 split_double_mode (mode, operands, 2, low, high);
22501 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22502
22503 if (count >= half_width)
22504 {
22505 emit_move_insn (low[0], high[1]);
22506 ix86_expand_clear (high[0]);
22507
22508 if (count > half_width)
22509 emit_insn (gen_lshr3 (low[0], low[0],
22510 GEN_INT (count - half_width)));
22511 }
22512 else
22513 {
22514 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22515
22516 if (!rtx_equal_p (operands[0], operands[1]))
22517 emit_move_insn (operands[0], operands[1]);
22518
22519 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22520 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22521 }
22522 }
22523 else
22524 {
22525 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22526
22527 if (!rtx_equal_p (operands[0], operands[1]))
22528 emit_move_insn (operands[0], operands[1]);
22529
22530 split_double_mode (mode, operands, 1, low, high);
22531
22532 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22533 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22534
22535 if (TARGET_CMOVE && scratch)
22536 {
22537 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22538 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22539
22540 ix86_expand_clear (scratch);
22541 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22542 scratch));
22543 }
22544 else
22545 {
22546 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22547 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22548
22549 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22550 }
22551 }
22552 }
22553
22554 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
22555 static void
22556 predict_jump (int prob)
22557 {
22558 rtx insn = get_last_insn ();
22559 gcc_assert (JUMP_P (insn));
22560 add_int_reg_note (insn, REG_BR_PROB, prob);
22561 }
22562
22563 /* Helper function for the string operations below.  Test whether VARIABLE
22564    is aligned to VALUE bytes (i.e. VARIABLE & VALUE is zero); if so, jump
22565    to the returned label.  */
22565 static rtx
22566 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22567 {
22568 rtx label = gen_label_rtx ();
22569 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22570 if (GET_MODE (variable) == DImode)
22571 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22572 else
22573 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22574 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22575 1, label);
22576 if (epilogue)
22577 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22578 else
22579 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22580 return label;
22581 }
22582
22583 /* Decrease COUNTREG by VALUE.  */
22584 static void
22585 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22586 {
22587 rtx (*gen_add)(rtx, rtx, rtx)
22588 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22589
22590 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22591 }
22592
22593 /* Zero extend the possibly SImode EXP to a Pmode register.  */
22594 rtx
22595 ix86_zero_extend_to_Pmode (rtx exp)
22596 {
22597 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22598 }
22599
22600 /* Divide COUNTREG by SCALE. */
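/* SCALE is expected to be a power of two; a non-constant COUNTREG is
   divided by emitting a logical right shift by exact_log2 (SCALE).  */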
22601 static rtx
22602 scale_counter (rtx countreg, int scale)
22603 {
22604 rtx sc;
22605
22606 if (scale == 1)
22607 return countreg;
22608 if (CONST_INT_P (countreg))
22609 return GEN_INT (INTVAL (countreg) / scale);
22610 gcc_assert (REG_P (countreg));
22611
22612 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22613 GEN_INT (exact_log2 (scale)),
22614 NULL, 1, OPTAB_DIRECT);
22615 return sc;
22616 }
22617
22618 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22619 DImode for constant loop counts. */
22620
22621 static enum machine_mode
22622 counter_mode (rtx count_exp)
22623 {
22624 if (GET_MODE (count_exp) != VOIDmode)
22625 return GET_MODE (count_exp);
22626 if (!CONST_INT_P (count_exp))
22627 return Pmode;
22628 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22629 return DImode;
22630 return SImode;
22631 }
22632
22633 /* Copy the address to a Pmode register. This is used for x32 to
22634 truncate DImode TLS address to a SImode register. */
22635
22636 static rtx
22637 ix86_copy_addr_to_reg (rtx addr)
22638 {
22639 if (GET_MODE (addr) == Pmode)
22640 return copy_addr_to_reg (addr);
22641 else
22642 {
22643 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22644 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22645 }
22646 }
22647
22648 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
22649    SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size
22650    is COUNT, specified in bytes.  When ISSETMEM is TRUE, output the equivalent
22651    loop to fill the memory with VALUE (supposed to be in MODE).
22652
22653    The size is rounded down to a whole number of chunks moved at once.
22654    SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
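/* As a rough sketch (not the literal RTL), the emitted code has the shape:

     size = count & ~(GET_MODE_SIZE (mode) * unroll - 1);
     iter = 0;
     do
       {
         copy or set UNROLL chunks of MODE at dest + iter (and src + iter);
         iter += GET_MODE_SIZE (mode) * unroll;
       }
     while (iter < size);
     destptr += iter;  srcptr += iter;   (the latter only when !ISSETMEM)  */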
22655
22656
22657 static void
22658 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22659 rtx destptr, rtx srcptr, rtx value,
22660 rtx count, enum machine_mode mode, int unroll,
22661 int expected_size, bool issetmem)
22662 {
22663 rtx out_label, top_label, iter, tmp;
22664 enum machine_mode iter_mode = counter_mode (count);
22665 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22666 rtx piece_size = GEN_INT (piece_size_n);
22667 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22668 rtx size;
22669 int i;
22670
22671 top_label = gen_label_rtx ();
22672 out_label = gen_label_rtx ();
22673 iter = gen_reg_rtx (iter_mode);
22674
22675 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22676 NULL, 1, OPTAB_DIRECT);
22677 /* Those two should combine. */
22678 if (piece_size == const1_rtx)
22679 {
22680 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22681 true, out_label);
22682 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22683 }
22684 emit_move_insn (iter, const0_rtx);
22685
22686 emit_label (top_label);
22687
22688 tmp = convert_modes (Pmode, iter_mode, iter, true);
22689
22690   /* This assert could be relaxed - in that case we'd need to compute
22691      the smallest power of two containing PIECE_SIZE_N and pass it to
22692      offset_address.  */
22693 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22694 destmem = offset_address (destmem, tmp, piece_size_n);
22695 destmem = adjust_address (destmem, mode, 0);
22696
22697 if (!issetmem)
22698 {
22699 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22700 srcmem = adjust_address (srcmem, mode, 0);
22701
22702       /* When unrolling for chips that reorder memory reads and writes,
22703 	 we can save registers by using a single temporary.
22704 	 Also, using 4 temporaries is overkill in 32-bit mode.  */
22705 if (!TARGET_64BIT && 0)
22706 {
22707 for (i = 0; i < unroll; i++)
22708 {
22709 if (i)
22710 {
22711 destmem =
22712 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22713 srcmem =
22714 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22715 }
22716 emit_move_insn (destmem, srcmem);
22717 }
22718 }
22719 else
22720 {
22721 rtx tmpreg[4];
22722 gcc_assert (unroll <= 4);
22723 for (i = 0; i < unroll; i++)
22724 {
22725 tmpreg[i] = gen_reg_rtx (mode);
22726 if (i)
22727 {
22728 srcmem =
22729 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22730 }
22731 emit_move_insn (tmpreg[i], srcmem);
22732 }
22733 for (i = 0; i < unroll; i++)
22734 {
22735 if (i)
22736 {
22737 destmem =
22738 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22739 }
22740 emit_move_insn (destmem, tmpreg[i]);
22741 }
22742 }
22743 }
22744 else
22745 for (i = 0; i < unroll; i++)
22746 {
22747 if (i)
22748 destmem =
22749 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22750 emit_move_insn (destmem, value);
22751 }
22752
22753 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22754 true, OPTAB_LIB_WIDEN);
22755 if (tmp != iter)
22756 emit_move_insn (iter, tmp);
22757
22758 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22759 true, top_label);
22760 if (expected_size != -1)
22761 {
22762 expected_size /= GET_MODE_SIZE (mode) * unroll;
22763 if (expected_size == 0)
22764 predict_jump (0);
22765 else if (expected_size > REG_BR_PROB_BASE)
22766 predict_jump (REG_BR_PROB_BASE - 1);
22767 else
22768 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22769 }
22770 else
22771 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22772 iter = ix86_zero_extend_to_Pmode (iter);
22773 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22774 true, OPTAB_LIB_WIDEN);
22775 if (tmp != destptr)
22776 emit_move_insn (destptr, tmp);
22777 if (!issetmem)
22778 {
22779 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22780 true, OPTAB_LIB_WIDEN);
22781 if (tmp != srcptr)
22782 emit_move_insn (srcptr, tmp);
22783 }
22784 emit_label (out_label);
22785 }
22786
22787 /* Output a "rep; mov" or "rep; stos" instruction depending on the ISSETMEM argument.
22788    When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22789    When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22790    For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22791    ORIG_VALUE is the original value passed to memset to fill the memory with.
22792    Other arguments have the same meaning as for the previous function.  */
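/* For example (illustration only), with MODE == SImode a copy is emitted
   as the equivalent of "rep movsd" with the count register holding
   COUNT / 4.  */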
22793
22794 static void
22795 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22796 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22797 rtx count,
22798 enum machine_mode mode, bool issetmem)
22799 {
22800 rtx destexp;
22801 rtx srcexp;
22802 rtx countreg;
22803 HOST_WIDE_INT rounded_count;
22804
22805 /* If possible, it is shorter to use rep movs.
22806 TODO: Maybe it is better to move this logic to decide_alg. */
22807 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22808 && (!issetmem || orig_value == const0_rtx))
22809 mode = SImode;
22810
22811 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22812 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22813
22814 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22815 GET_MODE_SIZE (mode)));
22816 if (mode != QImode)
22817 {
22818 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22819 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22820 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22821 }
22822 else
22823 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22824 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22825 {
22826 rounded_count = (INTVAL (count)
22827 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22828 destmem = shallow_copy_rtx (destmem);
22829 set_mem_size (destmem, rounded_count);
22830 }
22831 else if (MEM_SIZE_KNOWN_P (destmem))
22832 clear_mem_size (destmem);
22833
22834 if (issetmem)
22835 {
22836 value = force_reg (mode, gen_lowpart (mode, value));
22837 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22838 }
22839 else
22840 {
22841 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22842 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22843 if (mode != QImode)
22844 {
22845 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22846 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22847 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22848 }
22849 else
22850 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
22851 if (CONST_INT_P (count))
22852 {
22853 rounded_count = (INTVAL (count)
22854 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22855 srcmem = shallow_copy_rtx (srcmem);
22856 set_mem_size (srcmem, rounded_count);
22857 }
22858 else
22859 {
22860 if (MEM_SIZE_KNOWN_P (srcmem))
22861 clear_mem_size (srcmem);
22862 }
22863 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
22864 destexp, srcexp));
22865 }
22866 }
22867
22868 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22869    DESTMEM.
22870    SRCMEM is passed by pointer and is updated on return.
22871    The return value is the updated DESTMEM.  */
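/* Illustrative example: on a 64-bit target with SIZE_TO_MOVE == 16 and
   DImode as the widest mode with a move pattern, two 8-byte load/store
   pairs are emitted, each through a temporary register, and DESTPTR and
   SRCPTR are advanced after each pair.  */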
22872 static rtx
22873 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
22874 HOST_WIDE_INT size_to_move)
22875 {
22876 rtx dst = destmem, src = *srcmem, adjust, tempreg;
22877 enum insn_code code;
22878 enum machine_mode move_mode;
22879 int piece_size, i;
22880
22881   /* Find the widest mode in which we could perform moves.
22882      Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
22883      it until a move of that size is supported.  */
22884 piece_size = 1 << floor_log2 (size_to_move);
22885 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
22886 code = optab_handler (mov_optab, move_mode);
22887 while (code == CODE_FOR_nothing && piece_size > 1)
22888 {
22889 piece_size >>= 1;
22890 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
22891 code = optab_handler (mov_optab, move_mode);
22892 }
22893
22894 /* Find the corresponding vector mode with the same size as MOVE_MODE.
22895 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
22896 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
22897 {
22898 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
22899 move_mode = mode_for_vector (word_mode, nunits);
22900 code = optab_handler (mov_optab, move_mode);
22901 if (code == CODE_FOR_nothing)
22902 {
22903 move_mode = word_mode;
22904 piece_size = GET_MODE_SIZE (move_mode);
22905 code = optab_handler (mov_optab, move_mode);
22906 }
22907 }
22908 gcc_assert (code != CODE_FOR_nothing);
22909
22910 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
22911 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
22912
22913   /* Emit moves.  We'll need SIZE_TO_MOVE / PIECE_SIZE of them.  */
22914 gcc_assert (size_to_move % piece_size == 0);
22915 adjust = GEN_INT (piece_size);
22916 for (i = 0; i < size_to_move; i += piece_size)
22917 {
22918 /* We move from memory to memory, so we'll need to do it via
22919 a temporary register. */
22920 tempreg = gen_reg_rtx (move_mode);
22921 emit_insn (GEN_FCN (code) (tempreg, src));
22922 emit_insn (GEN_FCN (code) (dst, tempreg));
22923
22924 emit_move_insn (destptr,
22925 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
22926 emit_move_insn (srcptr,
22927 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
22928
22929 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
22930 piece_size);
22931 src = adjust_automodify_address_nv (src, move_mode, srcptr,
22932 piece_size);
22933 }
22934
22935 /* Update DST and SRC rtx. */
22936 *srcmem = src;
22937 return dst;
22938 }
22939
22940 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
22941 static void
22942 expand_movmem_epilogue (rtx destmem, rtx srcmem,
22943 rtx destptr, rtx srcptr, rtx count, int max_size)
22944 {
22945 rtx src, dest;
22946 if (CONST_INT_P (count))
22947 {
22948 HOST_WIDE_INT countval = INTVAL (count);
22949 HOST_WIDE_INT epilogue_size = countval % max_size;
22950 int i;
22951
22952 /* For now MAX_SIZE should be a power of 2. This assert could be
22953 relaxed, but it'll require a bit more complicated epilogue
22954 expanding. */
22955 gcc_assert ((max_size & (max_size - 1)) == 0);
22956 for (i = max_size; i >= 1; i >>= 1)
22957 {
22958 if (epilogue_size & i)
22959 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
22960 }
22961 return;
22962 }
22963 if (max_size > 8)
22964 {
22965 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
22966 count, 1, OPTAB_DIRECT);
22967 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
22968 count, QImode, 1, 4, false);
22969 return;
22970 }
22971
22972 /* When there are stringops, we can cheaply increase dest and src pointers.
22973 Otherwise we save code size by maintaining offset (zero is readily
22974 available from preceding rep operation) and using x86 addressing modes.
22975 */
22976 if (TARGET_SINGLE_STRINGOP)
22977 {
22978 if (max_size > 4)
22979 {
22980 rtx label = ix86_expand_aligntest (count, 4, true);
22981 src = change_address (srcmem, SImode, srcptr);
22982 dest = change_address (destmem, SImode, destptr);
22983 emit_insn (gen_strmov (destptr, dest, srcptr, src));
22984 emit_label (label);
22985 LABEL_NUSES (label) = 1;
22986 }
22987 if (max_size > 2)
22988 {
22989 rtx label = ix86_expand_aligntest (count, 2, true);
22990 src = change_address (srcmem, HImode, srcptr);
22991 dest = change_address (destmem, HImode, destptr);
22992 emit_insn (gen_strmov (destptr, dest, srcptr, src));
22993 emit_label (label);
22994 LABEL_NUSES (label) = 1;
22995 }
22996 if (max_size > 1)
22997 {
22998 rtx label = ix86_expand_aligntest (count, 1, true);
22999 src = change_address (srcmem, QImode, srcptr);
23000 dest = change_address (destmem, QImode, destptr);
23001 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23002 emit_label (label);
23003 LABEL_NUSES (label) = 1;
23004 }
23005 }
23006 else
23007 {
23008 rtx offset = force_reg (Pmode, const0_rtx);
23009 rtx tmp;
23010
23011 if (max_size > 4)
23012 {
23013 rtx label = ix86_expand_aligntest (count, 4, true);
23014 src = change_address (srcmem, SImode, srcptr);
23015 dest = change_address (destmem, SImode, destptr);
23016 emit_move_insn (dest, src);
23017 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23018 true, OPTAB_LIB_WIDEN);
23019 if (tmp != offset)
23020 emit_move_insn (offset, tmp);
23021 emit_label (label);
23022 LABEL_NUSES (label) = 1;
23023 }
23024 if (max_size > 2)
23025 {
23026 rtx label = ix86_expand_aligntest (count, 2, true);
23027 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23028 src = change_address (srcmem, HImode, tmp);
23029 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23030 dest = change_address (destmem, HImode, tmp);
23031 emit_move_insn (dest, src);
23032 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23033 true, OPTAB_LIB_WIDEN);
23034 if (tmp != offset)
23035 emit_move_insn (offset, tmp);
23036 emit_label (label);
23037 LABEL_NUSES (label) = 1;
23038 }
23039 if (max_size > 1)
23040 {
23041 rtx label = ix86_expand_aligntest (count, 1, true);
23042 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23043 src = change_address (srcmem, QImode, tmp);
23044 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23045 dest = change_address (destmem, QImode, tmp);
23046 emit_move_insn (dest, src);
23047 emit_label (label);
23048 LABEL_NUSES (label) = 1;
23049 }
23050 }
23051 }
23052
23053 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23054    with the value PROMOTED_VAL.
23055    DESTPTR is advanced as the stores are emitted.
23056    The return value is the updated DESTMEM.  */
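/* Illustrative example: on a 64-bit target with a DImode PROMOTED_VAL and
   SIZE_TO_MOVE == 16, two 8-byte stores of the value are emitted and
   DESTPTR is advanced past each of them.  */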
23057 static rtx
23058 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23059 HOST_WIDE_INT size_to_move)
23060 {
23061 rtx dst = destmem, adjust;
23062 enum insn_code code;
23063 enum machine_mode move_mode;
23064 int piece_size, i;
23065
23066   /* Use the mode of PROMOTED_VAL for the stores; if SIZE_TO_MOVE is
23067      smaller than that mode, narrow to an integer mode of exactly
23068      SIZE_TO_MOVE bytes.  */
23069 move_mode = GET_MODE (promoted_val);
23070 if (move_mode == VOIDmode)
23071 move_mode = QImode;
23072 if (size_to_move < GET_MODE_SIZE (move_mode))
23073 {
23074 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23075 promoted_val = gen_lowpart (move_mode, promoted_val);
23076 }
23077 piece_size = GET_MODE_SIZE (move_mode);
23078 code = optab_handler (mov_optab, move_mode);
23079 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23080
23081 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23082
23083   /* Emit moves.  We'll need SIZE_TO_MOVE / PIECE_SIZE of them.  */
23084 gcc_assert (size_to_move % piece_size == 0);
23085 adjust = GEN_INT (piece_size);
23086 for (i = 0; i < size_to_move; i += piece_size)
23087 {
23088 if (piece_size <= GET_MODE_SIZE (word_mode))
23089 {
23090 emit_insn (gen_strset (destptr, dst, promoted_val));
23091 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23092 piece_size);
23093 continue;
23094 }
23095
23096 emit_insn (GEN_FCN (code) (dst, promoted_val));
23097
23098 emit_move_insn (destptr,
23099 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23100
23101 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23102 piece_size);
23103 }
23104
23105 /* Update DST rtx. */
23106 return dst;
23107 }
23108 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
23109 static void
23110 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23111 rtx count, int max_size)
23112 {
23113 count =
23114 expand_simple_binop (counter_mode (count), AND, count,
23115 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23116 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23117 gen_lowpart (QImode, value), count, QImode,
23118 1, max_size / 2, true);
23119 }
23120
23121 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
23122 static void
23123 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23124 rtx count, int max_size)
23125 {
23126 rtx dest;
23127
23128 if (CONST_INT_P (count))
23129 {
23130 HOST_WIDE_INT countval = INTVAL (count);
23131 HOST_WIDE_INT epilogue_size = countval % max_size;
23132 int i;
23133
23134 /* For now MAX_SIZE should be a power of 2. This assert could be
23135 relaxed, but it'll require a bit more complicated epilogue
23136 expanding. */
23137 gcc_assert ((max_size & (max_size - 1)) == 0);
23138 for (i = max_size; i >= 1; i >>= 1)
23139 {
23140 if (epilogue_size & i)
23141 {
23142 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23143 destmem = emit_memset (destmem, destptr, vec_value, i);
23144 else
23145 destmem = emit_memset (destmem, destptr, value, i);
23146 }
23147 }
23148 return;
23149 }
23150 if (max_size > 32)
23151 {
23152 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23153 return;
23154 }
23155 if (max_size > 16)
23156 {
23157 rtx label = ix86_expand_aligntest (count, 16, true);
23158 if (TARGET_64BIT)
23159 {
23160 dest = change_address (destmem, DImode, destptr);
23161 emit_insn (gen_strset (destptr, dest, value));
23162 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23163 emit_insn (gen_strset (destptr, dest, value));
23164 }
23165 else
23166 {
23167 dest = change_address (destmem, SImode, destptr);
23168 emit_insn (gen_strset (destptr, dest, value));
23169 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23170 emit_insn (gen_strset (destptr, dest, value));
23171 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23172 emit_insn (gen_strset (destptr, dest, value));
23173 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23174 emit_insn (gen_strset (destptr, dest, value));
23175 }
23176 emit_label (label);
23177 LABEL_NUSES (label) = 1;
23178 }
23179 if (max_size > 8)
23180 {
23181 rtx label = ix86_expand_aligntest (count, 8, true);
23182 if (TARGET_64BIT)
23183 {
23184 dest = change_address (destmem, DImode, destptr);
23185 emit_insn (gen_strset (destptr, dest, value));
23186 }
23187 else
23188 {
23189 dest = change_address (destmem, SImode, destptr);
23190 emit_insn (gen_strset (destptr, dest, value));
23191 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23192 emit_insn (gen_strset (destptr, dest, value));
23193 }
23194 emit_label (label);
23195 LABEL_NUSES (label) = 1;
23196 }
23197 if (max_size > 4)
23198 {
23199 rtx label = ix86_expand_aligntest (count, 4, true);
23200 dest = change_address (destmem, SImode, destptr);
23201 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23202 emit_label (label);
23203 LABEL_NUSES (label) = 1;
23204 }
23205 if (max_size > 2)
23206 {
23207 rtx label = ix86_expand_aligntest (count, 2, true);
23208 dest = change_address (destmem, HImode, destptr);
23209 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23210 emit_label (label);
23211 LABEL_NUSES (label) = 1;
23212 }
23213 if (max_size > 1)
23214 {
23215 rtx label = ix86_expand_aligntest (count, 1, true);
23216 dest = change_address (destmem, QImode, destptr);
23217 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23218 emit_label (label);
23219 LABEL_NUSES (label) = 1;
23220 }
23221 }
23222
23223 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM, or store enough
23224    into DESTMEM, to align it to DESIRED_ALIGNMENT.  The original alignment is ALIGN.
23225    Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23226    ignored.
23227    The return value is the updated DESTMEM.  */
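/* For example (illustration only), with ALIGN == 1 and DESIRED_ALIGNMENT == 16,
   conditional 1-, 2-, 4- and 8-byte copies or stores are emitted, each guarded
   by an alignment test of DESTPTR, until DESTPTR is 16-byte aligned.  */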
23228 static rtx
23229 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23230 rtx destptr, rtx srcptr, rtx value,
23231 rtx vec_value, rtx count, int align,
23232 int desired_alignment, bool issetmem)
23233 {
23234 int i;
23235 for (i = 1; i < desired_alignment; i <<= 1)
23236 {
23237 if (align <= i)
23238 {
23239 rtx label = ix86_expand_aligntest (destptr, i, false);
23240 if (issetmem)
23241 {
23242 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23243 destmem = emit_memset (destmem, destptr, vec_value, i);
23244 else
23245 destmem = emit_memset (destmem, destptr, value, i);
23246 }
23247 else
23248 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23249 ix86_adjust_counter (count, i);
23250 emit_label (label);
23251 LABEL_NUSES (label) = 1;
23252 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23253 }
23254 }
23255 return destmem;
23256 }
23257
23258 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23259    or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23260    and jump to DONE_LABEL.  */
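/* For example (illustration only), with SIZE == 4 and a 4-byte MODE the
   first 4 bytes and the last 4 bytes of the block are copied (possibly
   overlapping), which together handle any length from 4 to 7.  */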
23261 static void
23262 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23263 rtx destptr, rtx srcptr,
23264 rtx value, rtx vec_value,
23265 rtx count, int size,
23266 rtx done_label, bool issetmem)
23267 {
23268 rtx label = ix86_expand_aligntest (count, size, false);
23269 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23270 rtx modesize;
23271 int n;
23272
23273   /* If we do not have a vector value to copy, we must reduce the size.  */
23274 if (issetmem)
23275 {
23276 if (!vec_value)
23277 {
23278 if (GET_MODE (value) == VOIDmode && size > 8)
23279 mode = Pmode;
23280 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23281 mode = GET_MODE (value);
23282 }
23283 else
23284 mode = GET_MODE (vec_value), value = vec_value;
23285 }
23286 else
23287 {
23288 /* Choose appropriate vector mode. */
23289 if (size >= 32)
23290 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23291 else if (size >= 16)
23292 mode = TARGET_SSE ? V16QImode : DImode;
23293 srcmem = change_address (srcmem, mode, srcptr);
23294 }
23295 destmem = change_address (destmem, mode, destptr);
23296 modesize = GEN_INT (GET_MODE_SIZE (mode));
23297 gcc_assert (GET_MODE_SIZE (mode) <= size);
23298 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23299 {
23300 if (issetmem)
23301 emit_move_insn (destmem, gen_lowpart (mode, value));
23302 else
23303 {
23304 emit_move_insn (destmem, srcmem);
23305 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23306 }
23307 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23308 }
23309
23310 destmem = offset_address (destmem, count, 1);
23311 destmem = offset_address (destmem, GEN_INT (-size - GET_MODE_SIZE (mode)),
23312 GET_MODE_SIZE (mode));
23313 if (issetmem)
23314 emit_move_insn (destmem, gen_lowpart (mode, value));
23315 else
23316 {
23317 srcmem = offset_address (srcmem, count, 1);
23318 srcmem = offset_address (srcmem, GEN_INT (-size - GET_MODE_SIZE (mode)),
23319 GET_MODE_SIZE (mode));
23320 emit_move_insn (destmem, srcmem);
23321 }
23322 emit_jump_insn (gen_jump (done_label));
23323 emit_barrier ();
23324
23325 emit_label (label);
23326 LABEL_NUSES (label) = 1;
23327 }
23328
23329 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23330 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23331 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23332 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23333 DONE_LABEL is a label after the whole copying sequence. The label is created
23334 on demand if *DONE_LABEL is NULL.
23335 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
23336 bounds after the initial copies.
23337
23338 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23339 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23340 we will dispatch to a library call for large blocks.
23341
23342 In pseudocode we do:
23343
23344 if (COUNT < SIZE)
23345 {
23346 Assume that SIZE is 4. Bigger sizes are handled analogously
23347 if (COUNT & 4)
23348 {
23349 copy 4 bytes from SRCPTR to DESTPTR
23350 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23351 goto done_label
23352 }
23353 if (!COUNT)
23354 goto done_label;
23355 copy 1 byte from SRCPTR to DESTPTR
23356 if (COUNT & 2)
23357 {
23358 copy 2 bytes from SRCPTR to DESTPTR
23359 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23360 }
23361 }
23362 else
23363 {
23364 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23365 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
23366
23367 OLD_DESTPTR = DESTPTR;
23368 Align DESTPTR up to DESIRED_ALIGN
23369 SRCPTR += DESTPTR - OLD_DESTPTR
23370 COUNT -= DESTPTR - OLD_DESTPTR
23371 if (DYNAMIC_CHECK)
23372 Round COUNT down to multiple of SIZE
23373 << optional caller supplied zero size guard is here >>
23374 << optional caller supplied dynamic check is here >>
23375 << caller supplied main copy loop is here >>
23376 }
23377 done_label:
23378 */
23379 static void
23380 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23381 rtx *destptr, rtx *srcptr,
23382 enum machine_mode mode,
23383 rtx value, rtx vec_value,
23384 rtx *count,
23385 rtx *done_label,
23386 int size,
23387 int desired_align,
23388 int align,
23389 unsigned HOST_WIDE_INT *min_size,
23390 bool dynamic_check,
23391 bool issetmem)
23392 {
23393 rtx loop_label = NULL, label;
23394 int n;
23395 rtx modesize;
23396 int prolog_size = 0;
23397 rtx mode_value;
23398
23399 /* Choose the proper value to copy. */
23400 if (issetmem && VECTOR_MODE_P (mode))
23401 mode_value = vec_value;
23402 else
23403 mode_value = value;
23404 gcc_assert (GET_MODE_SIZE (mode) <= size);
23405
23406 /* See if block is big or small, handle small blocks. */
23407 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23408 {
23409 int size2 = size;
23410 loop_label = gen_label_rtx ();
23411
23412 if (!*done_label)
23413 *done_label = gen_label_rtx ();
23414
23415 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23416 1, loop_label);
23417 size2 >>= 1;
23418
23419 /* Handle sizes > 3. */
23420 for (;size2 > 2; size2 >>= 1)
23421 expand_small_movmem_or_setmem (destmem, srcmem,
23422 *destptr, *srcptr,
23423 value, vec_value,
23424 *count,
23425 size2, *done_label, issetmem);
23426 /* Nothing to copy? Jump to DONE_LABEL if so */
23427 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23428 1, *done_label);
23429
23430 /* Do a byte copy. */
23431 destmem = change_address (destmem, QImode, *destptr);
23432 if (issetmem)
23433 emit_move_insn (destmem, gen_lowpart (QImode, value));
23434 else
23435 {
23436 srcmem = change_address (srcmem, QImode, *srcptr);
23437 emit_move_insn (destmem, srcmem);
23438 }
23439
23440 /* Handle sizes 2 and 3. */
23441 label = ix86_expand_aligntest (*count, 2, false);
23442 destmem = change_address (destmem, HImode, *destptr);
23443 destmem = offset_address (destmem, *count, 1);
23444 destmem = offset_address (destmem, GEN_INT (-2), 2);
23445 if (issetmem)
23446 emit_move_insn (destmem, gen_lowpart (HImode, value));
23447 else
23448 {
23449 srcmem = change_address (srcmem, HImode, *srcptr);
23450 srcmem = offset_address (srcmem, *count, 1);
23451 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23452 emit_move_insn (destmem, srcmem);
23453 }
23454
23455 emit_label (label);
23456 LABEL_NUSES (label) = 1;
23457 emit_jump_insn (gen_jump (*done_label));
23458 emit_barrier ();
23459 }
23460 else
23461 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23462 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23463
23464 /* Start memcpy for COUNT >= SIZE. */
23465 if (loop_label)
23466 {
23467 emit_label (loop_label);
23468 LABEL_NUSES (loop_label) = 1;
23469 }
23470
23471 /* Copy first desired_align bytes. */
23472 if (!issetmem)
23473 srcmem = change_address (srcmem, mode, *srcptr);
23474 destmem = change_address (destmem, mode, *destptr);
23475 modesize = GEN_INT (GET_MODE_SIZE (mode));
23476 for (n = 0; prolog_size < desired_align - align; n++)
23477 {
23478 if (issetmem)
23479 emit_move_insn (destmem, mode_value);
23480 else
23481 {
23482 emit_move_insn (destmem, srcmem);
23483 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23484 }
23485 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23486 prolog_size += GET_MODE_SIZE (mode);
23487 }
23488
23489
23490 /* Copy last SIZE bytes. */
23491 destmem = offset_address (destmem, *count, 1);
23492 destmem = offset_address (destmem,
23493 GEN_INT (-size - prolog_size),
23494 1);
23495 if (issetmem)
23496 emit_move_insn (destmem, mode_value);
23497 else
23498 {
23499 srcmem = offset_address (srcmem, *count, 1);
23500 srcmem = offset_address (srcmem,
23501 GEN_INT (-size - prolog_size),
23502 1);
23503 emit_move_insn (destmem, srcmem);
23504 }
23505 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23506 {
23507 destmem = offset_address (destmem, modesize, 1);
23508 if (issetmem)
23509 emit_move_insn (destmem, mode_value);
23510 else
23511 {
23512 srcmem = offset_address (srcmem, modesize, 1);
23513 emit_move_insn (destmem, srcmem);
23514 }
23515 }
23516
23517 /* Align destination. */
23518 if (desired_align > 1 && desired_align > align)
23519 {
23520 rtx saveddest = *destptr;
23521
23522 gcc_assert (desired_align <= size);
23523 /* Align destptr up, placing it in a new register. */
23524 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23525 GEN_INT (prolog_size),
23526 NULL_RTX, 1, OPTAB_DIRECT);
23527 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23528 GEN_INT (-desired_align),
23529 *destptr, 1, OPTAB_DIRECT);
23530 /* See how many bytes we skipped. */
23531 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23532 *destptr,
23533 saveddest, 1, OPTAB_DIRECT);
23534 /* Adjust srcptr and count. */
23535 if (!issetmem)
23536 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23537 *srcptr, 1, OPTAB_DIRECT);
23538 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23539 saveddest, *count, 1, OPTAB_DIRECT);
23540 /* We copied at most size + prolog_size. */
23541 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23542 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23543 else
23544 *min_size = 0;
23545
23546 /* Our loops always round down the block size, but for dispatch to the library
23547 we need the precise value. */
23548 if (dynamic_check)
23549 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23550 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23551 }
23552 else
23553 {
23554 gcc_assert (prolog_size == 0);
23555 /* Decrease count, so we won't end up copying the last word twice. */
23556 if (!CONST_INT_P (*count))
23557 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23558 constm1_rtx, *count, 1, OPTAB_DIRECT);
23559 else
23560 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23561 if (*min_size)
23562 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23563 }
23564 }
23565
23566
23567 /* This function is like the previous one, except here we know how many bytes
23568 need to be copied. That allows us to update alignment not only of DST, which
23569 is returned, but also of SRC, which is passed as a pointer for that
23570 reason. */
23571 static rtx
23572 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23573 rtx srcreg, rtx value, rtx vec_value,
23574 int desired_align, int align_bytes,
23575 bool issetmem)
23576 {
23577 rtx src = NULL;
23578 rtx orig_dst = dst;
23579 rtx orig_src = NULL;
23580 int piece_size = 1;
23581 int copied_bytes = 0;
23582
23583 if (!issetmem)
23584 {
23585 gcc_assert (srcp != NULL);
23586 src = *srcp;
23587 orig_src = src;
23588 }
23589
23590 for (piece_size = 1;
23591 piece_size <= desired_align && copied_bytes < align_bytes;
23592 piece_size <<= 1)
23593 {
23594 if (align_bytes & piece_size)
23595 {
23596 if (issetmem)
23597 {
23598 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23599 dst = emit_memset (dst, destreg, vec_value, piece_size);
23600 else
23601 dst = emit_memset (dst, destreg, value, piece_size);
23602 }
23603 else
23604 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23605 copied_bytes += piece_size;
23606 }
23607 }
23608 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23609 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23610 if (MEM_SIZE_KNOWN_P (orig_dst))
23611 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23612
23613 if (!issetmem)
23614 {
23615 int src_align_bytes = get_mem_align_offset (src, desired_align
23616 * BITS_PER_UNIT);
23617 if (src_align_bytes >= 0)
23618 src_align_bytes = desired_align - src_align_bytes;
23619 if (src_align_bytes >= 0)
23620 {
23621 unsigned int src_align;
23622 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23623 {
23624 if ((src_align_bytes & (src_align - 1))
23625 == (align_bytes & (src_align - 1)))
23626 break;
23627 }
23628 if (src_align > (unsigned int) desired_align)
23629 src_align = desired_align;
23630 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23631 set_mem_align (src, src_align * BITS_PER_UNIT);
23632 }
23633 if (MEM_SIZE_KNOWN_P (orig_src))
23634 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23635 *srcp = src;
23636 }
23637
23638 return dst;
23639 }
23640
23641 /* Return true if ALG can be used in current context.
23642 Assume we expand memset if MEMSET is true. */
23643 static bool
23644 alg_usable_p (enum stringop_alg alg, bool memset)
23645 {
23646 if (alg == no_stringop)
23647 return false;
23648 if (alg == vector_loop)
23649 return TARGET_SSE || TARGET_AVX;
23650 /* Algorithms using the rep prefix want at least edi and ecx;
23651 additionally, memset wants eax and memcpy wants esi. Don't
23652 consider such algorithms if the user has appropriated those
23653 registers for their own purposes. */
23654 if (alg == rep_prefix_1_byte
23655 || alg == rep_prefix_4_byte
23656 || alg == rep_prefix_8_byte)
23657 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23658 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23659 return true;
23660 }
23661
23662 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23663 static enum stringop_alg
23664 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23665 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23666 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23667 {
23668 const struct stringop_algs * algs;
23669 bool optimize_for_speed;
23670 int max = -1;
23671 const struct processor_costs *cost;
23672 int i;
23673 bool any_alg_usable_p = false;
23674
23675 *noalign = false;
23676 *dynamic_check = -1;
23677
23678 /* Even if the string operation call is cold, we still might spend a lot
23679 of time processing large blocks. */
23680 if (optimize_function_for_size_p (cfun)
23681 || (optimize_insn_for_size_p ()
23682 && (max_size < 256
23683 || (expected_size != -1 && expected_size < 256))))
23684 optimize_for_speed = false;
23685 else
23686 optimize_for_speed = true;
23687
23688 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23689 if (memset)
23690 algs = &cost->memset[TARGET_64BIT != 0];
23691 else
23692 algs = &cost->memcpy[TARGET_64BIT != 0];
23693
23694 /* See maximal size for user defined algorithm. */
23695 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23696 {
23697 enum stringop_alg candidate = algs->size[i].alg;
23698 bool usable = alg_usable_p (candidate, memset);
23699 any_alg_usable_p |= usable;
23700
23701 if (candidate != libcall && candidate && usable)
23702 max = algs->size[i].max;
23703 }
23704
23705 /* If the expected size is not known but the max size is small enough
23706 that the inline version is a win, set the expected size into
23707 the range. */
23708 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
23709 && expected_size == -1)
23710 expected_size = min_size / 2 + max_size / 2;
23711
23712 /* If the user specified the algorithm, honor it if possible. */
23713 if (ix86_stringop_alg != no_stringop
23714 && alg_usable_p (ix86_stringop_alg, memset))
23715 return ix86_stringop_alg;
23716 /* rep; movq or rep; movl is the smallest variant. */
23717 else if (!optimize_for_speed)
23718 {
23719 *noalign = true;
23720 if (!count || (count & 3) || (memset && !zero_memset))
23721 return alg_usable_p (rep_prefix_1_byte, memset)
23722 ? rep_prefix_1_byte : loop_1_byte;
23723 else
23724 return alg_usable_p (rep_prefix_4_byte, memset)
23725 ? rep_prefix_4_byte : loop;
23726 }
23727 /* Very tiny blocks are best handled via the loop, REP is expensive to
23728 setup. */
23729 else if (expected_size != -1 && expected_size < 4)
23730 return loop_1_byte;
23731 else if (expected_size != -1)
23732 {
23733 enum stringop_alg alg = libcall;
23734 bool alg_noalign = false;
23735 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23736 {
23737 /* We get here if the algorithms that were not libcall-based
23738 were rep-prefix based and we are unable to use rep prefixes
23739 based on global register usage. Break out of the loop and
23740 use the heuristic below. */
23741 if (algs->size[i].max == 0)
23742 break;
23743 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23744 {
23745 enum stringop_alg candidate = algs->size[i].alg;
23746
23747 if (candidate != libcall && alg_usable_p (candidate, memset))
23748 {
23749 alg = candidate;
23750 alg_noalign = algs->size[i].noalign;
23751 }
23752 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23753 last non-libcall inline algorithm. */
23754 if (TARGET_INLINE_ALL_STRINGOPS)
23755 {
23756 /* When the current size is best to be copied by a libcall,
23757 but we are still forced to inline, run the heuristic below
23758 that will pick code for medium sized blocks. */
23759 if (alg != libcall)
23760 {
23761 *noalign = alg_noalign;
23762 return alg;
23763 }
23764 break;
23765 }
23766 else if (alg_usable_p (candidate, memset))
23767 {
23768 *noalign = algs->size[i].noalign;
23769 return candidate;
23770 }
23771 }
23772 }
23773 }
23774 /* When asked to inline the call anyway, try to pick a meaningful choice.
23775 We look for the maximal size of block that is faster to copy by hand and
23776 take blocks of at most that size, guessing that the average size will
23777 be roughly half of the block.
23778
23779 If this turns out to be bad, we might simply specify the preferred
23780 choice in ix86_costs. */
23781 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23782 && (algs->unknown_size == libcall
23783 || !alg_usable_p (algs->unknown_size, memset)))
23784 {
23785 enum stringop_alg alg;
23786
23787 /* If there aren't any usable algorithms, then recursing on
23788 smaller sizes isn't going to find anything. Just return the
23789 simple byte-at-a-time copy loop. */
23790 if (!any_alg_usable_p)
23791 {
23792 /* Pick something reasonable. */
23793 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23794 *dynamic_check = 128;
23795 return loop_1_byte;
23796 }
23797 if (max == -1)
23798 max = 4096;
23799 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23800 zero_memset, dynamic_check, noalign);
23801 gcc_assert (*dynamic_check == -1);
23802 gcc_assert (alg != libcall);
23803 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23804 *dynamic_check = max;
23805 return alg;
23806 }
23807 return (alg_usable_p (algs->unknown_size, memset)
23808 ? algs->unknown_size : libcall);
23809 }
23810
23811 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23812 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23813 static int
23814 decide_alignment (int align,
23815 enum stringop_alg alg,
23816 int expected_size,
23817 enum machine_mode move_mode)
23818 {
23819 int desired_align = 0;
23820
23821 gcc_assert (alg != no_stringop);
23822
23823 if (alg == libcall)
23824 return 0;
23825 if (move_mode == VOIDmode)
23826 return 0;
23827
23828 desired_align = GET_MODE_SIZE (move_mode);
23829 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
23830 copying the whole cache line at once. */
23831 if (TARGET_PENTIUMPRO
23832 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23833 desired_align = 8;
23834
23835 if (optimize_size)
23836 desired_align = 1;
23837 if (desired_align < align)
23838 desired_align = align;
23839 if (expected_size != -1 && expected_size < 4)
23840 desired_align = align;
23841
23842 return desired_align;
23843 }
23844
23845
23846 /* Helper function for memset. For QImode value 0xXY produce
23847 0xXYXYXYXY of the width specified by MODE. This is essentially
23848 a * 0x01010101, but we can do slightly better than
23849 synth_mult by unwinding the sequence by hand on CPUs with
23850 slow multiply. */
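/* For example, for VAL == 0xAB and MODE == SImode the result is
   0xABABABAB: 0xAB -> 0xABAB (v |= v << 8) -> 0xABABABAB (v |= v << 16),
   computed at compile time for constant VAL and with shift/or (or insv)
   instructions otherwise.  */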
23851 static rtx
23852 promote_duplicated_reg (enum machine_mode mode, rtx val)
23853 {
23854 enum machine_mode valmode = GET_MODE (val);
23855 rtx tmp;
23856 int nops = mode == DImode ? 3 : 2;
23857
23858 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
23859 if (val == const0_rtx)
23860 return copy_to_mode_reg (mode, CONST0_RTX (mode));
23861 if (CONST_INT_P (val))
23862 {
23863 HOST_WIDE_INT v = INTVAL (val) & 255;
23864
23865 v |= v << 8;
23866 v |= v << 16;
23867 if (mode == DImode)
23868 v |= (v << 16) << 16;
23869 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
23870 }
23871
23872 if (valmode == VOIDmode)
23873 valmode = QImode;
23874 if (valmode != QImode)
23875 val = gen_lowpart (QImode, val);
23876 if (mode == QImode)
23877 return val;
23878 if (!TARGET_PARTIAL_REG_STALL)
23879 nops--;
23880 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
23881 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
23882 <= (ix86_cost->shift_const + ix86_cost->add) * nops
23883 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
23884 {
23885 rtx reg = convert_modes (mode, QImode, val, true);
23886 tmp = promote_duplicated_reg (mode, const1_rtx);
23887 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
23888 OPTAB_DIRECT);
23889 }
23890 else
23891 {
23892 rtx reg = convert_modes (mode, QImode, val, true);
23893
23894 if (!TARGET_PARTIAL_REG_STALL)
23895 if (mode == SImode)
23896 emit_insn (gen_movsi_insv_1 (reg, reg));
23897 else
23898 emit_insn (gen_movdi_insv_1 (reg, reg));
23899 else
23900 {
23901 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
23902 NULL, 1, OPTAB_DIRECT);
23903 reg =
23904 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
23905 }
23906 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
23907 NULL, 1, OPTAB_DIRECT);
23908 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
23909 if (mode == SImode)
23910 return reg;
23911 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
23912 NULL, 1, OPTAB_DIRECT);
23913 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
23914 return reg;
23915 }
23916 }
23917
23918 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
23919 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
23920 alignment from ALIGN to DESIRED_ALIGN. */
23921 static rtx
23922 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
23923 int align)
23924 {
23925 rtx promoted_val;
23926
23927 if (TARGET_64BIT
23928 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
23929 promoted_val = promote_duplicated_reg (DImode, val);
23930 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
23931 promoted_val = promote_duplicated_reg (SImode, val);
23932 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
23933 promoted_val = promote_duplicated_reg (HImode, val);
23934 else
23935 promoted_val = val;
23936
23937 return promoted_val;
23938 }
23939
23940 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
23941 operations when profitable. The code depends upon architecture, block size
23942 and alignment, but always has one of the following overall structures:
23943
23944 Aligned move sequence:
23945
23946 1) Prologue guard: Conditional that jumps to the epilogue for small
23947 blocks that can be handled by the epilogue alone. This is faster
23948 but also needed for correctness, since the prologue assumes the block
23949 is larger than the desired alignment.
23950
23951 Optional dynamic check for size and libcall for large
23952 blocks is emitted here too, with -minline-stringops-dynamically.
23953
23954 2) Prologue: copy first few bytes in order to get destination
23955 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
23956 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
23957 copied. We emit either a jump tree on power of two sized
23958 blocks, or a byte loop.
23959
23960 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
23961 with specified algorithm.
23962
23963 4) Epilogue: code copying tail of the block that is too small to be
23964 handled by main body (or up to size guarded by prologue guard).
23965
23966 Misaligned move sequence
23967
23968 1) Misaligned move prologue/epilogue containing:
23969 a) Prologue handling small memory blocks and jumping to done_label
23970 (skipped if blocks are known to be large enough)
23971 b) Single possibly misaligned move copying the first DESIRED_ALIGN-ALIGN
23972 bytes if extra alignment is needed
23973 (skipped if alignment is not needed)
23974 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
23975
23976 2) Zero size guard dispatching to done_label, if needed
23977
23978 3) Dispatch to a library call, if needed.
23979
23980 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
23981 with specified algorithm. */
23982 bool
23983 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
23984 rtx align_exp, rtx expected_align_exp,
23985 rtx expected_size_exp, rtx min_size_exp,
23986 rtx max_size_exp, rtx probable_max_size_exp,
23987 bool issetmem)
23988 {
23989 rtx destreg;
23990 rtx srcreg = NULL;
23991 rtx label = NULL;
23992 rtx tmp;
23993 rtx jump_around_label = NULL;
23994 HOST_WIDE_INT align = 1;
23995 unsigned HOST_WIDE_INT count = 0;
23996 HOST_WIDE_INT expected_size = -1;
23997 int size_needed = 0, epilogue_size_needed;
23998 int desired_align = 0, align_bytes = 0;
23999 enum stringop_alg alg;
24000 rtx promoted_val = NULL;
24001 rtx vec_promoted_val = NULL;
24002 bool force_loopy_epilogue = false;
24003 int dynamic_check;
24004 bool need_zero_guard = false;
24005 bool noalign;
24006 enum machine_mode move_mode = VOIDmode;
24007 int unroll_factor = 1;
24008 /* TODO: Once value ranges are available, fill in proper data. */
24009 unsigned HOST_WIDE_INT min_size = 0;
24010 unsigned HOST_WIDE_INT max_size = -1;
24011 unsigned HOST_WIDE_INT probable_max_size = -1;
24012 bool misaligned_prologue_used = false;
24013
24014 if (CONST_INT_P (align_exp))
24015 align = INTVAL (align_exp);
24016 /* i386 can do misaligned access at a reasonable increase in cost. */
24017 if (CONST_INT_P (expected_align_exp)
24018 && INTVAL (expected_align_exp) > align)
24019 align = INTVAL (expected_align_exp);
24020 /* ALIGN is the minimum of destination and source alignment, but we care here
24021 just about destination alignment. */
24022 else if (!issetmem
24023 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24024 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24025
24026 if (CONST_INT_P (count_exp))
24027 min_size = max_size = probable_max_size = count = expected_size
24028 = INTVAL (count_exp);
24029 else
24030 {
24031 if (min_size_exp)
24032 min_size = INTVAL (min_size_exp);
24033 if (max_size_exp)
24034 max_size = INTVAL (max_size_exp);
24035 if (probable_max_size_exp)
24036 probable_max_size = INTVAL (probable_max_size_exp);
24037 if (CONST_INT_P (expected_size_exp) && count == 0)
24038 expected_size = INTVAL (expected_size_exp);
24039 }
24040
24041 /* Make sure we don't need to care about overflow later on. */
24042 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24043 return false;
24044
24045 /* Step 0: Decide on preferred algorithm, desired alignment and
24046 size of chunks to be copied by main loop. */
24047 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24048 issetmem,
24049 issetmem && val_exp == const0_rtx,
24050 &dynamic_check, &noalign);
24051 if (alg == libcall)
24052 return false;
24053 gcc_assert (alg != no_stringop);
24054
24055 /* For now the vector version of memset is generated only for memory zeroing, as
24056 creating the promoted vector value is very cheap in this case. */
24057 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24058 alg = unrolled_loop;
24059
24060 if (!count)
24061 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24062 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24063 if (!issetmem)
24064 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24065
24066 unroll_factor = 1;
24067 move_mode = word_mode;
24068 switch (alg)
24069 {
24070 case libcall:
24071 case no_stringop:
24072 case last_alg:
24073 gcc_unreachable ();
24074 case loop_1_byte:
24075 need_zero_guard = true;
24076 move_mode = QImode;
24077 break;
24078 case loop:
24079 need_zero_guard = true;
24080 break;
24081 case unrolled_loop:
24082 need_zero_guard = true;
24083 unroll_factor = (TARGET_64BIT ? 4 : 2);
24084 break;
24085 case vector_loop:
24086 need_zero_guard = true;
24087 unroll_factor = 4;
24088 /* Find the widest supported mode. */
24089 move_mode = word_mode;
24090 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24091 != CODE_FOR_nothing)
24092 move_mode = GET_MODE_WIDER_MODE (move_mode);
24093
24094 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24095 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24096 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24097 {
24098 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24099 move_mode = mode_for_vector (word_mode, nunits);
24100 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24101 move_mode = word_mode;
24102 }
24103 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24104 break;
24105 case rep_prefix_8_byte:
24106 move_mode = DImode;
24107 break;
24108 case rep_prefix_4_byte:
24109 move_mode = SImode;
24110 break;
24111 case rep_prefix_1_byte:
24112 move_mode = QImode;
24113 break;
24114 }
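  /* SIZE_NEEDED is the number of bytes handled by one iteration of the
     main copy loop (one chunk for the rep-prefixed variants).  */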
24115 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24116 epilogue_size_needed = size_needed;
24117
24118 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24119 if (!TARGET_ALIGN_STRINGOPS || noalign)
24120 align = desired_align;
24121
24122 /* Step 1: Prologue guard. */
24123
24124 /* Alignment code needs count to be in register. */
24125 if (CONST_INT_P (count_exp) && desired_align > align)
24126 {
24127 if (INTVAL (count_exp) > desired_align
24128 && INTVAL (count_exp) > size_needed)
24129 {
24130 align_bytes
24131 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24132 if (align_bytes <= 0)
24133 align_bytes = 0;
24134 else
24135 align_bytes = desired_align - align_bytes;
24136 }
24137 if (align_bytes == 0)
24138 count_exp = force_reg (counter_mode (count_exp), count_exp);
24139 }
24140 gcc_assert (desired_align >= 1 && align >= 1);
24141
24142 /* Misaligned move sequences handle both prologue and epilogue at once.
24143 Default code generation results in smaller code for large alignments
24144 and also avoids redundant work when sizes are known precisely. */
24145 misaligned_prologue_used
24146 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24147 && MAX (desired_align, epilogue_size_needed) <= 32
24148 && desired_align <= epilogue_size_needed
24149 && ((desired_align > align && !align_bytes)
24150 || (!count && epilogue_size_needed > 1)));
24151
24152 /* Do the cheap promotion to allow better CSE across the
24153 main loop and epilogue (i.e. one load of the big constant in
24154 front of all the code).
24155 For now the misaligned move sequences do not have a fast path
24156 without broadcasting. */
24157 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24158 {
24159 if (alg == vector_loop)
24160 {
24161 gcc_assert (val_exp == const0_rtx);
24162 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24163 promoted_val = promote_duplicated_reg_to_size (val_exp,
24164 GET_MODE_SIZE (word_mode),
24165 desired_align, align);
24166 }
24167 else
24168 {
24169 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24170 desired_align, align);
24171 }
24172 }
24173 /* Misaligned move sequences handle both prologues and epilogues at once.
24174 Default code generation results in smaller code for large alignments and
24175 also avoids redundant work when sizes are known precisely. */
24176 if (misaligned_prologue_used)
24177 {
24178 /* The misaligned move prologue handles small blocks by itself. */
24179 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24180 (dst, src, &destreg, &srcreg,
24181 move_mode, promoted_val, vec_promoted_val,
24182 &count_exp,
24183 &jump_around_label,
24184 desired_align < align
24185 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24186 desired_align, align, &min_size, dynamic_check, issetmem);
24187 if (!issetmem)
24188 src = change_address (src, BLKmode, srcreg);
24189 dst = change_address (dst, BLKmode, destreg);
24190 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24191 epilogue_size_needed = 0;
24192 if (need_zero_guard && !min_size)
24193 {
24194 /* It is possible that we copied enough so the main loop will not
24195 execute. */
24196 gcc_assert (size_needed > 1);
24197 if (jump_around_label == NULL_RTX)
24198 jump_around_label = gen_label_rtx ();
24199 emit_cmp_and_jump_insns (count_exp,
24200 GEN_INT (size_needed),
24201 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24202 if (expected_size == -1
24203 || expected_size < (desired_align - align) / 2 + size_needed)
24204 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24205 else
24206 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24207 }
24208 }
24209 /* Ensure that alignment prologue won't copy past end of block. */
24210 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24211 {
24212 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24213 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24214 Round it up to the smallest power of two greater than its current value. */
24215 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24216
24217 /* To improve performance for small blocks, we jump around the VAL
24218 promotion. This means that if the promoted VAL is not constant,
24219 we might not use it in the epilogue and have to use the byte
24220 loop variant. */
24221 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24222 force_loopy_epilogue = true;
24223 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24224 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24225 {
24226 /* If main algorithm works on QImode, no epilogue is needed.
24227 For small sizes just don't align anything. */
24228 if (size_needed == 1)
24229 desired_align = align;
24230 else
24231 goto epilogue;
24232 }
24233 else if (!count
24234 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24235 {
24236 label = gen_label_rtx ();
24237 emit_cmp_and_jump_insns (count_exp,
24238 GEN_INT (epilogue_size_needed),
24239 LTU, 0, counter_mode (count_exp), 1, label);
24240 if (expected_size == -1 || expected_size < epilogue_size_needed)
24241 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24242 else
24243 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24244 }
24245 }
24246
24247 /* Emit code to decide at runtime whether a library call or inline code should
24248 be used. */
24249 if (dynamic_check != -1)
24250 {
24251 if (!issetmem && CONST_INT_P (count_exp))
24252 {
24253 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24254 {
24255 emit_block_move_via_libcall (dst, src, count_exp, false);
24256 count_exp = const0_rtx;
24257 goto epilogue;
24258 }
24259 }
24260 else
24261 {
24262 rtx hot_label = gen_label_rtx ();
24263 if (jump_around_label == NULL_RTX)
24264 jump_around_label = gen_label_rtx ();
24265 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24266 LEU, 0, GET_MODE (count_exp), 1, hot_label);
24267 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24268 if (issetmem)
24269 set_storage_via_libcall (dst, count_exp, val_exp, false);
24270 else
24271 emit_block_move_via_libcall (dst, src, count_exp, false);
24272 emit_jump (jump_around_label);
24273 emit_label (hot_label);
24274 }
24275 }
24276
24277 /* Step 2: Alignment prologue. */
24278 /* Do the expensive promotion once we branched off the small blocks. */
24279 if (issetmem && !promoted_val)
24280 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24281 desired_align, align);
24282
24283 if (desired_align > align && !misaligned_prologue_used)
24284 {
24285 if (align_bytes == 0)
24286 {
24287 /* Except for the first move in the prologue, we no longer know
24288 the constant offset in aliasing info. It doesn't seem worth
24289 the pain to maintain it for the first move, so throw away
24290 the info early. */
24291 dst = change_address (dst, BLKmode, destreg);
24292 if (!issetmem)
24293 src = change_address (src, BLKmode, srcreg);
24294 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24295 promoted_val, vec_promoted_val,
24296 count_exp, align, desired_align,
24297 issetmem);
24298 /* At most desired_align - align bytes are copied. */
24299 if (min_size < (unsigned)(desired_align - align))
24300 min_size = 0;
24301 else
24302 min_size -= desired_align - align;
24303 }
24304 else
24305 {
24306 /* If we know how many bytes need to be stored before dst is
24307 sufficiently aligned, maintain aliasing info accurately. */
24308 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24309 srcreg,
24310 promoted_val,
24311 vec_promoted_val,
24312 desired_align,
24313 align_bytes,
24314 issetmem);
24315
24316 count_exp = plus_constant (counter_mode (count_exp),
24317 count_exp, -align_bytes);
24318 count -= align_bytes;
24319 min_size -= align_bytes;
24320 max_size -= align_bytes;
24321 }
24322 if (need_zero_guard
24323 && !min_size
24324 && (count < (unsigned HOST_WIDE_INT) size_needed
24325 || (align_bytes == 0
24326 && count < ((unsigned HOST_WIDE_INT) size_needed
24327 + desired_align - align))))
24328 {
24329 /* It is possible that we copied enough so the main loop will not
24330 execute. */
24331 gcc_assert (size_needed > 1);
24332 if (label == NULL_RTX)
24333 label = gen_label_rtx ();
24334 emit_cmp_and_jump_insns (count_exp,
24335 GEN_INT (size_needed),
24336 LTU, 0, counter_mode (count_exp), 1, label);
24337 if (expected_size == -1
24338 || expected_size < (desired_align - align) / 2 + size_needed)
24339 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24340 else
24341 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24342 }
24343 }
24344 if (label && size_needed == 1)
24345 {
24346 emit_label (label);
24347 LABEL_NUSES (label) = 1;
24348 label = NULL;
24349 epilogue_size_needed = 1;
24350 if (issetmem)
24351 promoted_val = val_exp;
24352 }
24353 else if (label == NULL_RTX && !misaligned_prologue_used)
24354 epilogue_size_needed = size_needed;
24355
24356 /* Step 3: Main loop. */
24357
24358 switch (alg)
24359 {
24360 case libcall:
24361 case no_stringop:
24362 case last_alg:
24363 gcc_unreachable ();
24364 case loop_1_byte:
24365 case loop:
24366 case unrolled_loop:
24367 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24368 count_exp, move_mode, unroll_factor,
24369 expected_size, issetmem);
24370 break;
24371 case vector_loop:
24372 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24373 vec_promoted_val, count_exp, move_mode,
24374 unroll_factor, expected_size, issetmem);
24375 break;
24376 case rep_prefix_8_byte:
24377 case rep_prefix_4_byte:
24378 case rep_prefix_1_byte:
24379 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24380 val_exp, count_exp, move_mode, issetmem);
24381 break;
24382 }
24383 /* Adjust properly the offset of src and dest memory for aliasing. */
24384 if (CONST_INT_P (count_exp))
24385 {
24386 if (!issetmem)
24387 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24388 (count / size_needed) * size_needed);
24389 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24390 (count / size_needed) * size_needed);
24391 }
24392 else
24393 {
24394 if (!issetmem)
24395 src = change_address (src, BLKmode, srcreg);
24396 dst = change_address (dst, BLKmode, destreg);
24397 }
24398
24399 /* Step 4: Epilogue to copy the remaining bytes. */
24400 epilogue:
24401 if (label)
24402 {
24403 /* When the main loop is done, COUNT_EXP might hold original count,
24404 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24405 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24406 bytes. Compensate if needed. */
24407
24408 if (size_needed < epilogue_size_needed)
24409 {
24410 tmp =
24411 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24412 GEN_INT (size_needed - 1), count_exp, 1,
24413 OPTAB_DIRECT);
24414 if (tmp != count_exp)
24415 emit_move_insn (count_exp, tmp);
24416 }
24417 emit_label (label);
24418 LABEL_NUSES (label) = 1;
24419 }
24420
24421 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24422 {
24423 if (force_loopy_epilogue)
24424 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24425 epilogue_size_needed);
24426 else
24427 {
24428 if (issetmem)
24429 expand_setmem_epilogue (dst, destreg, promoted_val,
24430 vec_promoted_val, count_exp,
24431 epilogue_size_needed);
24432 else
24433 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24434 epilogue_size_needed);
24435 }
24436 }
24437 if (jump_around_label)
24438 emit_label (jump_around_label);
24439 return true;
24440 }
24441
24442
24443 /* Expand the appropriate insns for doing strlen if not just doing
24444 repnz; scasb
24445
24446 out = result, initialized with the start address
24447 align_rtx = alignment of the address.
24448 scratch = scratch register, initialized with the start address when
24449 not aligned, otherwise undefined
24450
24451 This is just the body. It needs the initializations mentioned above and
24452 some address computing at the end. These things are done in i386.md. */
24453
24454 static void
24455 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24456 {
24457 int align;
24458 rtx tmp;
24459 rtx align_2_label = NULL_RTX;
24460 rtx align_3_label = NULL_RTX;
24461 rtx align_4_label = gen_label_rtx ();
24462 rtx end_0_label = gen_label_rtx ();
24463 rtx mem;
24464 rtx tmpreg = gen_reg_rtx (SImode);
24465 rtx scratch = gen_reg_rtx (SImode);
24466 rtx cmp;
24467
24468 align = 0;
24469 if (CONST_INT_P (align_rtx))
24470 align = INTVAL (align_rtx);
24471
24472 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24473
24474 /* Is there a known alignment and is it less than 4? */
24475 if (align < 4)
24476 {
24477 rtx scratch1 = gen_reg_rtx (Pmode);
24478 emit_move_insn (scratch1, out);
24479 /* Is there a known alignment and is it not 2? */
24480 if (align != 2)
24481 {
24482 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24483 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24484
24485 /* Leave just the 3 lower bits. */
24486 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24487 NULL_RTX, 0, OPTAB_WIDEN);
24488
24489 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24490 Pmode, 1, align_4_label);
24491 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24492 Pmode, 1, align_2_label);
24493 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24494 Pmode, 1, align_3_label);
24495 }
24496 else
24497 {
24498 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24499 check if it is aligned to 4 bytes. */
24500
24501 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24502 NULL_RTX, 0, OPTAB_WIDEN);
24503
24504 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24505 Pmode, 1, align_4_label);
24506 }
24507
24508 mem = change_address (src, QImode, out);
24509
24510 /* Now compare the bytes. */
24511
24512 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24513 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24514 QImode, 1, end_0_label);
24515
24516 /* Increment the address. */
24517 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24518
24519 /* Not needed with an alignment of 2 */
24520 if (align != 2)
24521 {
24522 emit_label (align_2_label);
24523
24524 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24525 end_0_label);
24526
24527 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24528
24529 emit_label (align_3_label);
24530 }
24531
24532 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24533 end_0_label);
24534
24535 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24536 }
24537
24538 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24539 align this loop; it only makes the program bigger and does not help
24540 to speed it up. */
24541 emit_label (align_4_label);
24542
24543 mem = change_address (src, SImode, out);
24544 emit_move_insn (scratch, mem);
24545 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24546
24547 /* This formula yields a nonzero result iff one of the bytes is zero.
24548 This saves three branches inside the loop and many cycles. */
24549
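  /* This computes tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080,
     which is nonzero iff at least one byte of SCRATCH is zero: a zero byte
     turns into 0xff after the subtraction while its complement keeps the
     0x80 bit set.  */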
24550 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24551 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24552 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24553 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24554 gen_int_mode (0x80808080, SImode)));
24555 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24556 align_4_label);
24557
24558 if (TARGET_CMOVE)
24559 {
24560 rtx reg = gen_reg_rtx (SImode);
24561 rtx reg2 = gen_reg_rtx (Pmode);
24562 emit_move_insn (reg, tmpreg);
24563 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24564
24565 /* If zero is not in the first two bytes, move two bytes forward. */
24566 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24567 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24568 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24569 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24570 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24571 reg,
24572 tmpreg)));
24573 /* Emit lea manually to avoid clobbering of flags. */
24574 emit_insn (gen_rtx_SET (SImode, reg2,
24575 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24576
24577 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24578 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24579 emit_insn (gen_rtx_SET (VOIDmode, out,
24580 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24581 reg2,
24582 out)));
24583 }
24584 else
24585 {
24586 rtx end_2_label = gen_label_rtx ();
24587 /* Is zero in the first two bytes? */
24588
24589 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24590 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24591 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24592 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24593 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24594 pc_rtx);
24595 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24596 JUMP_LABEL (tmp) = end_2_label;
24597
24598 /* Not in the first two. Move two bytes forward. */
24599 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24600 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24601
24602 emit_label (end_2_label);
24603
24604 }
24605
24606 /* Avoid branch in fixing the byte. */
24607 tmpreg = gen_lowpart (QImode, tmpreg);
24608 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24609 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24610 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24611 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24612
24613 emit_label (end_0_label);
24614 }
24615
24616 /* Expand strlen. */
24617
24618 bool
24619 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24620 {
24621 rtx addr, scratch1, scratch2, scratch3, scratch4;
24622
24623 /* The generic case of the strlen expander is long. Avoid expanding
24624 it unless TARGET_INLINE_ALL_STRINGOPS. */
24625
24626 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24627 && !TARGET_INLINE_ALL_STRINGOPS
24628 && !optimize_insn_for_size_p ()
24629 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24630 return false;
24631
24632 addr = force_reg (Pmode, XEXP (src, 0));
24633 scratch1 = gen_reg_rtx (Pmode);
24634
24635 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24636 && !optimize_insn_for_size_p ())
24637 {
24638 /* Well it seems that some optimizer does not combine a call like
24639 foo(strlen(bar), strlen(bar));
24640 when the move and the subtraction are done here. It does calculate
24641 the length just once when these instructions are done inside of
24642 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24643 often used and I use one fewer register for the lifetime of
24644 output_strlen_unroll() this is better. */
24645
24646 emit_move_insn (out, addr);
24647
24648 ix86_expand_strlensi_unroll_1 (out, src, align);
24649
24650 /* strlensi_unroll_1 returns the address of the zero at the end of
24651 the string, like memchr(), so compute the length by subtracting
24652 the start address. */
24653 emit_insn (ix86_gen_sub3 (out, out, addr));
24654 }
24655 else
24656 {
24657 rtx unspec;
24658
24659 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24660 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24661 return false;
24662
24663 scratch2 = gen_reg_rtx (Pmode);
24664 scratch3 = gen_reg_rtx (Pmode);
24665 scratch4 = force_reg (Pmode, constm1_rtx);
24666
24667 emit_move_insn (scratch3, addr);
24668 eoschar = force_reg (QImode, eoschar);
24669
24670 src = replace_equiv_address_nv (src, scratch3);
24671
24672 /* If .md starts supporting :P, this can be done in .md. */
24673 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24674 scratch4), UNSPEC_SCAS);
24675 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24676 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24677 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24678 }
24679 return true;
24680 }
24681
24682 /* For a given symbol (function) construct code to compute the address of its
24683 PLT entry in the large x86-64 PIC model. */
24684 static rtx
24685 construct_plt_address (rtx symbol)
24686 {
24687 rtx tmp, unspec;
24688
24689 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24690 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24691 gcc_assert (Pmode == DImode);
24692
24693 tmp = gen_reg_rtx (Pmode);
24694 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24695
24696 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24697 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24698 return tmp;
24699 }
24700
24701 rtx
24702 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24703 rtx callarg2,
24704 rtx pop, bool sibcall)
24705 {
24706 unsigned int const cregs_size
24707 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24708 rtx vec[3 + cregs_size];
24709 rtx use = NULL, call;
24710 unsigned int vec_len = 0;
24711
24712 if (pop == const0_rtx)
24713 pop = NULL;
24714 gcc_assert (!TARGET_64BIT || !pop);
24715
24716 if (TARGET_MACHO && !TARGET_64BIT)
24717 {
24718 #if TARGET_MACHO
24719 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24720 fnaddr = machopic_indirect_call_target (fnaddr);
24721 #endif
24722 }
24723 else
24724 {
24725 /* Static functions and indirect calls don't need the pic register. */
24726 if (flag_pic
24727 && (!TARGET_64BIT
24728 || (ix86_cmodel == CM_LARGE_PIC
24729 && DEFAULT_ABI != MS_ABI))
24730 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24731 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24732 use_reg (&use, pic_offset_table_rtx);
24733 }
24734
24735 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24736 {
24737 rtx al = gen_rtx_REG (QImode, AX_REG);
24738 emit_move_insn (al, callarg2);
24739 use_reg (&use, al);
24740 }
24741
24742 if (ix86_cmodel == CM_LARGE_PIC
24743 && !TARGET_PECOFF
24744 && MEM_P (fnaddr)
24745 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24746 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24747 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24748 else if (sibcall
24749 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24750 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24751 {
24752 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24753 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24754 }
24755
24756 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24757 if (retval)
24758 call = gen_rtx_SET (VOIDmode, retval, call);
24759 vec[vec_len++] = call;
24760
24761 if (pop)
24762 {
24763 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24764 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24765 vec[vec_len++] = pop;
24766 }
24767
24768 if (TARGET_64BIT_MS_ABI
24769 && (!callarg2 || INTVAL (callarg2) != -2))
24770 {
24771 unsigned i;
24772
24773 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24774 UNSPEC_MS_TO_SYSV_CALL);
24775
24776 for (i = 0; i < cregs_size; i++)
24777 {
24778 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24779 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24780
24781 vec[vec_len++]
24782 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24783 }
24784 }
24785
24786 if (vec_len > 1)
24787 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24788 call = emit_call_insn (call);
24789 if (use)
24790 CALL_INSN_FUNCTION_USAGE (call) = use;
24791
24792 return call;
24793 }
24794
24795 /* Output the assembly for a call instruction. */
24796
24797 const char *
24798 ix86_output_call_insn (rtx insn, rtx call_op)
24799 {
24800 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24801 bool seh_nop_p = false;
24802 const char *xasm;
24803
24804 if (SIBLING_CALL_P (insn))
24805 {
24806 if (direct_p)
24807 xasm = "jmp\t%P0";
24808 /* SEH epilogue detection requires the indirect branch case
24809 to include REX.W. */
24810 else if (TARGET_SEH)
24811 xasm = "rex.W jmp %A0";
24812 else
24813 xasm = "jmp\t%A0";
24814
24815 output_asm_insn (xasm, &call_op);
24816 return "";
24817 }
24818
24819 /* SEH unwinding can require an extra nop to be emitted in several
24820 circumstances. Determine if we have one of those. */
24821 if (TARGET_SEH)
24822 {
24823 rtx i;
24824
24825 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24826 {
24827 /* If we get to another real insn, we don't need the nop. */
24828 if (INSN_P (i))
24829 break;
24830
24831 /* If we get to the epilogue note, prevent a catch region from
24832 being adjacent to the standard epilogue sequence. If non-
24833 call-exceptions, we'll have done this during epilogue emission. */
24834 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
24835 && !flag_non_call_exceptions
24836 && !can_throw_internal (insn))
24837 {
24838 seh_nop_p = true;
24839 break;
24840 }
24841 }
24842
24843 /* If we didn't find a real insn following the call, prevent the
24844 unwinder from looking into the next function. */
24845 if (i == NULL)
24846 seh_nop_p = true;
24847 }
24848
24849 if (direct_p)
24850 xasm = "call\t%P0";
24851 else
24852 xasm = "call\t%A0";
24853
24854 output_asm_insn (xasm, &call_op);
24855
24856 if (seh_nop_p)
24857 return "nop";
24858
24859 return "";
24860 }
24861 \f
24862 /* Clear stack slot assignments remembered from previous functions.
24863 This is called from INIT_EXPANDERS once before RTL is emitted for each
24864 function. */
24865
24866 static struct machine_function *
24867 ix86_init_machine_status (void)
24868 {
24869 struct machine_function *f;
24870
24871 f = ggc_alloc_cleared_machine_function ();
24872 f->use_fast_prologue_epilogue_nregs = -1;
24873 f->call_abi = ix86_abi;
24874
24875 return f;
24876 }
24877
24878 /* Return a MEM corresponding to a stack slot with mode MODE.
24879 Allocate a new slot if necessary.
24880
24881 The RTL for a function can have several slots available: N is
24882 which slot to use. */
24883
24884 rtx
24885 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
24886 {
24887 struct stack_local_entry *s;
24888
24889 gcc_assert (n < MAX_386_STACK_LOCALS);
24890
24891 for (s = ix86_stack_locals; s; s = s->next)
24892 if (s->mode == mode && s->n == n)
24893 return validize_mem (copy_rtx (s->rtl));
24894
24895 s = ggc_alloc_stack_local_entry ();
24896 s->n = n;
24897 s->mode = mode;
24898 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
24899
24900 s->next = ix86_stack_locals;
24901 ix86_stack_locals = s;
24902 return validize_mem (s->rtl);
24903 }
24904
24905 static void
24906 ix86_instantiate_decls (void)
24907 {
24908 struct stack_local_entry *s;
24909
24910 for (s = ix86_stack_locals; s; s = s->next)
24911 if (s->rtl != NULL_RTX)
24912 instantiate_decl_rtl (s->rtl);
24913 }
24914 \f
24915 /* Check whether x86 address PARTS is a pc-relative address. */
24916
24917 static bool
24918 rip_relative_addr_p (struct ix86_address *parts)
24919 {
24920 rtx base, index, disp;
24921
24922 base = parts->base;
24923 index = parts->index;
24924 disp = parts->disp;
24925
24926 if (disp && !base && !index)
24927 {
24928 if (TARGET_64BIT)
24929 {
24930 rtx symbol = disp;
24931
24932 if (GET_CODE (disp) == CONST)
24933 symbol = XEXP (disp, 0);
24934 if (GET_CODE (symbol) == PLUS
24935 && CONST_INT_P (XEXP (symbol, 1)))
24936 symbol = XEXP (symbol, 0);
24937
24938 if (GET_CODE (symbol) == LABEL_REF
24939 || (GET_CODE (symbol) == SYMBOL_REF
24940 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
24941 || (GET_CODE (symbol) == UNSPEC
24942 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
24943 || XINT (symbol, 1) == UNSPEC_PCREL
24944 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
24945 return true;
24946 }
24947 }
24948 return false;
24949 }
24950
24951 /* Calculate the length of the memory address in the instruction encoding.
24952 Includes addr32 prefix, does not include the one-byte modrm, opcode,
24953 or other prefixes. We never generate addr32 prefix for LEA insn. */
24954
24955 int
24956 memory_address_length (rtx addr, bool lea)
24957 {
24958 struct ix86_address parts;
24959 rtx base, index, disp;
24960 int len;
24961 int ok;
24962
24963 if (GET_CODE (addr) == PRE_DEC
24964 || GET_CODE (addr) == POST_INC
24965 || GET_CODE (addr) == PRE_MODIFY
24966 || GET_CODE (addr) == POST_MODIFY)
24967 return 0;
24968
24969 ok = ix86_decompose_address (addr, &parts);
24970 gcc_assert (ok);
24971
24972 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
24973
24974 /* If this is not LEA instruction, add the length of addr32 prefix. */
24975 if (TARGET_64BIT && !lea
24976 && (SImode_address_operand (addr, VOIDmode)
24977 || (parts.base && GET_MODE (parts.base) == SImode)
24978 || (parts.index && GET_MODE (parts.index) == SImode)))
24979 len++;
24980
24981 base = parts.base;
24982 index = parts.index;
24983 disp = parts.disp;
24984
24985 if (base && GET_CODE (base) == SUBREG)
24986 base = SUBREG_REG (base);
24987 if (index && GET_CODE (index) == SUBREG)
24988 index = SUBREG_REG (index);
24989
24990 gcc_assert (base == NULL_RTX || REG_P (base));
24991 gcc_assert (index == NULL_RTX || REG_P (index));
24992
24993 /* Rule of thumb:
24994 - esp as the base always wants an index,
24995 - ebp as the base always wants a displacement,
24996 - r12 as the base always wants an index,
24997 - r13 as the base always wants a displacement. */
24998
24999 /* Register Indirect. */
25000 if (base && !index && !disp)
25001 {
25002 /* esp (for its index) and ebp (for its displacement) need
25003 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25004 code. */
25005 if (base == arg_pointer_rtx
25006 || base == frame_pointer_rtx
25007 || REGNO (base) == SP_REG
25008 || REGNO (base) == BP_REG
25009 || REGNO (base) == R12_REG
25010 || REGNO (base) == R13_REG)
25011 len++;
25012 }
25013
25014 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25015 is not disp32, but disp32(%rip), so for disp32 a
25016 SIB byte is needed, unless print_operand_address
25017 optimizes it into disp32(%rip) or (%rip) is implied
25018 by an UNSPEC. */
25019 else if (disp && !base && !index)
25020 {
25021 len += 4;
25022 if (rip_relative_addr_p (&parts))
25023 len++;
25024 }
25025 else
25026 {
25027 /* Find the length of the displacement constant. */
25028 if (disp)
25029 {
25030 if (base && satisfies_constraint_K (disp))
25031 len += 1;
25032 else
25033 len += 4;
25034 }
25035 /* ebp always wants a displacement. Similarly r13. */
25036 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25037 len++;
25038
25039 /* An index requires the two-byte modrm form.... */
25040 if (index
25041 /* ...like esp (or r12), which always wants an index. */
25042 || base == arg_pointer_rtx
25043 || base == frame_pointer_rtx
25044 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25045 len++;
25046 }
25047
25048 return len;
25049 }
25050
25051 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25052 is set, expect the insn to have an 8-bit immediate alternative. */
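/* For example, assuming an insn that does have an imm8 alternative
   (SHORTFORM set), "addl $3, %eax" counts 1 byte of immediate while
   "addl $1000, %eax" counts 4; DImode immediates also count 4, since
   they are encoded as 32-bit sign-extended values.  */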
25053 int
25054 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25055 {
25056 int len = 0;
25057 int i;
25058 extract_insn_cached (insn);
25059 for (i = recog_data.n_operands - 1; i >= 0; --i)
25060 if (CONSTANT_P (recog_data.operand[i]))
25061 {
25062 enum attr_mode mode = get_attr_mode (insn);
25063
25064 gcc_assert (!len);
25065 if (shortform && CONST_INT_P (recog_data.operand[i]))
25066 {
25067 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25068 switch (mode)
25069 {
25070 case MODE_QI:
25071 len = 1;
25072 continue;
25073 case MODE_HI:
25074 ival = trunc_int_for_mode (ival, HImode);
25075 break;
25076 case MODE_SI:
25077 ival = trunc_int_for_mode (ival, SImode);
25078 break;
25079 default:
25080 break;
25081 }
25082 if (IN_RANGE (ival, -128, 127))
25083 {
25084 len = 1;
25085 continue;
25086 }
25087 }
25088 switch (mode)
25089 {
25090 case MODE_QI:
25091 len = 1;
25092 break;
25093 case MODE_HI:
25094 len = 2;
25095 break;
25096 case MODE_SI:
25097 len = 4;
25098 break;
25099 /* Immediates for DImode instructions are encoded
25100 as 32-bit sign-extended values. */
25101 case MODE_DI:
25102 len = 4;
25103 break;
25104 default:
25105 fatal_insn ("unknown insn mode", insn);
25106 }
25107 }
25108 return len;
25109 }
25110
25111 /* Compute default value for "length_address" attribute. */
25112 int
25113 ix86_attr_length_address_default (rtx insn)
25114 {
25115 int i;
25116
25117 if (get_attr_type (insn) == TYPE_LEA)
25118 {
25119 rtx set = PATTERN (insn), addr;
25120
25121 if (GET_CODE (set) == PARALLEL)
25122 set = XVECEXP (set, 0, 0);
25123
25124 gcc_assert (GET_CODE (set) == SET);
25125
25126 addr = SET_SRC (set);
25127
25128 return memory_address_length (addr, true);
25129 }
25130
25131 extract_insn_cached (insn);
25132 for (i = recog_data.n_operands - 1; i >= 0; --i)
25133 if (MEM_P (recog_data.operand[i]))
25134 {
25135 constrain_operands_cached (reload_completed);
25136 if (which_alternative != -1)
25137 {
25138 const char *constraints = recog_data.constraints[i];
25139 int alt = which_alternative;
25140
25141 while (*constraints == '=' || *constraints == '+')
25142 constraints++;
25143 while (alt-- > 0)
25144 while (*constraints++ != ',')
25145 ;
25146 /* Skip ignored operands. */
25147 if (*constraints == 'X')
25148 continue;
25149 }
25150 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25151 }
25152 return 0;
25153 }
25154
25155 /* Compute default value for "length_vex" attribute. It includes
25156 2 or 3 byte VEX prefix and 1 opcode byte. */
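/* In 64-bit mode a DImode general register operand (which needs REX.W) or
   an extended register mentioned in a memory address (which needs REX.X or
   REX.B) forces the 3-byte (C4) form, giving 3 + 1; otherwise the 2-byte
   (C5) prefix suffices, giving 2 + 1, as the scan below computes.  */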
25157
25158 int
25159 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25160 {
25161 int i;
25162
25163 /* Only 0f opcodes can use the 2-byte VEX prefix; the VEX.W bit requires
25164 the 3-byte VEX prefix. */
25165 if (!has_0f_opcode || has_vex_w)
25166 return 3 + 1;
25167
25168 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25169 if (!TARGET_64BIT)
25170 return 2 + 1;
25171
25172 extract_insn_cached (insn);
25173
25174 for (i = recog_data.n_operands - 1; i >= 0; --i)
25175 if (REG_P (recog_data.operand[i]))
25176 {
25177 /* REX.W bit uses 3 byte VEX prefix. */
25178 if (GET_MODE (recog_data.operand[i]) == DImode
25179 && GENERAL_REG_P (recog_data.operand[i]))
25180 return 3 + 1;
25181 }
25182 else
25183 {
25184 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25185 if (MEM_P (recog_data.operand[i])
25186 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25187 return 3 + 1;
25188 }
25189
25190 return 2 + 1;
25191 }
25192 \f
25193 /* Return the maximum number of instructions a cpu can issue. */
25194
25195 static int
25196 ix86_issue_rate (void)
25197 {
25198 switch (ix86_tune)
25199 {
25200 case PROCESSOR_PENTIUM:
25201 case PROCESSOR_BONNELL:
25202 case PROCESSOR_SILVERMONT:
25203 case PROCESSOR_K6:
25204 case PROCESSOR_BTVER2:
25205 case PROCESSOR_PENTIUM4:
25206 case PROCESSOR_NOCONA:
25207 return 2;
25208
25209 case PROCESSOR_PENTIUMPRO:
25210 case PROCESSOR_ATHLON:
25211 case PROCESSOR_K8:
25212 case PROCESSOR_AMDFAM10:
25213 case PROCESSOR_GENERIC:
25214 case PROCESSOR_BTVER1:
25215 return 3;
25216
25217 case PROCESSOR_BDVER1:
25218 case PROCESSOR_BDVER2:
25219 case PROCESSOR_BDVER3:
25220 case PROCESSOR_BDVER4:
25221 case PROCESSOR_CORE2:
25222 case PROCESSOR_NEHALEM:
25223 case PROCESSOR_SANDYBRIDGE:
25224 case PROCESSOR_HASWELL:
25225 return 4;
25226
25227 default:
25228 return 1;
25229 }
25230 }
25231
25232 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
25233 set by DEP_INSN and nothing else set by DEP_INSN. */
25234
25235 static bool
25236 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25237 {
25238 rtx set, set2;
25239
25240 /* Simplify the test for uninteresting insns. */
25241 if (insn_type != TYPE_SETCC
25242 && insn_type != TYPE_ICMOV
25243 && insn_type != TYPE_FCMOV
25244 && insn_type != TYPE_IBR)
25245 return false;
25246
25247 if ((set = single_set (dep_insn)) != 0)
25248 {
25249 set = SET_DEST (set);
25250 set2 = NULL_RTX;
25251 }
25252 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25253 && XVECLEN (PATTERN (dep_insn), 0) == 2
25254 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25255 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25256 {
25257 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25258 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25259 }
25260 else
25261 return false;
25262
25263 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25264 return false;
25265
25266 /* This test is true if the dependent insn reads the flags but
25267 not any other potentially set register. */
25268 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25269 return false;
25270
25271 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25272 return false;
25273
25274 return true;
25275 }
25276
25277 /* Return true iff USE_INSN has a memory address with operands set by
25278 SET_INSN. */
25279
25280 bool
25281 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25282 {
25283 int i;
25284 extract_insn_cached (use_insn);
25285 for (i = recog_data.n_operands - 1; i >= 0; --i)
25286 if (MEM_P (recog_data.operand[i]))
25287 {
25288 rtx addr = XEXP (recog_data.operand[i], 0);
25289 return modified_in_p (addr, set_insn) != 0;
25290 }
25291 return false;
25292 }
25293
25294 /* Helper function for exact_store_load_dependency.
25295 Return true if addr is found in insn. */
25296 static bool
25297 exact_dependency_1 (rtx addr, rtx insn)
25298 {
25299 enum rtx_code code;
25300 const char *format_ptr;
25301 int i, j;
25302
25303 code = GET_CODE (insn);
25304 switch (code)
25305 {
25306 case MEM:
25307 if (rtx_equal_p (addr, insn))
25308 return true;
25309 break;
25310 case REG:
25311 CASE_CONST_ANY:
25312 case SYMBOL_REF:
25313 case CODE_LABEL:
25314 case PC:
25315 case CC0:
25316 case EXPR_LIST:
25317 return false;
25318 default:
25319 break;
25320 }
25321
25322 format_ptr = GET_RTX_FORMAT (code);
25323 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25324 {
25325 switch (*format_ptr++)
25326 {
25327 case 'e':
25328 if (exact_dependency_1 (addr, XEXP (insn, i)))
25329 return true;
25330 break;
25331 case 'E':
25332 for (j = 0; j < XVECLEN (insn, i); j++)
25333 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25334 return true;
25335 break;
25336 }
25337 }
25338 return false;
25339 }
25340
25341 /* Return true if there is an exact dependency between the store and load,
25342 i.e. the same memory address is used in both. */
25343 static bool
25344 exact_store_load_dependency (rtx store, rtx load)
25345 {
25346 rtx set1, set2;
25347
25348 set1 = single_set (store);
25349 if (!set1)
25350 return false;
25351 if (!MEM_P (SET_DEST (set1)))
25352 return false;
25353 set2 = single_set (load);
25354 if (!set2)
25355 return false;
25356 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25357 return true;
25358 return false;
25359 }
25360
25361 static int
25362 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25363 {
25364 enum attr_type insn_type, dep_insn_type;
25365 enum attr_memory memory;
25366 rtx set, set2;
25367 int dep_insn_code_number;
25368
25369 /* Anti and output dependencies have zero cost on all CPUs. */
25370 if (REG_NOTE_KIND (link) != 0)
25371 return 0;
25372
25373 dep_insn_code_number = recog_memoized (dep_insn);
25374
25375 /* If we can't recognize the insns, we can't really do anything. */
25376 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25377 return cost;
25378
25379 insn_type = get_attr_type (insn);
25380 dep_insn_type = get_attr_type (dep_insn);
25381
25382 switch (ix86_tune)
25383 {
25384 case PROCESSOR_PENTIUM:
25385 /* Address Generation Interlock adds a cycle of latency. */
25386 if (insn_type == TYPE_LEA)
25387 {
25388 rtx addr = PATTERN (insn);
25389
25390 if (GET_CODE (addr) == PARALLEL)
25391 addr = XVECEXP (addr, 0, 0);
25392
25393 gcc_assert (GET_CODE (addr) == SET);
25394
25395 addr = SET_SRC (addr);
25396 if (modified_in_p (addr, dep_insn))
25397 cost += 1;
25398 }
25399 else if (ix86_agi_dependent (dep_insn, insn))
25400 cost += 1;
25401
25402 /* ??? Compares pair with jump/setcc. */
25403 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25404 cost = 0;
25405
25406 /* Floating point stores require value to be ready one cycle earlier. */
25407 if (insn_type == TYPE_FMOV
25408 && get_attr_memory (insn) == MEMORY_STORE
25409 && !ix86_agi_dependent (dep_insn, insn))
25410 cost += 1;
25411 break;
25412
25413 case PROCESSOR_PENTIUMPRO:
25414 memory = get_attr_memory (insn);
25415
25416 /* INT->FP conversion is expensive. */
25417 if (get_attr_fp_int_src (dep_insn))
25418 cost += 5;
25419
25420 /* There is one cycle extra latency between an FP op and a store. */
25421 if (insn_type == TYPE_FMOV
25422 && (set = single_set (dep_insn)) != NULL_RTX
25423 && (set2 = single_set (insn)) != NULL_RTX
25424 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25425 && MEM_P (SET_DEST (set2)))
25426 cost += 1;
25427
25428 /* Show the ability of the reorder buffer to hide the latency of a load
25429 by executing it in parallel with the previous instruction when the
25430 previous instruction is not needed to compute the address. */
25431 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25432 && !ix86_agi_dependent (dep_insn, insn))
25433 {
25434 /* Claim moves take one cycle, as the core can issue one load
25435 at a time and the next load can start a cycle later. */
25436 if (dep_insn_type == TYPE_IMOV
25437 || dep_insn_type == TYPE_FMOV)
25438 cost = 1;
25439 else if (cost > 1)
25440 cost--;
25441 }
25442 break;
25443
25444 case PROCESSOR_K6:
25445 memory = get_attr_memory (insn);
25446
25447 /* The esp dependency is resolved before the instruction is really
25448 finished. */
25449 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25450 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25451 return 1;
25452
25453 /* INT->FP conversion is expensive. */
25454 if (get_attr_fp_int_src (dep_insn))
25455 cost += 5;
25456
25457 /* Show the ability of the reorder buffer to hide the latency of a load
25458 by executing it in parallel with the previous instruction when the
25459 previous instruction is not needed to compute the address. */
25460 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25461 && !ix86_agi_dependent (dep_insn, insn))
25462 {
25463 /* Claim moves take one cycle, as the core can issue one load
25464 at a time and the next load can start a cycle later. */
25465 if (dep_insn_type == TYPE_IMOV
25466 || dep_insn_type == TYPE_FMOV)
25467 cost = 1;
25468 else if (cost > 2)
25469 cost -= 2;
25470 else
25471 cost = 1;
25472 }
25473 break;
25474
25475 case PROCESSOR_ATHLON:
25476 case PROCESSOR_K8:
25477 case PROCESSOR_AMDFAM10:
25478 case PROCESSOR_BDVER1:
25479 case PROCESSOR_BDVER2:
25480 case PROCESSOR_BDVER3:
25481 case PROCESSOR_BDVER4:
25482 case PROCESSOR_BTVER1:
25483 case PROCESSOR_BTVER2:
25484 case PROCESSOR_GENERIC:
25485 memory = get_attr_memory (insn);
25486
25487 /* The stack engine allows push and pop instructions to execute in parallel. */
25488 if (((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25489 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25490 && (ix86_tune != PROCESSOR_ATHLON && ix86_tune != PROCESSOR_K8))
25491 return 0;
25492
25493 /* Show the ability of the reorder buffer to hide the latency of a load
25494 by executing it in parallel with the previous instruction when the
25495 previous instruction is not needed to compute the address. */
25496 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25497 && !ix86_agi_dependent (dep_insn, insn))
25498 {
25499 enum attr_unit unit = get_attr_unit (insn);
25500 int loadcost = 3;
25501
25502 /* Because of the difference between the length of the integer and
25503 floating unit pipeline preparation stages, the memory operands
25504 for floating point are cheaper.
25505 
25506 ??? For Athlon the difference is most probably 2. */
25507 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25508 loadcost = 3;
25509 else
25510 loadcost = TARGET_ATHLON ? 2 : 0;
25511
25512 if (cost >= loadcost)
25513 cost -= loadcost;
25514 else
25515 cost = 0;
25516 }
25517 break;
25518
25519 case PROCESSOR_CORE2:
25520 case PROCESSOR_NEHALEM:
25521 case PROCESSOR_SANDYBRIDGE:
25522 case PROCESSOR_HASWELL:
25523 memory = get_attr_memory (insn);
25524
25525 /* The stack engine allows push and pop instructions to execute in parallel. */
25526 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25527 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25528 return 0;
25529
25530 /* Show the ability of the reorder buffer to hide the latency of a load
25531 by executing it in parallel with the previous instruction when the
25532 previous instruction is not needed to compute the address. */
25533 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25534 && !ix86_agi_dependent (dep_insn, insn))
25535 {
25536 if (cost >= 4)
25537 cost -= 4;
25538 else
25539 cost = 0;
25540 }
25541 break;
25542
25543 case PROCESSOR_SILVERMONT:
25544 if (!reload_completed)
25545 return cost;
25546
25547 /* Increase cost of integer loads. */
25548 memory = get_attr_memory (dep_insn);
25549 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25550 {
25551 enum attr_unit unit = get_attr_unit (dep_insn);
25552 if (unit == UNIT_INTEGER && cost == 1)
25553 {
25554 if (memory == MEMORY_LOAD)
25555 cost = 3;
25556 else
25557 {
25558 /* Increase the cost of ld/st for short int types only
25559 because of the store-forwarding issue. */
25560 rtx set = single_set (dep_insn);
25561 if (set && (GET_MODE (SET_DEST (set)) == QImode
25562 || GET_MODE (SET_DEST (set)) == HImode))
25563 {
25564 /* Increase the cost of the store/load insn if an exact
25565 dependence exists and it is a load insn. */
25566 enum attr_memory insn_memory = get_attr_memory (insn);
25567 if (insn_memory == MEMORY_LOAD
25568 && exact_store_load_dependency (dep_insn, insn))
25569 cost = 3;
25570 }
25571 }
25572 }
25573 }
25574
25575 default:
25576 break;
25577 }
25578
25579 return cost;
25580 }
25581
25582 /* How many alternative schedules to try. This should be as wide as the
25583 scheduling freedom in the DFA, but no wider. Making this value too
25584 large results in extra work for the scheduler. */
25585
25586 static int
25587 ia32_multipass_dfa_lookahead (void)
25588 {
25589 switch (ix86_tune)
25590 {
25591 case PROCESSOR_PENTIUM:
25592 return 2;
25593
25594 case PROCESSOR_PENTIUMPRO:
25595 case PROCESSOR_K6:
25596 return 1;
25597
25598 case PROCESSOR_BDVER1:
25599 case PROCESSOR_BDVER2:
25600 case PROCESSOR_BDVER3:
25601 case PROCESSOR_BDVER4:
25602 /* We use a lookahead value of 4 for BD both for the pre- and post-reload
25603 schedulers. The plan is to also use a value of 8 for -O3. */
25604 return 4;
25605
25606 case PROCESSOR_CORE2:
25607 case PROCESSOR_NEHALEM:
25608 case PROCESSOR_SANDYBRIDGE:
25609 case PROCESSOR_HASWELL:
25610 case PROCESSOR_BONNELL:
25611 case PROCESSOR_SILVERMONT:
25612 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25613 as the number of instructions that can be executed in a cycle, i.e.,
25614 issue_rate. I wonder why tuning for many CPUs does not do this. */
25615 if (reload_completed)
25616 return ix86_issue_rate ();
25617 /* Don't use lookahead for pre-reload schedule to save compile time. */
25618 return 0;
25619
25620 default:
25621 return 0;
25622 }
25623 }
25624
25625 /* Return true if target platform supports macro-fusion. */
25626
25627 static bool
25628 ix86_macro_fusion_p ()
25629 {
25630 return TARGET_FUSE_CMP_AND_BRANCH;
25631 }
25632
25633 /* Check whether the current microarchitecture supports macro fusion
25634 for the insn pair "CONDGEN + CONDJMP". Refer to the
25635 "Intel Architectures Optimization Reference Manual". */
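/* For example, a cmp/test between registers (or register and immediate)
   followed by a conditional jump may fuse; a cmp of a memory operand with
   an immediate, a RIP-relative memory operand, an ALU op with a memory
   destination, or inc/dec paired with an unsigned-condition jump does not,
   as the checks below reflect.  */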
25636
25637 static bool
25638 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25639 {
25640 rtx src, dest;
25641 rtx single_set = single_set (condgen);
25642 enum rtx_code ccode;
25643 rtx compare_set = NULL_RTX, test_if, cond;
25644 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25645
25646 if (get_attr_type (condgen) != TYPE_TEST
25647 && get_attr_type (condgen) != TYPE_ICMP
25648 && get_attr_type (condgen) != TYPE_INCDEC
25649 && get_attr_type (condgen) != TYPE_ALU)
25650 return false;
25651
25652 if (single_set == NULL_RTX
25653 && !TARGET_FUSE_ALU_AND_BRANCH)
25654 return false;
25655
25656 if (single_set != NULL_RTX)
25657 compare_set = single_set;
25658 else
25659 {
25660 int i;
25661 rtx pat = PATTERN (condgen);
25662 for (i = 0; i < XVECLEN (pat, 0); i++)
25663 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25664 {
25665 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25666 if (GET_CODE (set_src) == COMPARE)
25667 compare_set = XVECEXP (pat, 0, i);
25668 else
25669 alu_set = XVECEXP (pat, 0, i);
25670 }
25671 }
25672 if (compare_set == NULL_RTX)
25673 return false;
25674 src = SET_SRC (compare_set);
25675 if (GET_CODE (src) != COMPARE)
25676 return false;
25677
25678 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25679 supported. */
25680 if ((MEM_P (XEXP (src, 0))
25681 && CONST_INT_P (XEXP (src, 1)))
25682 || (MEM_P (XEXP (src, 1))
25683 && CONST_INT_P (XEXP (src, 0))))
25684 return false;
25685
25686 /* No fusion for RIP-relative address. */
25687 if (MEM_P (XEXP (src, 0)))
25688 addr = XEXP (XEXP (src, 0), 0);
25689 else if (MEM_P (XEXP (src, 1)))
25690 addr = XEXP (XEXP (src, 1), 0);
25691
25692 if (addr) {
25693 ix86_address parts;
25694 int ok = ix86_decompose_address (addr, &parts);
25695 gcc_assert (ok);
25696
25697 if (rip_relative_addr_p (&parts))
25698 return false;
25699 }
25700
25701 test_if = SET_SRC (pc_set (condjmp));
25702 cond = XEXP (test_if, 0);
25703 ccode = GET_CODE (cond);
25704 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25705 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25706 && (ccode == GE
25707 || ccode == GT
25708 || ccode == LE
25709 || ccode == LT))
25710 return false;
25711
25712 /* Return true for TYPE_TEST and TYPE_ICMP. */
25713 if (get_attr_type (condgen) == TYPE_TEST
25714 || get_attr_type (condgen) == TYPE_ICMP)
25715 return true;
25716
25717 /* The following handles the macro-fusion case for alu + jmp. */
25718 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25719 return false;
25720
25721 /* No fusion for alu op with memory destination operand. */
25722 dest = SET_DEST (alu_set);
25723 if (MEM_P (dest))
25724 return false;
25725
25726 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25727 supported. */
25728 if (get_attr_type (condgen) == TYPE_INCDEC
25729 && (ccode == GEU
25730 || ccode == GTU
25731 || ccode == LEU
25732 || ccode == LTU))
25733 return false;
25734
25735 return true;
25736 }
25737
25738 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
25739 execution. It is applied if
25740 (1) an IMUL instruction is on the top of the list;
25741 (2) there is exactly one producer of an independent IMUL instruction in
25742 the ready list.
25743 Return the index of the IMUL producer if it was found and -1 otherwise. */
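/* Rough intent: when an IMUL is about to issue and the sole producer of
   another, independent IMUL is also in the ready list, scheduling that
   producer first lets the two IMULs overlap in Atom's pipelined
   multiplier.  */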
25744 static int
25745 do_reorder_for_imul (rtx *ready, int n_ready)
25746 {
25747 rtx insn, set, insn1, insn2;
25748 sd_iterator_def sd_it;
25749 dep_t dep;
25750 int index = -1;
25751 int i;
25752
25753 if (ix86_tune != PROCESSOR_BONNELL)
25754 return index;
25755
25756 /* Check that IMUL instruction is on the top of ready list. */
25757 insn = ready[n_ready - 1];
25758 set = single_set (insn);
25759 if (!set)
25760 return index;
25761 if (!(GET_CODE (SET_SRC (set)) == MULT
25762 && GET_MODE (SET_SRC (set)) == SImode))
25763 return index;
25764
25765 /* Search for producer of independent IMUL instruction. */
25766 for (i = n_ready - 2; i >= 0; i--)
25767 {
25768 insn = ready[i];
25769 if (!NONDEBUG_INSN_P (insn))
25770 continue;
25771 /* Skip IMUL instruction. */
25772 insn2 = PATTERN (insn);
25773 if (GET_CODE (insn2) == PARALLEL)
25774 insn2 = XVECEXP (insn2, 0, 0);
25775 if (GET_CODE (insn2) == SET
25776 && GET_CODE (SET_SRC (insn2)) == MULT
25777 && GET_MODE (SET_SRC (insn2)) == SImode)
25778 continue;
25779
25780 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25781 {
25782 rtx con;
25783 con = DEP_CON (dep);
25784 if (!NONDEBUG_INSN_P (con))
25785 continue;
25786 insn1 = PATTERN (con);
25787 if (GET_CODE (insn1) == PARALLEL)
25788 insn1 = XVECEXP (insn1, 0, 0);
25789
25790 if (GET_CODE (insn1) == SET
25791 && GET_CODE (SET_SRC (insn1)) == MULT
25792 && GET_MODE (SET_SRC (insn1)) == SImode)
25793 {
25794 sd_iterator_def sd_it1;
25795 dep_t dep1;
25796 /* Check if there is no other dependee for IMUL. */
25797 index = i;
25798 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25799 {
25800 rtx pro;
25801 pro = DEP_PRO (dep1);
25802 if (!NONDEBUG_INSN_P (pro))
25803 continue;
25804 if (pro != insn)
25805 index = -1;
25806 }
25807 if (index >= 0)
25808 break;
25809 }
25810 }
25811 if (index >= 0)
25812 break;
25813 }
25814 return index;
25815 }
25816
25817 /* Try to find the best candidate for the top of the ready list if two insns
25818 have the same priority - a candidate is best if its dependees were
25819 scheduled earlier. Applied to Silvermont only.
25820 Return true if the top 2 insns must be interchanged. */
25821 static bool
25822 swap_top_of_ready_list (rtx *ready, int n_ready)
25823 {
25824 rtx top = ready[n_ready - 1];
25825 rtx next = ready[n_ready - 2];
25826 rtx set;
25827 sd_iterator_def sd_it;
25828 dep_t dep;
25829 int clock1 = -1;
25830 int clock2 = -1;
25831 #define INSN_TICK(INSN) (HID (INSN)->tick)
25832
25833 if (ix86_tune != PROCESSOR_SILVERMONT)
25834 return false;
25835
25836 if (!NONDEBUG_INSN_P (top))
25837 return false;
25838 if (!NONJUMP_INSN_P (top))
25839 return false;
25840 if (!NONDEBUG_INSN_P (next))
25841 return false;
25842 if (!NONJUMP_INSN_P (next))
25843 return false;
25844 set = single_set (top);
25845 if (!set)
25846 return false;
25847 set = single_set (next);
25848 if (!set)
25849 return false;
25850
25851 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
25852 {
25853 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
25854 return false;
25855 /* Determine the winner more precisely. */
25856 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
25857 {
25858 rtx pro;
25859 pro = DEP_PRO (dep);
25860 if (!NONDEBUG_INSN_P (pro))
25861 continue;
25862 if (INSN_TICK (pro) > clock1)
25863 clock1 = INSN_TICK (pro);
25864 }
25865 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
25866 {
25867 rtx pro;
25868 pro = DEP_PRO (dep);
25869 if (!NONDEBUG_INSN_P (pro))
25870 continue;
25871 if (INSN_TICK (pro) > clock2)
25872 clock2 = INSN_TICK (pro);
25873 }
25874
25875 if (clock1 == clock2)
25876 {
25877 /* Determine the winner - a load must win. */
25878 enum attr_memory memory1, memory2;
25879 memory1 = get_attr_memory (top);
25880 memory2 = get_attr_memory (next);
25881 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
25882 return true;
25883 }
25884 return (bool) (clock2 < clock1);
25885 }
25886 return false;
25887 #undef INSN_TICK
25888 }
25889
25890 /* Perform possible reordering of the ready list for Atom/Silvermont only.
25891 Return the issue rate. */
25892 static int
25893 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
25894 int clock_var)
25895 {
25896 int issue_rate = -1;
25897 int n_ready = *pn_ready;
25898 int i;
25899 rtx insn;
25900 int index = -1;
25901
25902 /* Set up issue rate. */
25903 issue_rate = ix86_issue_rate ();
25904
25905 /* Do reordering for BONNELL/SILVERMONT only. */
25906 if (ix86_tune != PROCESSOR_BONNELL
25907 && ix86_tune != PROCESSOR_SILVERMONT)
25908 return issue_rate;
25909
25910 /* Nothing to do if ready list contains only 1 instruction. */
25911 if (n_ready <= 1)
25912 return issue_rate;
25913
25914 /* Do reordering for the post-reload scheduler only. */
25915 if (!reload_completed)
25916 return issue_rate;
25917
25918 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
25919 {
25920 if (sched_verbose > 1)
25921 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
25922 INSN_UID (ready[index]));
25923
25924 /* Put IMUL producer (ready[index]) at the top of ready list. */
25925 insn = ready[index];
25926 for (i = index; i < n_ready - 1; i++)
25927 ready[i] = ready[i + 1];
25928 ready[n_ready - 1] = insn;
25929 return issue_rate;
25930 }
25931 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
25932 {
25933 if (sched_verbose > 1)
25934 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
25935 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
25936 /* Swap 2 top elements of ready list. */
25937 insn = ready[n_ready - 1];
25938 ready[n_ready - 1] = ready[n_ready - 2];
25939 ready[n_ready - 2] = insn;
25940 }
25941 return issue_rate;
25942 }
25943
25944 static bool
25945 ix86_class_likely_spilled_p (reg_class_t);
25946
25947 /* Return true if the lhs of INSN is a HW function argument register, and set
25948 IS_SPILLED to true if it is a likely-spilled HW register. */
25949 static bool
25950 insn_is_function_arg (rtx insn, bool* is_spilled)
25951 {
25952 rtx dst;
25953
25954 if (!NONDEBUG_INSN_P (insn))
25955 return false;
25956 /* Call instructions are not movable, ignore them. */
25957 if (CALL_P (insn))
25958 return false;
25959 insn = PATTERN (insn);
25960 if (GET_CODE (insn) == PARALLEL)
25961 insn = XVECEXP (insn, 0, 0);
25962 if (GET_CODE (insn) != SET)
25963 return false;
25964 dst = SET_DEST (insn);
25965 if (REG_P (dst) && HARD_REGISTER_P (dst)
25966 && ix86_function_arg_regno_p (REGNO (dst)))
25967 {
25968 /* Is it likely spilled HW register? */
25969 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
25970 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
25971 *is_spilled = true;
25972 return true;
25973 }
25974 return false;
25975 }
25976
25977 /* Add output dependencies for a chain of adjacent function arguments only if
25978 there is a move to a likely-spilled HW register. Return the first argument
25979 if at least one dependence was added, or NULL otherwise. */
25980 static rtx
25981 add_parameter_dependencies (rtx call, rtx head)
25982 {
25983 rtx insn;
25984 rtx last = call;
25985 rtx first_arg = NULL;
25986 bool is_spilled = false;
25987
25988 head = PREV_INSN (head);
25989
25990 /* Find the argument-passing instruction nearest to the call. */
25991 while (true)
25992 {
25993 last = PREV_INSN (last);
25994 if (last == head)
25995 return NULL;
25996 if (!NONDEBUG_INSN_P (last))
25997 continue;
25998 if (insn_is_function_arg (last, &is_spilled))
25999 break;
26000 return NULL;
26001 }
26002
26003 first_arg = last;
26004 while (true)
26005 {
26006 insn = PREV_INSN (last);
26007 if (!INSN_P (insn))
26008 break;
26009 if (insn == head)
26010 break;
26011 if (!NONDEBUG_INSN_P (insn))
26012 {
26013 last = insn;
26014 continue;
26015 }
26016 if (insn_is_function_arg (insn, &is_spilled))
26017 {
26018 /* Add an output dependence between two function arguments if the chain
26019 of output arguments contains likely-spilled HW registers. */
26020 if (is_spilled)
26021 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26022 first_arg = last = insn;
26023 }
26024 else
26025 break;
26026 }
26027 if (!is_spilled)
26028 return NULL;
26029 return first_arg;
26030 }
26031
26032 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
26033 code motion. */
26034 static void
26035 avoid_func_arg_motion (rtx first_arg, rtx insn)
26036 {
26037 rtx set;
26038 rtx tmp;
26039
26040 set = single_set (insn);
26041 if (!set)
26042 return;
26043 tmp = SET_DEST (set);
26044 if (REG_P (tmp))
26045 {
26046 /* Add output dependency to the first function argument. */
26047 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26048 return;
26049 }
26050 /* Add anti dependency. */
26051 add_dependence (first_arg, insn, REG_DEP_ANTI);
26052 }
26053
26054 /* Avoid cross-block motion of a function argument by adding a dependency
26055 from the first non-jump instruction in BB. */
26056 static void
26057 add_dependee_for_func_arg (rtx arg, basic_block bb)
26058 {
26059 rtx insn = BB_END (bb);
26060
26061 while (insn)
26062 {
26063 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26064 {
26065 rtx set = single_set (insn);
26066 if (set)
26067 {
26068 avoid_func_arg_motion (arg, insn);
26069 return;
26070 }
26071 }
26072 if (insn == BB_HEAD (bb))
26073 return;
26074 insn = PREV_INSN (insn);
26075 }
26076 }
26077
26078 /* Hook for pre-reload schedule - avoid motion of function arguments
26079 passed in likely spilled HW registers. */
26080 static void
26081 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26082 {
26083 rtx insn;
26084 rtx first_arg = NULL;
26085 if (reload_completed)
26086 return;
26087 while (head != tail && DEBUG_INSN_P (head))
26088 head = NEXT_INSN (head);
26089 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26090 if (INSN_P (insn) && CALL_P (insn))
26091 {
26092 first_arg = add_parameter_dependencies (insn, head);
26093 if (first_arg)
26094 {
26095 /* Add a dependee for the first argument to predecessors only if the
26096 region contains more than one block. */
26097 basic_block bb = BLOCK_FOR_INSN (insn);
26098 int rgn = CONTAINING_RGN (bb->index);
26099 int nr_blks = RGN_NR_BLOCKS (rgn);
26100 /* Skip trivial regions and region head blocks that can have
26101 predecessors outside of region. */
26102 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26103 {
26104 edge e;
26105 edge_iterator ei;
26106 /* Assume that region is SCC, i.e. all immediate predecessors
26107 of non-head block are in the same region. */
26108 FOR_EACH_EDGE (e, ei, bb->preds)
26109 {
26110 /* Avoid creating loop-carried dependencies by
26111 using the topological ordering in the region. */
26112 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26113 add_dependee_for_func_arg (first_arg, e->src);
26114 }
26115 }
26116 insn = first_arg;
26117 if (insn == head)
26118 break;
26119 }
26120 }
26121 else if (first_arg)
26122 avoid_func_arg_motion (first_arg, insn);
26123 }
26124
26125 /* Hook for the pre-reload schedule - set the priority of moves from likely
26126 spilled HW registers to the maximum, to schedule them as soon as possible.
26127 These are moves from function argument registers at the top of the function
26128 entry and moves from function return value registers after a call. */
26129 static int
26130 ix86_adjust_priority (rtx insn, int priority)
26131 {
26132 rtx set;
26133
26134 if (reload_completed)
26135 return priority;
26136
26137 if (!NONDEBUG_INSN_P (insn))
26138 return priority;
26139
26140 set = single_set (insn);
26141 if (set)
26142 {
26143 rtx tmp = SET_SRC (set);
26144 if (REG_P (tmp)
26145 && HARD_REGISTER_P (tmp)
26146 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26147 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26148 return current_sched_info->sched_max_insns_priority;
26149 }
26150
26151 return priority;
26152 }
26153
26154 /* Model decoder of Core 2/i7.
26155 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26156 track the instruction fetch block boundaries and make sure that long
26157 (9+ bytes) instructions are assigned to D0. */
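/* For example, with a 16-byte fetch block and at most 6 insns decoded per
   cycle, an insn longer than 8 bytes can only go through the first decoder,
   so the filter below masks such insns out of ready_try unless they would
   be the first insn issued in the cycle.  */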
26158
26159 /* Maximum length of an insn that can be handled by
26160 a secondary decoder unit. '8' for Core 2/i7. */
26161 static int core2i7_secondary_decoder_max_insn_size;
26162
26163 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26164 '16' for Core 2/i7. */
26165 static int core2i7_ifetch_block_size;
26166
26167 /* Maximum number of instructions decoder can handle per cycle.
26168 '6' for Core 2/i7. */
26169 static int core2i7_ifetch_block_max_insns;
26170
26171 typedef struct ix86_first_cycle_multipass_data_ *
26172 ix86_first_cycle_multipass_data_t;
26173 typedef const struct ix86_first_cycle_multipass_data_ *
26174 const_ix86_first_cycle_multipass_data_t;
26175
26176 /* A variable to store target state across calls to max_issue within
26177 one cycle. */
26178 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26179 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26180
26181 /* Initialize DATA. */
26182 static void
26183 core2i7_first_cycle_multipass_init (void *_data)
26184 {
26185 ix86_first_cycle_multipass_data_t data
26186 = (ix86_first_cycle_multipass_data_t) _data;
26187
26188 data->ifetch_block_len = 0;
26189 data->ifetch_block_n_insns = 0;
26190 data->ready_try_change = NULL;
26191 data->ready_try_change_size = 0;
26192 }
26193
26194 /* Advancing the cycle; reset ifetch block counts. */
26195 static void
26196 core2i7_dfa_post_advance_cycle (void)
26197 {
26198 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26199
26200 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26201
26202 data->ifetch_block_len = 0;
26203 data->ifetch_block_n_insns = 0;
26204 }
26205
26206 static int min_insn_size (rtx);
26207
26208 /* Filter out insns from ready_try that the core will not be able to issue
26209 on the current cycle due to decoder restrictions. */
26210 static void
26211 core2i7_first_cycle_multipass_filter_ready_try
26212 (const_ix86_first_cycle_multipass_data_t data,
26213 char *ready_try, int n_ready, bool first_cycle_insn_p)
26214 {
26215 while (n_ready--)
26216 {
26217 rtx insn;
26218 int insn_size;
26219
26220 if (ready_try[n_ready])
26221 continue;
26222
26223 insn = get_ready_element (n_ready);
26224 insn_size = min_insn_size (insn);
26225
26226 if (/* If this insn is too long for a secondary decoder ... */
26227 (!first_cycle_insn_p
26228 && insn_size > core2i7_secondary_decoder_max_insn_size)
26229 /* ... or it would not fit into the ifetch block ... */
26230 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26231 /* ... or the decoder is full already ... */
26232 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26233 /* ... mask the insn out. */
26234 {
26235 ready_try[n_ready] = 1;
26236
26237 if (data->ready_try_change)
26238 bitmap_set_bit (data->ready_try_change, n_ready);
26239 }
26240 }
26241 }
26242
26243 /* Prepare for a new round of multipass lookahead scheduling. */
26244 static void
26245 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
26246 bool first_cycle_insn_p)
26247 {
26248 ix86_first_cycle_multipass_data_t data
26249 = (ix86_first_cycle_multipass_data_t) _data;
26250 const_ix86_first_cycle_multipass_data_t prev_data
26251 = ix86_first_cycle_multipass_data;
26252
26253 /* Restore the state from the end of the previous round. */
26254 data->ifetch_block_len = prev_data->ifetch_block_len;
26255 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26256
26257 /* Filter instructions that cannot be issued on current cycle due to
26258 decoder restrictions. */
26259 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26260 first_cycle_insn_p);
26261 }
26262
26263 /* INSN is being issued in current solution. Account for its impact on
26264 the decoder model. */
26265 static void
26266 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
26267 rtx insn, const void *_prev_data)
26268 {
26269 ix86_first_cycle_multipass_data_t data
26270 = (ix86_first_cycle_multipass_data_t) _data;
26271 const_ix86_first_cycle_multipass_data_t prev_data
26272 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26273
26274 int insn_size = min_insn_size (insn);
26275
26276 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26277 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26278 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26279 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26280
26281 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26282 if (!data->ready_try_change)
26283 {
26284 data->ready_try_change = sbitmap_alloc (n_ready);
26285 data->ready_try_change_size = n_ready;
26286 }
26287 else if (data->ready_try_change_size < n_ready)
26288 {
26289 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26290 n_ready, 0);
26291 data->ready_try_change_size = n_ready;
26292 }
26293 bitmap_clear (data->ready_try_change);
26294
26295 /* Filter out insns from ready_try that the core will not be able to issue
26296 on the current cycle due to decoder restrictions. */
26297 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26298 false);
26299 }
26300
26301 /* Revert the effect on ready_try. */
26302 static void
26303 core2i7_first_cycle_multipass_backtrack (const void *_data,
26304 char *ready_try,
26305 int n_ready ATTRIBUTE_UNUSED)
26306 {
26307 const_ix86_first_cycle_multipass_data_t data
26308 = (const_ix86_first_cycle_multipass_data_t) _data;
26309 unsigned int i = 0;
26310 sbitmap_iterator sbi;
26311
26312 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26313 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26314 {
26315 ready_try[i] = 0;
26316 }
26317 }
26318
26319 /* Save the result of multipass lookahead scheduling for the next round. */
26320 static void
26321 core2i7_first_cycle_multipass_end (const void *_data)
26322 {
26323 const_ix86_first_cycle_multipass_data_t data
26324 = (const_ix86_first_cycle_multipass_data_t) _data;
26325 ix86_first_cycle_multipass_data_t next_data
26326 = ix86_first_cycle_multipass_data;
26327
26328 if (data != NULL)
26329 {
26330 next_data->ifetch_block_len = data->ifetch_block_len;
26331 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26332 }
26333 }
26334
26335 /* Deallocate target data. */
26336 static void
26337 core2i7_first_cycle_multipass_fini (void *_data)
26338 {
26339 ix86_first_cycle_multipass_data_t data
26340 = (ix86_first_cycle_multipass_data_t) _data;
26341
26342 if (data->ready_try_change)
26343 {
26344 sbitmap_free (data->ready_try_change);
26345 data->ready_try_change = NULL;
26346 data->ready_try_change_size = 0;
26347 }
26348 }
26349
26350 /* Prepare for scheduling pass. */
26351 static void
26352 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26353 int verbose ATTRIBUTE_UNUSED,
26354 int max_uid ATTRIBUTE_UNUSED)
26355 {
26356 /* Install scheduling hooks for current CPU. Some of these hooks are used
26357 in time-critical parts of the scheduler, so we only set them up when
26358 they are actually used. */
26359 switch (ix86_tune)
26360 {
26361 case PROCESSOR_CORE2:
26362 case PROCESSOR_NEHALEM:
26363 case PROCESSOR_SANDYBRIDGE:
26364 case PROCESSOR_HASWELL:
26365 /* Do not perform multipass scheduling for pre-reload schedule
26366 to save compile time. */
26367 if (reload_completed)
26368 {
26369 targetm.sched.dfa_post_advance_cycle
26370 = core2i7_dfa_post_advance_cycle;
26371 targetm.sched.first_cycle_multipass_init
26372 = core2i7_first_cycle_multipass_init;
26373 targetm.sched.first_cycle_multipass_begin
26374 = core2i7_first_cycle_multipass_begin;
26375 targetm.sched.first_cycle_multipass_issue
26376 = core2i7_first_cycle_multipass_issue;
26377 targetm.sched.first_cycle_multipass_backtrack
26378 = core2i7_first_cycle_multipass_backtrack;
26379 targetm.sched.first_cycle_multipass_end
26380 = core2i7_first_cycle_multipass_end;
26381 targetm.sched.first_cycle_multipass_fini
26382 = core2i7_first_cycle_multipass_fini;
26383
26384 /* Set decoder parameters. */
26385 core2i7_secondary_decoder_max_insn_size = 8;
26386 core2i7_ifetch_block_size = 16;
26387 core2i7_ifetch_block_max_insns = 6;
26388 break;
26389 }
26390 /* ... Fall through ... */
26391 default:
26392 targetm.sched.dfa_post_advance_cycle = NULL;
26393 targetm.sched.first_cycle_multipass_init = NULL;
26394 targetm.sched.first_cycle_multipass_begin = NULL;
26395 targetm.sched.first_cycle_multipass_issue = NULL;
26396 targetm.sched.first_cycle_multipass_backtrack = NULL;
26397 targetm.sched.first_cycle_multipass_end = NULL;
26398 targetm.sched.first_cycle_multipass_fini = NULL;
26399 break;
26400 }
26401 }
26402
26403 \f
26404 /* Compute the alignment given to a constant that is being placed in memory.
26405 EXP is the constant and ALIGN is the alignment that the object would
26406 ordinarily have.
26407 The value of this function is used instead of that alignment to align
26408 the object. */
26409
26410 int
26411 ix86_constant_alignment (tree exp, int align)
26412 {
26413 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26414 || TREE_CODE (exp) == INTEGER_CST)
26415 {
26416 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26417 return 64;
26418 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26419 return 128;
26420 }
26421 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26422 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26423 return BITS_PER_WORD;
26424
26425 return align;
26426 }
26427
26428 /* Compute the alignment for a static variable.
26429 TYPE is the data type, and ALIGN is the alignment that
26430 the object would ordinarily have. The value of this function is used
26431 instead of that alignment to align the object. */
26432
26433 int
26434 ix86_data_alignment (tree type, int align, bool opt)
26435 {
26436 /* A data structure equal to or greater than the size of a cache line
26437 (64 bytes in the Pentium 4 and other recent Intel processors, including
26438 processors based on the Intel Core microarchitecture) should be aligned
26439 so that its base address is a multiple of the cache line size. */
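/* For example, with a 64-byte prefetch block this gives
   max_align = 64 * 8 = 512 bits, so a sufficiently large aggregate is
   aligned to a cache line (subject to MAX_OFILE_ALIGNMENT).  */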
26440
26441 int max_align
26442 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26443
26444 if (max_align < BITS_PER_WORD)
26445 max_align = BITS_PER_WORD;
26446
26447 if (opt
26448 && AGGREGATE_TYPE_P (type)
26449 && TYPE_SIZE (type)
26450 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26451 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
26452 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26453 && align < max_align)
26454 align = max_align;
26455
26456 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26457 to a 16-byte boundary. */
26458 if (TARGET_64BIT)
26459 {
26460 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26461 && TYPE_SIZE (type)
26462 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26463 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
26464 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26465 return 128;
26466 }
26467
26468 if (!opt)
26469 return align;
26470
26471 if (TREE_CODE (type) == ARRAY_TYPE)
26472 {
26473 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26474 return 64;
26475 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26476 return 128;
26477 }
26478 else if (TREE_CODE (type) == COMPLEX_TYPE)
26479 {
26480
26481 if (TYPE_MODE (type) == DCmode && align < 64)
26482 return 64;
26483 if ((TYPE_MODE (type) == XCmode
26484 || TYPE_MODE (type) == TCmode) && align < 128)
26485 return 128;
26486 }
26487 else if ((TREE_CODE (type) == RECORD_TYPE
26488 || TREE_CODE (type) == UNION_TYPE
26489 || TREE_CODE (type) == QUAL_UNION_TYPE)
26490 && TYPE_FIELDS (type))
26491 {
26492 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26493 return 64;
26494 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26495 return 128;
26496 }
26497 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26498 || TREE_CODE (type) == INTEGER_TYPE)
26499 {
26500 if (TYPE_MODE (type) == DFmode && align < 64)
26501 return 64;
26502 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26503 return 128;
26504 }
26505
26506 return align;
26507 }
26508
26509 /* Compute the alignment for a local variable or a stack slot. EXP is
26510 the data type or decl itself, MODE is the widest mode available and
26511 ALIGN is the alignment that the object would ordinarily have. The
26512 value of this macro is used instead of that alignment to align the
26513 object. */
26514
26515 unsigned int
26516 ix86_local_alignment (tree exp, enum machine_mode mode,
26517 unsigned int align)
26518 {
26519 tree type, decl;
26520
26521 if (exp && DECL_P (exp))
26522 {
26523 type = TREE_TYPE (exp);
26524 decl = exp;
26525 }
26526 else
26527 {
26528 type = exp;
26529 decl = NULL;
26530 }
26531
26532 /* Don't do dynamic stack realignment for long long objects with
26533 -mpreferred-stack-boundary=2. */
26534 if (!TARGET_64BIT
26535 && align == 64
26536 && ix86_preferred_stack_boundary < 64
26537 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26538 && (!type || !TYPE_USER_ALIGN (type))
26539 && (!decl || !DECL_USER_ALIGN (decl)))
26540 align = 32;
26541
26542 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26543 register in MODE. We will return the larger of the XFmode and
26544 DFmode alignments. */
26545 if (!type)
26546 {
26547 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26548 align = GET_MODE_ALIGNMENT (DFmode);
26549 return align;
26550 }
26551
26552 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26553 to a 16-byte boundary. The exact wording is:
26554
26555 An array uses the same alignment as its elements, except that a local or
26556 global array variable of length at least 16 bytes or
26557 a C99 variable-length array variable always has alignment of at least 16 bytes.
26558
26559 This was added to allow use of aligned SSE instructions at arrays. This
26560 rule is meant for static storage (where compiler can not do the analysis
26561 by itself). We follow it for automatic variables only when convenient.
26562 We fully control everything in the function compiled and functions from
26563 other unit can not rely on the alignment.
26564
26565 Exclude va_list type. It is the common case of local array where
26566 we can not benefit from the alignment.
26567
26568 TODO: Probably one should optimize for size only when var is not escaping. */
26569 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26570 && TARGET_SSE)
26571 {
26572 if (AGGREGATE_TYPE_P (type)
26573 && (va_list_type_node == NULL_TREE
26574 || (TYPE_MAIN_VARIANT (type)
26575 != TYPE_MAIN_VARIANT (va_list_type_node)))
26576 && TYPE_SIZE (type)
26577 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26578 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
26579 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26580 return 128;
26581 }
26582 if (TREE_CODE (type) == ARRAY_TYPE)
26583 {
26584 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26585 return 64;
26586 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26587 return 128;
26588 }
26589 else if (TREE_CODE (type) == COMPLEX_TYPE)
26590 {
26591 if (TYPE_MODE (type) == DCmode && align < 64)
26592 return 64;
26593 if ((TYPE_MODE (type) == XCmode
26594 || TYPE_MODE (type) == TCmode) && align < 128)
26595 return 128;
26596 }
26597 else if ((TREE_CODE (type) == RECORD_TYPE
26598 || TREE_CODE (type) == UNION_TYPE
26599 || TREE_CODE (type) == QUAL_UNION_TYPE)
26600 && TYPE_FIELDS (type))
26601 {
26602 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26603 return 64;
26604 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26605 return 128;
26606 }
26607 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26608 || TREE_CODE (type) == INTEGER_TYPE)
26609 {
26610
26611 if (TYPE_MODE (type) == DFmode && align < 64)
26612 return 64;
26613 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26614 return 128;
26615 }
26616 return align;
26617 }
26618
26619 /* Compute the minimum required alignment for dynamic stack realignment
26620 purposes for a local variable, parameter or a stack slot. EXP is
26621 the data type or decl itself, MODE is its mode and ALIGN is the
26622 alignment that the object would ordinarily have. */
26623
26624 unsigned int
26625 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26626 unsigned int align)
26627 {
26628 tree type, decl;
26629
26630 if (exp && DECL_P (exp))
26631 {
26632 type = TREE_TYPE (exp);
26633 decl = exp;
26634 }
26635 else
26636 {
26637 type = exp;
26638 decl = NULL;
26639 }
26640
26641 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26642 return align;
26643
26644 /* Don't do dynamic stack realignment for long long objects with
26645 -mpreferred-stack-boundary=2. */
26646 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26647 && (!type || !TYPE_USER_ALIGN (type))
26648 && (!decl || !DECL_USER_ALIGN (decl)))
26649 return 32;
26650
26651 return align;
26652 }
26653 \f
26654 /* Find a location for the static chain incoming to a nested function.
26655 This is a register, unless all free registers are used by arguments. */
26656
26657 static rtx
26658 ix86_static_chain (const_tree fndecl, bool incoming_p)
26659 {
26660 unsigned regno;
26661
26662 if (!DECL_STATIC_CHAIN (fndecl))
26663 return NULL;
26664
26665 if (TARGET_64BIT)
26666 {
26667 /* We always use R10 in 64-bit mode. */
26668 regno = R10_REG;
26669 }
26670 else
26671 {
26672 tree fntype;
26673 unsigned int ccvt;
26674
26675 /* By default in 32-bit mode we use ECX to pass the static chain. */
26676 regno = CX_REG;
26677
26678 fntype = TREE_TYPE (fndecl);
26679 ccvt = ix86_get_callcvt (fntype);
26680 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26681 {
26682 /* Fastcall functions use ecx/edx for arguments, which leaves
26683 us with EAX for the static chain.
26684 Thiscall functions use ecx for arguments, which also
26685 leaves us with EAX for the static chain. */
26686 regno = AX_REG;
26687 }
26688 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26689 {
26690 /* Thiscall functions use ecx for arguments, which leaves
26691 us with EAX and EDX for the static chain.
26692 For ABI compatibility we use EAX. */
26693 regno = AX_REG;
26694 }
26695 else if (ix86_function_regparm (fntype, fndecl) == 3)
26696 {
26697 /* For regparm 3, we have no free call-clobbered registers in
26698 which to store the static chain. In order to implement this,
26699 we have the trampoline push the static chain to the stack.
26700 However, we can't push a value below the return address when
26701 we call the nested function directly, so we have to use an
26702 alternate entry point. For this we use ESI, and have the
26703 alternate entry point push ESI, so that things appear the
26704 same once we're executing the nested function. */
26705 if (incoming_p)
26706 {
26707 if (fndecl == current_function_decl)
26708 ix86_static_chain_on_stack = true;
26709 return gen_frame_mem (SImode,
26710 plus_constant (Pmode,
26711 arg_pointer_rtx, -8));
26712 }
26713 regno = SI_REG;
26714 }
26715 }
26716
26717 return gen_rtx_REG (Pmode, regno);
26718 }
26719
26720 /* Emit RTL insns to initialize the variable parts of a trampoline.
26721 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26722 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26723 to be passed to the target function. */
26724
26725 static void
26726 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26727 {
26728 rtx mem, fnaddr;
26729 int opcode;
26730 int offset = 0;
26731
26732 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26733
26734 if (TARGET_64BIT)
26735 {
26736 int size;
26737
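/* The 64-bit trampoline emitted below is, roughly:
     49 bb <imm64>   movabs $fnaddr, %r11   (or 41 bb <imm32>  movl)
     49 ba <imm64>   movabs $chain,  %r10   (or 41 ba <imm32>  movl)
     49 ff e3        jmp    *%r11
     90              nop (pads the last write to a full 32-bit store)  */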
26738 /* Load the function address into r11. Try to load the address using
26739 the shorter movl instead of movabs. We may want to support
26740 movq for kernel mode, but the kernel does not use trampolines at
26741 the moment. FNADDR is a 32-bit address and may not be in
26742 DImode when ptr_mode == SImode. Always use movl in this
26743 case. */
26744 if (ptr_mode == SImode
26745 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26746 {
26747 fnaddr = copy_addr_to_reg (fnaddr);
26748
26749 mem = adjust_address (m_tramp, HImode, offset);
26750 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26751
26752 mem = adjust_address (m_tramp, SImode, offset + 2);
26753 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26754 offset += 6;
26755 }
26756 else
26757 {
26758 mem = adjust_address (m_tramp, HImode, offset);
26759 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26760
26761 mem = adjust_address (m_tramp, DImode, offset + 2);
26762 emit_move_insn (mem, fnaddr);
26763 offset += 10;
26764 }
26765
26766 /* Load the static chain into r10 using movabs. Use the shorter movl
26767 instead of movabs when ptr_mode == SImode. */
26768 if (ptr_mode == SImode)
26769 {
26770 opcode = 0xba41;
26771 size = 6;
26772 }
26773 else
26774 {
26775 opcode = 0xba49;
26776 size = 10;
26777 }
26778
26779 mem = adjust_address (m_tramp, HImode, offset);
26780 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26781
26782 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26783 emit_move_insn (mem, chain_value);
26784 offset += size;
26785
26786 /* Jump to r11; the last (unused) byte is a nop, only there to
26787 pad the write out to a single 32-bit store. */
26788 mem = adjust_address (m_tramp, SImode, offset);
26789 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26790 offset += 4;
26791 }
26792 else
26793 {
26794 rtx disp, chain;
26795
26796 /* Depending on the static chain location, either load a register
26797 with a constant, or push the constant to the stack. All of the
26798 instructions are the same size. */
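/* Roughly, the 32-bit trampoline emitted below is:
     b8/b9 <imm32>   movl  $chain, %eax/%ecx   (or 68 <imm32>  pushl $chain)
     e9 <rel32>      jmp   fnaddr  */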
26799 chain = ix86_static_chain (fndecl, true);
26800 if (REG_P (chain))
26801 {
26802 switch (REGNO (chain))
26803 {
26804 case AX_REG:
26805 opcode = 0xb8; break;
26806 case CX_REG:
26807 opcode = 0xb9; break;
26808 default:
26809 gcc_unreachable ();
26810 }
26811 }
26812 else
26813 opcode = 0x68;
26814
26815 mem = adjust_address (m_tramp, QImode, offset);
26816 emit_move_insn (mem, gen_int_mode (opcode, QImode));
26817
26818 mem = adjust_address (m_tramp, SImode, offset + 1);
26819 emit_move_insn (mem, chain_value);
26820 offset += 5;
26821
26822 mem = adjust_address (m_tramp, QImode, offset);
26823 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
26824
26825 mem = adjust_address (m_tramp, SImode, offset + 1);
26826
26827 /* Compute the offset from the end of the jmp to the target function.
26828 In the case where the trampoline stores the static chain on
26829 the stack, we need to skip the first insn, which pushes the
26830 (call-saved) static-chain register; this push is 1 byte. */
26831 offset += 5;
26832 disp = expand_binop (SImode, sub_optab, fnaddr,
26833 plus_constant (Pmode, XEXP (m_tramp, 0),
26834 offset - (MEM_P (chain) ? 1 : 0)),
26835 NULL_RTX, 1, OPTAB_DIRECT);
26836 emit_move_insn (mem, disp);
26837 }
26838
26839 gcc_assert (offset <= TRAMPOLINE_SIZE);
26840
26841 #ifdef HAVE_ENABLE_EXECUTE_STACK
26842 #ifdef CHECK_EXECUTE_STACK_ENABLED
26843 if (CHECK_EXECUTE_STACK_ENABLED)
26844 #endif
26845 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
26846 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
26847 #endif
26848 }
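
/* For reference, a sketch of the 64-bit trampoline bytes emitted above,
   assuming ptr_mode == DImode and FNADDR does not fit in a zero-extended
   32-bit immediate (the HImode/SImode constants such as 0xbb49 and
   0x90e3ff49 are stored little-endian, so they appear byte-reversed here):

       49 bb <fnaddr, 8 bytes>     movabsq $FNADDR, %r11
       49 ba <chain, 8 bytes>      movabsq $CHAIN,  %r10
       49 ff e3                    jmpq    *%r11
       90                          nop (pads the write to a 32-bit store)

   In the 32-bit case the sequence is instead movl $CHAIN, %eax/%ecx
   (opcode 0xb8/0xb9) or pushl $CHAIN (opcode 0x68), followed by
   jmp rel32 (opcode 0xe9) to the target function.  */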
26849 \f
26850 /* The following file contains several enumerations and data structures
26851 built from the definitions in i386-builtin-types.def. */
26852
26853 #include "i386-builtin-types.inc"
26854
26855 /* Table for the ix86 builtin non-function types. */
26856 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
26857
26858 /* Retrieve an element from the above table, building some of
26859 the types lazily. */
26860
26861 static tree
26862 ix86_get_builtin_type (enum ix86_builtin_type tcode)
26863 {
26864 unsigned int index;
26865 tree type, itype;
26866
26867 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
26868
26869 type = ix86_builtin_type_tab[(int) tcode];
26870 if (type != NULL)
26871 return type;
26872
26873 gcc_assert (tcode > IX86_BT_LAST_PRIM);
26874 if (tcode <= IX86_BT_LAST_VECT)
26875 {
26876 enum machine_mode mode;
26877
26878 index = tcode - IX86_BT_LAST_PRIM - 1;
26879 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
26880 mode = ix86_builtin_type_vect_mode[index];
26881
26882 type = build_vector_type_for_mode (itype, mode);
26883 }
26884 else
26885 {
26886 int quals;
26887
26888 index = tcode - IX86_BT_LAST_VECT - 1;
26889 if (tcode <= IX86_BT_LAST_PTR)
26890 quals = TYPE_UNQUALIFIED;
26891 else
26892 quals = TYPE_QUAL_CONST;
26893
26894 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
26895 if (quals != TYPE_UNQUALIFIED)
26896 itype = build_qualified_type (itype, quals);
26897
26898 type = build_pointer_type (itype);
26899 }
26900
26901 ix86_builtin_type_tab[(int) tcode] = type;
26902 return type;
26903 }
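
/* For example (a sketch, relying on the tables generated into
   i386-builtin-types.inc): a vector code such as IX86_BT_V4SF is resolved
   by looking up its element type and mode in ix86_builtin_type_vect_base
   and ix86_builtin_type_vect_mode, so the call effectively performs

       type = build_vector_type_for_mode (float_type_node, V4SFmode);

   and caches the result in ix86_builtin_type_tab for later calls.  */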
26904
26905 /* Table for the ix86 builtin function types. */
26906 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
26907
26908 /* Retrieve an element from the above table, building some of
26909 the types lazily. */
26910
26911 static tree
26912 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
26913 {
26914 tree type;
26915
26916 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
26917
26918 type = ix86_builtin_func_type_tab[(int) tcode];
26919 if (type != NULL)
26920 return type;
26921
26922 if (tcode <= IX86_BT_LAST_FUNC)
26923 {
26924 unsigned start = ix86_builtin_func_start[(int) tcode];
26925 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
26926 tree rtype, atype, args = void_list_node;
26927 unsigned i;
26928
26929 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
26930 for (i = after - 1; i > start; --i)
26931 {
26932 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
26933 args = tree_cons (NULL, atype, args);
26934 }
26935
26936 type = build_function_type (rtype, args);
26937 }
26938 else
26939 {
26940 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
26941 enum ix86_builtin_func_type icode;
26942
26943 icode = ix86_builtin_func_alias_base[index];
26944 type = ix86_get_builtin_func_type (icode);
26945 }
26946
26947 ix86_builtin_func_type_tab[(int) tcode] = type;
26948 return type;
26949 }
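
/* For example (a sketch): for a code such as UINT64_FTYPE_VOID, used below
   for __builtin_ia32_rdtsc, the slice of ix86_builtin_func_args selected by
   ix86_builtin_func_start holds only the return type, so the loop above
   effectively builds

       type = build_function_type (ix86_get_builtin_type (<UINT64 code>),
                                   void_list_node);

   i.e. "unsigned long long (void)".  Codes past IX86_BT_LAST_FUNC are
   aliases and simply reuse the type built for the code they alias.  */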
26950
26951
26952 /* Codes for all the SSE/MMX builtins. */
26953 enum ix86_builtins
26954 {
26955 IX86_BUILTIN_ADDPS,
26956 IX86_BUILTIN_ADDSS,
26957 IX86_BUILTIN_DIVPS,
26958 IX86_BUILTIN_DIVSS,
26959 IX86_BUILTIN_MULPS,
26960 IX86_BUILTIN_MULSS,
26961 IX86_BUILTIN_SUBPS,
26962 IX86_BUILTIN_SUBSS,
26963
26964 IX86_BUILTIN_CMPEQPS,
26965 IX86_BUILTIN_CMPLTPS,
26966 IX86_BUILTIN_CMPLEPS,
26967 IX86_BUILTIN_CMPGTPS,
26968 IX86_BUILTIN_CMPGEPS,
26969 IX86_BUILTIN_CMPNEQPS,
26970 IX86_BUILTIN_CMPNLTPS,
26971 IX86_BUILTIN_CMPNLEPS,
26972 IX86_BUILTIN_CMPNGTPS,
26973 IX86_BUILTIN_CMPNGEPS,
26974 IX86_BUILTIN_CMPORDPS,
26975 IX86_BUILTIN_CMPUNORDPS,
26976 IX86_BUILTIN_CMPEQSS,
26977 IX86_BUILTIN_CMPLTSS,
26978 IX86_BUILTIN_CMPLESS,
26979 IX86_BUILTIN_CMPNEQSS,
26980 IX86_BUILTIN_CMPNLTSS,
26981 IX86_BUILTIN_CMPNLESS,
26982 IX86_BUILTIN_CMPORDSS,
26983 IX86_BUILTIN_CMPUNORDSS,
26984
26985 IX86_BUILTIN_COMIEQSS,
26986 IX86_BUILTIN_COMILTSS,
26987 IX86_BUILTIN_COMILESS,
26988 IX86_BUILTIN_COMIGTSS,
26989 IX86_BUILTIN_COMIGESS,
26990 IX86_BUILTIN_COMINEQSS,
26991 IX86_BUILTIN_UCOMIEQSS,
26992 IX86_BUILTIN_UCOMILTSS,
26993 IX86_BUILTIN_UCOMILESS,
26994 IX86_BUILTIN_UCOMIGTSS,
26995 IX86_BUILTIN_UCOMIGESS,
26996 IX86_BUILTIN_UCOMINEQSS,
26997
26998 IX86_BUILTIN_CVTPI2PS,
26999 IX86_BUILTIN_CVTPS2PI,
27000 IX86_BUILTIN_CVTSI2SS,
27001 IX86_BUILTIN_CVTSI642SS,
27002 IX86_BUILTIN_CVTSS2SI,
27003 IX86_BUILTIN_CVTSS2SI64,
27004 IX86_BUILTIN_CVTTPS2PI,
27005 IX86_BUILTIN_CVTTSS2SI,
27006 IX86_BUILTIN_CVTTSS2SI64,
27007
27008 IX86_BUILTIN_MAXPS,
27009 IX86_BUILTIN_MAXSS,
27010 IX86_BUILTIN_MINPS,
27011 IX86_BUILTIN_MINSS,
27012
27013 IX86_BUILTIN_LOADUPS,
27014 IX86_BUILTIN_STOREUPS,
27015 IX86_BUILTIN_MOVSS,
27016
27017 IX86_BUILTIN_MOVHLPS,
27018 IX86_BUILTIN_MOVLHPS,
27019 IX86_BUILTIN_LOADHPS,
27020 IX86_BUILTIN_LOADLPS,
27021 IX86_BUILTIN_STOREHPS,
27022 IX86_BUILTIN_STORELPS,
27023
27024 IX86_BUILTIN_MASKMOVQ,
27025 IX86_BUILTIN_MOVMSKPS,
27026 IX86_BUILTIN_PMOVMSKB,
27027
27028 IX86_BUILTIN_MOVNTPS,
27029 IX86_BUILTIN_MOVNTQ,
27030
27031 IX86_BUILTIN_LOADDQU,
27032 IX86_BUILTIN_STOREDQU,
27033
27034 IX86_BUILTIN_PACKSSWB,
27035 IX86_BUILTIN_PACKSSDW,
27036 IX86_BUILTIN_PACKUSWB,
27037
27038 IX86_BUILTIN_PADDB,
27039 IX86_BUILTIN_PADDW,
27040 IX86_BUILTIN_PADDD,
27041 IX86_BUILTIN_PADDQ,
27042 IX86_BUILTIN_PADDSB,
27043 IX86_BUILTIN_PADDSW,
27044 IX86_BUILTIN_PADDUSB,
27045 IX86_BUILTIN_PADDUSW,
27046 IX86_BUILTIN_PSUBB,
27047 IX86_BUILTIN_PSUBW,
27048 IX86_BUILTIN_PSUBD,
27049 IX86_BUILTIN_PSUBQ,
27050 IX86_BUILTIN_PSUBSB,
27051 IX86_BUILTIN_PSUBSW,
27052 IX86_BUILTIN_PSUBUSB,
27053 IX86_BUILTIN_PSUBUSW,
27054
27055 IX86_BUILTIN_PAND,
27056 IX86_BUILTIN_PANDN,
27057 IX86_BUILTIN_POR,
27058 IX86_BUILTIN_PXOR,
27059
27060 IX86_BUILTIN_PAVGB,
27061 IX86_BUILTIN_PAVGW,
27062
27063 IX86_BUILTIN_PCMPEQB,
27064 IX86_BUILTIN_PCMPEQW,
27065 IX86_BUILTIN_PCMPEQD,
27066 IX86_BUILTIN_PCMPGTB,
27067 IX86_BUILTIN_PCMPGTW,
27068 IX86_BUILTIN_PCMPGTD,
27069
27070 IX86_BUILTIN_PMADDWD,
27071
27072 IX86_BUILTIN_PMAXSW,
27073 IX86_BUILTIN_PMAXUB,
27074 IX86_BUILTIN_PMINSW,
27075 IX86_BUILTIN_PMINUB,
27076
27077 IX86_BUILTIN_PMULHUW,
27078 IX86_BUILTIN_PMULHW,
27079 IX86_BUILTIN_PMULLW,
27080
27081 IX86_BUILTIN_PSADBW,
27082 IX86_BUILTIN_PSHUFW,
27083
27084 IX86_BUILTIN_PSLLW,
27085 IX86_BUILTIN_PSLLD,
27086 IX86_BUILTIN_PSLLQ,
27087 IX86_BUILTIN_PSRAW,
27088 IX86_BUILTIN_PSRAD,
27089 IX86_BUILTIN_PSRLW,
27090 IX86_BUILTIN_PSRLD,
27091 IX86_BUILTIN_PSRLQ,
27092 IX86_BUILTIN_PSLLWI,
27093 IX86_BUILTIN_PSLLDI,
27094 IX86_BUILTIN_PSLLQI,
27095 IX86_BUILTIN_PSRAWI,
27096 IX86_BUILTIN_PSRADI,
27097 IX86_BUILTIN_PSRLWI,
27098 IX86_BUILTIN_PSRLDI,
27099 IX86_BUILTIN_PSRLQI,
27100
27101 IX86_BUILTIN_PUNPCKHBW,
27102 IX86_BUILTIN_PUNPCKHWD,
27103 IX86_BUILTIN_PUNPCKHDQ,
27104 IX86_BUILTIN_PUNPCKLBW,
27105 IX86_BUILTIN_PUNPCKLWD,
27106 IX86_BUILTIN_PUNPCKLDQ,
27107
27108 IX86_BUILTIN_SHUFPS,
27109
27110 IX86_BUILTIN_RCPPS,
27111 IX86_BUILTIN_RCPSS,
27112 IX86_BUILTIN_RSQRTPS,
27113 IX86_BUILTIN_RSQRTPS_NR,
27114 IX86_BUILTIN_RSQRTSS,
27115 IX86_BUILTIN_RSQRTF,
27116 IX86_BUILTIN_SQRTPS,
27117 IX86_BUILTIN_SQRTPS_NR,
27118 IX86_BUILTIN_SQRTSS,
27119
27120 IX86_BUILTIN_UNPCKHPS,
27121 IX86_BUILTIN_UNPCKLPS,
27122
27123 IX86_BUILTIN_ANDPS,
27124 IX86_BUILTIN_ANDNPS,
27125 IX86_BUILTIN_ORPS,
27126 IX86_BUILTIN_XORPS,
27127
27128 IX86_BUILTIN_EMMS,
27129 IX86_BUILTIN_LDMXCSR,
27130 IX86_BUILTIN_STMXCSR,
27131 IX86_BUILTIN_SFENCE,
27132
27133 IX86_BUILTIN_FXSAVE,
27134 IX86_BUILTIN_FXRSTOR,
27135 IX86_BUILTIN_FXSAVE64,
27136 IX86_BUILTIN_FXRSTOR64,
27137
27138 IX86_BUILTIN_XSAVE,
27139 IX86_BUILTIN_XRSTOR,
27140 IX86_BUILTIN_XSAVE64,
27141 IX86_BUILTIN_XRSTOR64,
27142
27143 IX86_BUILTIN_XSAVEOPT,
27144 IX86_BUILTIN_XSAVEOPT64,
27145
27146 /* 3DNow! Original */
27147 IX86_BUILTIN_FEMMS,
27148 IX86_BUILTIN_PAVGUSB,
27149 IX86_BUILTIN_PF2ID,
27150 IX86_BUILTIN_PFACC,
27151 IX86_BUILTIN_PFADD,
27152 IX86_BUILTIN_PFCMPEQ,
27153 IX86_BUILTIN_PFCMPGE,
27154 IX86_BUILTIN_PFCMPGT,
27155 IX86_BUILTIN_PFMAX,
27156 IX86_BUILTIN_PFMIN,
27157 IX86_BUILTIN_PFMUL,
27158 IX86_BUILTIN_PFRCP,
27159 IX86_BUILTIN_PFRCPIT1,
27160 IX86_BUILTIN_PFRCPIT2,
27161 IX86_BUILTIN_PFRSQIT1,
27162 IX86_BUILTIN_PFRSQRT,
27163 IX86_BUILTIN_PFSUB,
27164 IX86_BUILTIN_PFSUBR,
27165 IX86_BUILTIN_PI2FD,
27166 IX86_BUILTIN_PMULHRW,
27167
27168 /* 3DNow! Athlon Extensions */
27169 IX86_BUILTIN_PF2IW,
27170 IX86_BUILTIN_PFNACC,
27171 IX86_BUILTIN_PFPNACC,
27172 IX86_BUILTIN_PI2FW,
27173 IX86_BUILTIN_PSWAPDSI,
27174 IX86_BUILTIN_PSWAPDSF,
27175
27176 /* SSE2 */
27177 IX86_BUILTIN_ADDPD,
27178 IX86_BUILTIN_ADDSD,
27179 IX86_BUILTIN_DIVPD,
27180 IX86_BUILTIN_DIVSD,
27181 IX86_BUILTIN_MULPD,
27182 IX86_BUILTIN_MULSD,
27183 IX86_BUILTIN_SUBPD,
27184 IX86_BUILTIN_SUBSD,
27185
27186 IX86_BUILTIN_CMPEQPD,
27187 IX86_BUILTIN_CMPLTPD,
27188 IX86_BUILTIN_CMPLEPD,
27189 IX86_BUILTIN_CMPGTPD,
27190 IX86_BUILTIN_CMPGEPD,
27191 IX86_BUILTIN_CMPNEQPD,
27192 IX86_BUILTIN_CMPNLTPD,
27193 IX86_BUILTIN_CMPNLEPD,
27194 IX86_BUILTIN_CMPNGTPD,
27195 IX86_BUILTIN_CMPNGEPD,
27196 IX86_BUILTIN_CMPORDPD,
27197 IX86_BUILTIN_CMPUNORDPD,
27198 IX86_BUILTIN_CMPEQSD,
27199 IX86_BUILTIN_CMPLTSD,
27200 IX86_BUILTIN_CMPLESD,
27201 IX86_BUILTIN_CMPNEQSD,
27202 IX86_BUILTIN_CMPNLTSD,
27203 IX86_BUILTIN_CMPNLESD,
27204 IX86_BUILTIN_CMPORDSD,
27205 IX86_BUILTIN_CMPUNORDSD,
27206
27207 IX86_BUILTIN_COMIEQSD,
27208 IX86_BUILTIN_COMILTSD,
27209 IX86_BUILTIN_COMILESD,
27210 IX86_BUILTIN_COMIGTSD,
27211 IX86_BUILTIN_COMIGESD,
27212 IX86_BUILTIN_COMINEQSD,
27213 IX86_BUILTIN_UCOMIEQSD,
27214 IX86_BUILTIN_UCOMILTSD,
27215 IX86_BUILTIN_UCOMILESD,
27216 IX86_BUILTIN_UCOMIGTSD,
27217 IX86_BUILTIN_UCOMIGESD,
27218 IX86_BUILTIN_UCOMINEQSD,
27219
27220 IX86_BUILTIN_MAXPD,
27221 IX86_BUILTIN_MAXSD,
27222 IX86_BUILTIN_MINPD,
27223 IX86_BUILTIN_MINSD,
27224
27225 IX86_BUILTIN_ANDPD,
27226 IX86_BUILTIN_ANDNPD,
27227 IX86_BUILTIN_ORPD,
27228 IX86_BUILTIN_XORPD,
27229
27230 IX86_BUILTIN_SQRTPD,
27231 IX86_BUILTIN_SQRTSD,
27232
27233 IX86_BUILTIN_UNPCKHPD,
27234 IX86_BUILTIN_UNPCKLPD,
27235
27236 IX86_BUILTIN_SHUFPD,
27237
27238 IX86_BUILTIN_LOADUPD,
27239 IX86_BUILTIN_STOREUPD,
27240 IX86_BUILTIN_MOVSD,
27241
27242 IX86_BUILTIN_LOADHPD,
27243 IX86_BUILTIN_LOADLPD,
27244
27245 IX86_BUILTIN_CVTDQ2PD,
27246 IX86_BUILTIN_CVTDQ2PS,
27247
27248 IX86_BUILTIN_CVTPD2DQ,
27249 IX86_BUILTIN_CVTPD2PI,
27250 IX86_BUILTIN_CVTPD2PS,
27251 IX86_BUILTIN_CVTTPD2DQ,
27252 IX86_BUILTIN_CVTTPD2PI,
27253
27254 IX86_BUILTIN_CVTPI2PD,
27255 IX86_BUILTIN_CVTSI2SD,
27256 IX86_BUILTIN_CVTSI642SD,
27257
27258 IX86_BUILTIN_CVTSD2SI,
27259 IX86_BUILTIN_CVTSD2SI64,
27260 IX86_BUILTIN_CVTSD2SS,
27261 IX86_BUILTIN_CVTSS2SD,
27262 IX86_BUILTIN_CVTTSD2SI,
27263 IX86_BUILTIN_CVTTSD2SI64,
27264
27265 IX86_BUILTIN_CVTPS2DQ,
27266 IX86_BUILTIN_CVTPS2PD,
27267 IX86_BUILTIN_CVTTPS2DQ,
27268
27269 IX86_BUILTIN_MOVNTI,
27270 IX86_BUILTIN_MOVNTI64,
27271 IX86_BUILTIN_MOVNTPD,
27272 IX86_BUILTIN_MOVNTDQ,
27273
27274 IX86_BUILTIN_MOVQ128,
27275
27276 /* SSE2 MMX */
27277 IX86_BUILTIN_MASKMOVDQU,
27278 IX86_BUILTIN_MOVMSKPD,
27279 IX86_BUILTIN_PMOVMSKB128,
27280
27281 IX86_BUILTIN_PACKSSWB128,
27282 IX86_BUILTIN_PACKSSDW128,
27283 IX86_BUILTIN_PACKUSWB128,
27284
27285 IX86_BUILTIN_PADDB128,
27286 IX86_BUILTIN_PADDW128,
27287 IX86_BUILTIN_PADDD128,
27288 IX86_BUILTIN_PADDQ128,
27289 IX86_BUILTIN_PADDSB128,
27290 IX86_BUILTIN_PADDSW128,
27291 IX86_BUILTIN_PADDUSB128,
27292 IX86_BUILTIN_PADDUSW128,
27293 IX86_BUILTIN_PSUBB128,
27294 IX86_BUILTIN_PSUBW128,
27295 IX86_BUILTIN_PSUBD128,
27296 IX86_BUILTIN_PSUBQ128,
27297 IX86_BUILTIN_PSUBSB128,
27298 IX86_BUILTIN_PSUBSW128,
27299 IX86_BUILTIN_PSUBUSB128,
27300 IX86_BUILTIN_PSUBUSW128,
27301
27302 IX86_BUILTIN_PAND128,
27303 IX86_BUILTIN_PANDN128,
27304 IX86_BUILTIN_POR128,
27305 IX86_BUILTIN_PXOR128,
27306
27307 IX86_BUILTIN_PAVGB128,
27308 IX86_BUILTIN_PAVGW128,
27309
27310 IX86_BUILTIN_PCMPEQB128,
27311 IX86_BUILTIN_PCMPEQW128,
27312 IX86_BUILTIN_PCMPEQD128,
27313 IX86_BUILTIN_PCMPGTB128,
27314 IX86_BUILTIN_PCMPGTW128,
27315 IX86_BUILTIN_PCMPGTD128,
27316
27317 IX86_BUILTIN_PMADDWD128,
27318
27319 IX86_BUILTIN_PMAXSW128,
27320 IX86_BUILTIN_PMAXUB128,
27321 IX86_BUILTIN_PMINSW128,
27322 IX86_BUILTIN_PMINUB128,
27323
27324 IX86_BUILTIN_PMULUDQ,
27325 IX86_BUILTIN_PMULUDQ128,
27326 IX86_BUILTIN_PMULHUW128,
27327 IX86_BUILTIN_PMULHW128,
27328 IX86_BUILTIN_PMULLW128,
27329
27330 IX86_BUILTIN_PSADBW128,
27331 IX86_BUILTIN_PSHUFHW,
27332 IX86_BUILTIN_PSHUFLW,
27333 IX86_BUILTIN_PSHUFD,
27334
27335 IX86_BUILTIN_PSLLDQI128,
27336 IX86_BUILTIN_PSLLWI128,
27337 IX86_BUILTIN_PSLLDI128,
27338 IX86_BUILTIN_PSLLQI128,
27339 IX86_BUILTIN_PSRAWI128,
27340 IX86_BUILTIN_PSRADI128,
27341 IX86_BUILTIN_PSRLDQI128,
27342 IX86_BUILTIN_PSRLWI128,
27343 IX86_BUILTIN_PSRLDI128,
27344 IX86_BUILTIN_PSRLQI128,
27345
27346 IX86_BUILTIN_PSLLDQ128,
27347 IX86_BUILTIN_PSLLW128,
27348 IX86_BUILTIN_PSLLD128,
27349 IX86_BUILTIN_PSLLQ128,
27350 IX86_BUILTIN_PSRAW128,
27351 IX86_BUILTIN_PSRAD128,
27352 IX86_BUILTIN_PSRLW128,
27353 IX86_BUILTIN_PSRLD128,
27354 IX86_BUILTIN_PSRLQ128,
27355
27356 IX86_BUILTIN_PUNPCKHBW128,
27357 IX86_BUILTIN_PUNPCKHWD128,
27358 IX86_BUILTIN_PUNPCKHDQ128,
27359 IX86_BUILTIN_PUNPCKHQDQ128,
27360 IX86_BUILTIN_PUNPCKLBW128,
27361 IX86_BUILTIN_PUNPCKLWD128,
27362 IX86_BUILTIN_PUNPCKLDQ128,
27363 IX86_BUILTIN_PUNPCKLQDQ128,
27364
27365 IX86_BUILTIN_CLFLUSH,
27366 IX86_BUILTIN_MFENCE,
27367 IX86_BUILTIN_LFENCE,
27368 IX86_BUILTIN_PAUSE,
27369
27370 IX86_BUILTIN_FNSTENV,
27371 IX86_BUILTIN_FLDENV,
27372 IX86_BUILTIN_FNSTSW,
27373 IX86_BUILTIN_FNCLEX,
27374
27375 IX86_BUILTIN_BSRSI,
27376 IX86_BUILTIN_BSRDI,
27377 IX86_BUILTIN_RDPMC,
27378 IX86_BUILTIN_RDTSC,
27379 IX86_BUILTIN_RDTSCP,
27380 IX86_BUILTIN_ROLQI,
27381 IX86_BUILTIN_ROLHI,
27382 IX86_BUILTIN_RORQI,
27383 IX86_BUILTIN_RORHI,
27384
27385 /* SSE3. */
27386 IX86_BUILTIN_ADDSUBPS,
27387 IX86_BUILTIN_HADDPS,
27388 IX86_BUILTIN_HSUBPS,
27389 IX86_BUILTIN_MOVSHDUP,
27390 IX86_BUILTIN_MOVSLDUP,
27391 IX86_BUILTIN_ADDSUBPD,
27392 IX86_BUILTIN_HADDPD,
27393 IX86_BUILTIN_HSUBPD,
27394 IX86_BUILTIN_LDDQU,
27395
27396 IX86_BUILTIN_MONITOR,
27397 IX86_BUILTIN_MWAIT,
27398
27399 /* SSSE3. */
27400 IX86_BUILTIN_PHADDW,
27401 IX86_BUILTIN_PHADDD,
27402 IX86_BUILTIN_PHADDSW,
27403 IX86_BUILTIN_PHSUBW,
27404 IX86_BUILTIN_PHSUBD,
27405 IX86_BUILTIN_PHSUBSW,
27406 IX86_BUILTIN_PMADDUBSW,
27407 IX86_BUILTIN_PMULHRSW,
27408 IX86_BUILTIN_PSHUFB,
27409 IX86_BUILTIN_PSIGNB,
27410 IX86_BUILTIN_PSIGNW,
27411 IX86_BUILTIN_PSIGND,
27412 IX86_BUILTIN_PALIGNR,
27413 IX86_BUILTIN_PABSB,
27414 IX86_BUILTIN_PABSW,
27415 IX86_BUILTIN_PABSD,
27416
27417 IX86_BUILTIN_PHADDW128,
27418 IX86_BUILTIN_PHADDD128,
27419 IX86_BUILTIN_PHADDSW128,
27420 IX86_BUILTIN_PHSUBW128,
27421 IX86_BUILTIN_PHSUBD128,
27422 IX86_BUILTIN_PHSUBSW128,
27423 IX86_BUILTIN_PMADDUBSW128,
27424 IX86_BUILTIN_PMULHRSW128,
27425 IX86_BUILTIN_PSHUFB128,
27426 IX86_BUILTIN_PSIGNB128,
27427 IX86_BUILTIN_PSIGNW128,
27428 IX86_BUILTIN_PSIGND128,
27429 IX86_BUILTIN_PALIGNR128,
27430 IX86_BUILTIN_PABSB128,
27431 IX86_BUILTIN_PABSW128,
27432 IX86_BUILTIN_PABSD128,
27433
27434 /* AMDFAM10 - SSE4A New Instructions. */
27435 IX86_BUILTIN_MOVNTSD,
27436 IX86_BUILTIN_MOVNTSS,
27437 IX86_BUILTIN_EXTRQI,
27438 IX86_BUILTIN_EXTRQ,
27439 IX86_BUILTIN_INSERTQI,
27440 IX86_BUILTIN_INSERTQ,
27441
27442 /* SSE4.1. */
27443 IX86_BUILTIN_BLENDPD,
27444 IX86_BUILTIN_BLENDPS,
27445 IX86_BUILTIN_BLENDVPD,
27446 IX86_BUILTIN_BLENDVPS,
27447 IX86_BUILTIN_PBLENDVB128,
27448 IX86_BUILTIN_PBLENDW128,
27449
27450 IX86_BUILTIN_DPPD,
27451 IX86_BUILTIN_DPPS,
27452
27453 IX86_BUILTIN_INSERTPS128,
27454
27455 IX86_BUILTIN_MOVNTDQA,
27456 IX86_BUILTIN_MPSADBW128,
27457 IX86_BUILTIN_PACKUSDW128,
27458 IX86_BUILTIN_PCMPEQQ,
27459 IX86_BUILTIN_PHMINPOSUW128,
27460
27461 IX86_BUILTIN_PMAXSB128,
27462 IX86_BUILTIN_PMAXSD128,
27463 IX86_BUILTIN_PMAXUD128,
27464 IX86_BUILTIN_PMAXUW128,
27465
27466 IX86_BUILTIN_PMINSB128,
27467 IX86_BUILTIN_PMINSD128,
27468 IX86_BUILTIN_PMINUD128,
27469 IX86_BUILTIN_PMINUW128,
27470
27471 IX86_BUILTIN_PMOVSXBW128,
27472 IX86_BUILTIN_PMOVSXBD128,
27473 IX86_BUILTIN_PMOVSXBQ128,
27474 IX86_BUILTIN_PMOVSXWD128,
27475 IX86_BUILTIN_PMOVSXWQ128,
27476 IX86_BUILTIN_PMOVSXDQ128,
27477
27478 IX86_BUILTIN_PMOVZXBW128,
27479 IX86_BUILTIN_PMOVZXBD128,
27480 IX86_BUILTIN_PMOVZXBQ128,
27481 IX86_BUILTIN_PMOVZXWD128,
27482 IX86_BUILTIN_PMOVZXWQ128,
27483 IX86_BUILTIN_PMOVZXDQ128,
27484
27485 IX86_BUILTIN_PMULDQ128,
27486 IX86_BUILTIN_PMULLD128,
27487
27488 IX86_BUILTIN_ROUNDSD,
27489 IX86_BUILTIN_ROUNDSS,
27490
27491 IX86_BUILTIN_ROUNDPD,
27492 IX86_BUILTIN_ROUNDPS,
27493
27494 IX86_BUILTIN_FLOORPD,
27495 IX86_BUILTIN_CEILPD,
27496 IX86_BUILTIN_TRUNCPD,
27497 IX86_BUILTIN_RINTPD,
27498 IX86_BUILTIN_ROUNDPD_AZ,
27499
27500 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27501 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27502 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27503
27504 IX86_BUILTIN_FLOORPS,
27505 IX86_BUILTIN_CEILPS,
27506 IX86_BUILTIN_TRUNCPS,
27507 IX86_BUILTIN_RINTPS,
27508 IX86_BUILTIN_ROUNDPS_AZ,
27509
27510 IX86_BUILTIN_FLOORPS_SFIX,
27511 IX86_BUILTIN_CEILPS_SFIX,
27512 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27513
27514 IX86_BUILTIN_PTESTZ,
27515 IX86_BUILTIN_PTESTC,
27516 IX86_BUILTIN_PTESTNZC,
27517
27518 IX86_BUILTIN_VEC_INIT_V2SI,
27519 IX86_BUILTIN_VEC_INIT_V4HI,
27520 IX86_BUILTIN_VEC_INIT_V8QI,
27521 IX86_BUILTIN_VEC_EXT_V2DF,
27522 IX86_BUILTIN_VEC_EXT_V2DI,
27523 IX86_BUILTIN_VEC_EXT_V4SF,
27524 IX86_BUILTIN_VEC_EXT_V4SI,
27525 IX86_BUILTIN_VEC_EXT_V8HI,
27526 IX86_BUILTIN_VEC_EXT_V2SI,
27527 IX86_BUILTIN_VEC_EXT_V4HI,
27528 IX86_BUILTIN_VEC_EXT_V16QI,
27529 IX86_BUILTIN_VEC_SET_V2DI,
27530 IX86_BUILTIN_VEC_SET_V4SF,
27531 IX86_BUILTIN_VEC_SET_V4SI,
27532 IX86_BUILTIN_VEC_SET_V8HI,
27533 IX86_BUILTIN_VEC_SET_V4HI,
27534 IX86_BUILTIN_VEC_SET_V16QI,
27535
27536 IX86_BUILTIN_VEC_PACK_SFIX,
27537 IX86_BUILTIN_VEC_PACK_SFIX256,
27538
27539 /* SSE4.2. */
27540 IX86_BUILTIN_CRC32QI,
27541 IX86_BUILTIN_CRC32HI,
27542 IX86_BUILTIN_CRC32SI,
27543 IX86_BUILTIN_CRC32DI,
27544
27545 IX86_BUILTIN_PCMPESTRI128,
27546 IX86_BUILTIN_PCMPESTRM128,
27547 IX86_BUILTIN_PCMPESTRA128,
27548 IX86_BUILTIN_PCMPESTRC128,
27549 IX86_BUILTIN_PCMPESTRO128,
27550 IX86_BUILTIN_PCMPESTRS128,
27551 IX86_BUILTIN_PCMPESTRZ128,
27552 IX86_BUILTIN_PCMPISTRI128,
27553 IX86_BUILTIN_PCMPISTRM128,
27554 IX86_BUILTIN_PCMPISTRA128,
27555 IX86_BUILTIN_PCMPISTRC128,
27556 IX86_BUILTIN_PCMPISTRO128,
27557 IX86_BUILTIN_PCMPISTRS128,
27558 IX86_BUILTIN_PCMPISTRZ128,
27559
27560 IX86_BUILTIN_PCMPGTQ,
27561
27562 /* AES instructions */
27563 IX86_BUILTIN_AESENC128,
27564 IX86_BUILTIN_AESENCLAST128,
27565 IX86_BUILTIN_AESDEC128,
27566 IX86_BUILTIN_AESDECLAST128,
27567 IX86_BUILTIN_AESIMC128,
27568 IX86_BUILTIN_AESKEYGENASSIST128,
27569
27570 /* PCLMUL instruction */
27571 IX86_BUILTIN_PCLMULQDQ128,
27572
27573 /* AVX */
27574 IX86_BUILTIN_ADDPD256,
27575 IX86_BUILTIN_ADDPS256,
27576 IX86_BUILTIN_ADDSUBPD256,
27577 IX86_BUILTIN_ADDSUBPS256,
27578 IX86_BUILTIN_ANDPD256,
27579 IX86_BUILTIN_ANDPS256,
27580 IX86_BUILTIN_ANDNPD256,
27581 IX86_BUILTIN_ANDNPS256,
27582 IX86_BUILTIN_BLENDPD256,
27583 IX86_BUILTIN_BLENDPS256,
27584 IX86_BUILTIN_BLENDVPD256,
27585 IX86_BUILTIN_BLENDVPS256,
27586 IX86_BUILTIN_DIVPD256,
27587 IX86_BUILTIN_DIVPS256,
27588 IX86_BUILTIN_DPPS256,
27589 IX86_BUILTIN_HADDPD256,
27590 IX86_BUILTIN_HADDPS256,
27591 IX86_BUILTIN_HSUBPD256,
27592 IX86_BUILTIN_HSUBPS256,
27593 IX86_BUILTIN_MAXPD256,
27594 IX86_BUILTIN_MAXPS256,
27595 IX86_BUILTIN_MINPD256,
27596 IX86_BUILTIN_MINPS256,
27597 IX86_BUILTIN_MULPD256,
27598 IX86_BUILTIN_MULPS256,
27599 IX86_BUILTIN_ORPD256,
27600 IX86_BUILTIN_ORPS256,
27601 IX86_BUILTIN_SHUFPD256,
27602 IX86_BUILTIN_SHUFPS256,
27603 IX86_BUILTIN_SUBPD256,
27604 IX86_BUILTIN_SUBPS256,
27605 IX86_BUILTIN_XORPD256,
27606 IX86_BUILTIN_XORPS256,
27607 IX86_BUILTIN_CMPSD,
27608 IX86_BUILTIN_CMPSS,
27609 IX86_BUILTIN_CMPPD,
27610 IX86_BUILTIN_CMPPS,
27611 IX86_BUILTIN_CMPPD256,
27612 IX86_BUILTIN_CMPPS256,
27613 IX86_BUILTIN_CVTDQ2PD256,
27614 IX86_BUILTIN_CVTDQ2PS256,
27615 IX86_BUILTIN_CVTPD2PS256,
27616 IX86_BUILTIN_CVTPS2DQ256,
27617 IX86_BUILTIN_CVTPS2PD256,
27618 IX86_BUILTIN_CVTTPD2DQ256,
27619 IX86_BUILTIN_CVTPD2DQ256,
27620 IX86_BUILTIN_CVTTPS2DQ256,
27621 IX86_BUILTIN_EXTRACTF128PD256,
27622 IX86_BUILTIN_EXTRACTF128PS256,
27623 IX86_BUILTIN_EXTRACTF128SI256,
27624 IX86_BUILTIN_VZEROALL,
27625 IX86_BUILTIN_VZEROUPPER,
27626 IX86_BUILTIN_VPERMILVARPD,
27627 IX86_BUILTIN_VPERMILVARPS,
27628 IX86_BUILTIN_VPERMILVARPD256,
27629 IX86_BUILTIN_VPERMILVARPS256,
27630 IX86_BUILTIN_VPERMILPD,
27631 IX86_BUILTIN_VPERMILPS,
27632 IX86_BUILTIN_VPERMILPD256,
27633 IX86_BUILTIN_VPERMILPS256,
27634 IX86_BUILTIN_VPERMIL2PD,
27635 IX86_BUILTIN_VPERMIL2PS,
27636 IX86_BUILTIN_VPERMIL2PD256,
27637 IX86_BUILTIN_VPERMIL2PS256,
27638 IX86_BUILTIN_VPERM2F128PD256,
27639 IX86_BUILTIN_VPERM2F128PS256,
27640 IX86_BUILTIN_VPERM2F128SI256,
27641 IX86_BUILTIN_VBROADCASTSS,
27642 IX86_BUILTIN_VBROADCASTSD256,
27643 IX86_BUILTIN_VBROADCASTSS256,
27644 IX86_BUILTIN_VBROADCASTPD256,
27645 IX86_BUILTIN_VBROADCASTPS256,
27646 IX86_BUILTIN_VINSERTF128PD256,
27647 IX86_BUILTIN_VINSERTF128PS256,
27648 IX86_BUILTIN_VINSERTF128SI256,
27649 IX86_BUILTIN_LOADUPD256,
27650 IX86_BUILTIN_LOADUPS256,
27651 IX86_BUILTIN_STOREUPD256,
27652 IX86_BUILTIN_STOREUPS256,
27653 IX86_BUILTIN_LDDQU256,
27654 IX86_BUILTIN_MOVNTDQ256,
27655 IX86_BUILTIN_MOVNTPD256,
27656 IX86_BUILTIN_MOVNTPS256,
27657 IX86_BUILTIN_LOADDQU256,
27658 IX86_BUILTIN_STOREDQU256,
27659 IX86_BUILTIN_MASKLOADPD,
27660 IX86_BUILTIN_MASKLOADPS,
27661 IX86_BUILTIN_MASKSTOREPD,
27662 IX86_BUILTIN_MASKSTOREPS,
27663 IX86_BUILTIN_MASKLOADPD256,
27664 IX86_BUILTIN_MASKLOADPS256,
27665 IX86_BUILTIN_MASKSTOREPD256,
27666 IX86_BUILTIN_MASKSTOREPS256,
27667 IX86_BUILTIN_MOVSHDUP256,
27668 IX86_BUILTIN_MOVSLDUP256,
27669 IX86_BUILTIN_MOVDDUP256,
27670
27671 IX86_BUILTIN_SQRTPD256,
27672 IX86_BUILTIN_SQRTPS256,
27673 IX86_BUILTIN_SQRTPS_NR256,
27674 IX86_BUILTIN_RSQRTPS256,
27675 IX86_BUILTIN_RSQRTPS_NR256,
27676
27677 IX86_BUILTIN_RCPPS256,
27678
27679 IX86_BUILTIN_ROUNDPD256,
27680 IX86_BUILTIN_ROUNDPS256,
27681
27682 IX86_BUILTIN_FLOORPD256,
27683 IX86_BUILTIN_CEILPD256,
27684 IX86_BUILTIN_TRUNCPD256,
27685 IX86_BUILTIN_RINTPD256,
27686 IX86_BUILTIN_ROUNDPD_AZ256,
27687
27688 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27689 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27690 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27691
27692 IX86_BUILTIN_FLOORPS256,
27693 IX86_BUILTIN_CEILPS256,
27694 IX86_BUILTIN_TRUNCPS256,
27695 IX86_BUILTIN_RINTPS256,
27696 IX86_BUILTIN_ROUNDPS_AZ256,
27697
27698 IX86_BUILTIN_FLOORPS_SFIX256,
27699 IX86_BUILTIN_CEILPS_SFIX256,
27700 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27701
27702 IX86_BUILTIN_UNPCKHPD256,
27703 IX86_BUILTIN_UNPCKLPD256,
27704 IX86_BUILTIN_UNPCKHPS256,
27705 IX86_BUILTIN_UNPCKLPS256,
27706
27707 IX86_BUILTIN_SI256_SI,
27708 IX86_BUILTIN_PS256_PS,
27709 IX86_BUILTIN_PD256_PD,
27710 IX86_BUILTIN_SI_SI256,
27711 IX86_BUILTIN_PS_PS256,
27712 IX86_BUILTIN_PD_PD256,
27713
27714 IX86_BUILTIN_VTESTZPD,
27715 IX86_BUILTIN_VTESTCPD,
27716 IX86_BUILTIN_VTESTNZCPD,
27717 IX86_BUILTIN_VTESTZPS,
27718 IX86_BUILTIN_VTESTCPS,
27719 IX86_BUILTIN_VTESTNZCPS,
27720 IX86_BUILTIN_VTESTZPD256,
27721 IX86_BUILTIN_VTESTCPD256,
27722 IX86_BUILTIN_VTESTNZCPD256,
27723 IX86_BUILTIN_VTESTZPS256,
27724 IX86_BUILTIN_VTESTCPS256,
27725 IX86_BUILTIN_VTESTNZCPS256,
27726 IX86_BUILTIN_PTESTZ256,
27727 IX86_BUILTIN_PTESTC256,
27728 IX86_BUILTIN_PTESTNZC256,
27729
27730 IX86_BUILTIN_MOVMSKPD256,
27731 IX86_BUILTIN_MOVMSKPS256,
27732
27733 /* AVX2 */
27734 IX86_BUILTIN_MPSADBW256,
27735 IX86_BUILTIN_PABSB256,
27736 IX86_BUILTIN_PABSW256,
27737 IX86_BUILTIN_PABSD256,
27738 IX86_BUILTIN_PACKSSDW256,
27739 IX86_BUILTIN_PACKSSWB256,
27740 IX86_BUILTIN_PACKUSDW256,
27741 IX86_BUILTIN_PACKUSWB256,
27742 IX86_BUILTIN_PADDB256,
27743 IX86_BUILTIN_PADDW256,
27744 IX86_BUILTIN_PADDD256,
27745 IX86_BUILTIN_PADDQ256,
27746 IX86_BUILTIN_PADDSB256,
27747 IX86_BUILTIN_PADDSW256,
27748 IX86_BUILTIN_PADDUSB256,
27749 IX86_BUILTIN_PADDUSW256,
27750 IX86_BUILTIN_PALIGNR256,
27751 IX86_BUILTIN_AND256I,
27752 IX86_BUILTIN_ANDNOT256I,
27753 IX86_BUILTIN_PAVGB256,
27754 IX86_BUILTIN_PAVGW256,
27755 IX86_BUILTIN_PBLENDVB256,
27756 IX86_BUILTIN_PBLENDVW256,
27757 IX86_BUILTIN_PCMPEQB256,
27758 IX86_BUILTIN_PCMPEQW256,
27759 IX86_BUILTIN_PCMPEQD256,
27760 IX86_BUILTIN_PCMPEQQ256,
27761 IX86_BUILTIN_PCMPGTB256,
27762 IX86_BUILTIN_PCMPGTW256,
27763 IX86_BUILTIN_PCMPGTD256,
27764 IX86_BUILTIN_PCMPGTQ256,
27765 IX86_BUILTIN_PHADDW256,
27766 IX86_BUILTIN_PHADDD256,
27767 IX86_BUILTIN_PHADDSW256,
27768 IX86_BUILTIN_PHSUBW256,
27769 IX86_BUILTIN_PHSUBD256,
27770 IX86_BUILTIN_PHSUBSW256,
27771 IX86_BUILTIN_PMADDUBSW256,
27772 IX86_BUILTIN_PMADDWD256,
27773 IX86_BUILTIN_PMAXSB256,
27774 IX86_BUILTIN_PMAXSW256,
27775 IX86_BUILTIN_PMAXSD256,
27776 IX86_BUILTIN_PMAXUB256,
27777 IX86_BUILTIN_PMAXUW256,
27778 IX86_BUILTIN_PMAXUD256,
27779 IX86_BUILTIN_PMINSB256,
27780 IX86_BUILTIN_PMINSW256,
27781 IX86_BUILTIN_PMINSD256,
27782 IX86_BUILTIN_PMINUB256,
27783 IX86_BUILTIN_PMINUW256,
27784 IX86_BUILTIN_PMINUD256,
27785 IX86_BUILTIN_PMOVMSKB256,
27786 IX86_BUILTIN_PMOVSXBW256,
27787 IX86_BUILTIN_PMOVSXBD256,
27788 IX86_BUILTIN_PMOVSXBQ256,
27789 IX86_BUILTIN_PMOVSXWD256,
27790 IX86_BUILTIN_PMOVSXWQ256,
27791 IX86_BUILTIN_PMOVSXDQ256,
27792 IX86_BUILTIN_PMOVZXBW256,
27793 IX86_BUILTIN_PMOVZXBD256,
27794 IX86_BUILTIN_PMOVZXBQ256,
27795 IX86_BUILTIN_PMOVZXWD256,
27796 IX86_BUILTIN_PMOVZXWQ256,
27797 IX86_BUILTIN_PMOVZXDQ256,
27798 IX86_BUILTIN_PMULDQ256,
27799 IX86_BUILTIN_PMULHRSW256,
27800 IX86_BUILTIN_PMULHUW256,
27801 IX86_BUILTIN_PMULHW256,
27802 IX86_BUILTIN_PMULLW256,
27803 IX86_BUILTIN_PMULLD256,
27804 IX86_BUILTIN_PMULUDQ256,
27805 IX86_BUILTIN_POR256,
27806 IX86_BUILTIN_PSADBW256,
27807 IX86_BUILTIN_PSHUFB256,
27808 IX86_BUILTIN_PSHUFD256,
27809 IX86_BUILTIN_PSHUFHW256,
27810 IX86_BUILTIN_PSHUFLW256,
27811 IX86_BUILTIN_PSIGNB256,
27812 IX86_BUILTIN_PSIGNW256,
27813 IX86_BUILTIN_PSIGND256,
27814 IX86_BUILTIN_PSLLDQI256,
27815 IX86_BUILTIN_PSLLWI256,
27816 IX86_BUILTIN_PSLLW256,
27817 IX86_BUILTIN_PSLLDI256,
27818 IX86_BUILTIN_PSLLD256,
27819 IX86_BUILTIN_PSLLQI256,
27820 IX86_BUILTIN_PSLLQ256,
27821 IX86_BUILTIN_PSRAWI256,
27822 IX86_BUILTIN_PSRAW256,
27823 IX86_BUILTIN_PSRADI256,
27824 IX86_BUILTIN_PSRAD256,
27825 IX86_BUILTIN_PSRLDQI256,
27826 IX86_BUILTIN_PSRLWI256,
27827 IX86_BUILTIN_PSRLW256,
27828 IX86_BUILTIN_PSRLDI256,
27829 IX86_BUILTIN_PSRLD256,
27830 IX86_BUILTIN_PSRLQI256,
27831 IX86_BUILTIN_PSRLQ256,
27832 IX86_BUILTIN_PSUBB256,
27833 IX86_BUILTIN_PSUBW256,
27834 IX86_BUILTIN_PSUBD256,
27835 IX86_BUILTIN_PSUBQ256,
27836 IX86_BUILTIN_PSUBSB256,
27837 IX86_BUILTIN_PSUBSW256,
27838 IX86_BUILTIN_PSUBUSB256,
27839 IX86_BUILTIN_PSUBUSW256,
27840 IX86_BUILTIN_PUNPCKHBW256,
27841 IX86_BUILTIN_PUNPCKHWD256,
27842 IX86_BUILTIN_PUNPCKHDQ256,
27843 IX86_BUILTIN_PUNPCKHQDQ256,
27844 IX86_BUILTIN_PUNPCKLBW256,
27845 IX86_BUILTIN_PUNPCKLWD256,
27846 IX86_BUILTIN_PUNPCKLDQ256,
27847 IX86_BUILTIN_PUNPCKLQDQ256,
27848 IX86_BUILTIN_PXOR256,
27849 IX86_BUILTIN_MOVNTDQA256,
27850 IX86_BUILTIN_VBROADCASTSS_PS,
27851 IX86_BUILTIN_VBROADCASTSS_PS256,
27852 IX86_BUILTIN_VBROADCASTSD_PD256,
27853 IX86_BUILTIN_VBROADCASTSI256,
27854 IX86_BUILTIN_PBLENDD256,
27855 IX86_BUILTIN_PBLENDD128,
27856 IX86_BUILTIN_PBROADCASTB256,
27857 IX86_BUILTIN_PBROADCASTW256,
27858 IX86_BUILTIN_PBROADCASTD256,
27859 IX86_BUILTIN_PBROADCASTQ256,
27860 IX86_BUILTIN_PBROADCASTB128,
27861 IX86_BUILTIN_PBROADCASTW128,
27862 IX86_BUILTIN_PBROADCASTD128,
27863 IX86_BUILTIN_PBROADCASTQ128,
27864 IX86_BUILTIN_VPERMVARSI256,
27865 IX86_BUILTIN_VPERMDF256,
27866 IX86_BUILTIN_VPERMVARSF256,
27867 IX86_BUILTIN_VPERMDI256,
27868 IX86_BUILTIN_VPERMTI256,
27869 IX86_BUILTIN_VEXTRACT128I256,
27870 IX86_BUILTIN_VINSERT128I256,
27871 IX86_BUILTIN_MASKLOADD,
27872 IX86_BUILTIN_MASKLOADQ,
27873 IX86_BUILTIN_MASKLOADD256,
27874 IX86_BUILTIN_MASKLOADQ256,
27875 IX86_BUILTIN_MASKSTORED,
27876 IX86_BUILTIN_MASKSTOREQ,
27877 IX86_BUILTIN_MASKSTORED256,
27878 IX86_BUILTIN_MASKSTOREQ256,
27879 IX86_BUILTIN_PSLLVV4DI,
27880 IX86_BUILTIN_PSLLVV2DI,
27881 IX86_BUILTIN_PSLLVV8SI,
27882 IX86_BUILTIN_PSLLVV4SI,
27883 IX86_BUILTIN_PSRAVV8SI,
27884 IX86_BUILTIN_PSRAVV4SI,
27885 IX86_BUILTIN_PSRLVV4DI,
27886 IX86_BUILTIN_PSRLVV2DI,
27887 IX86_BUILTIN_PSRLVV8SI,
27888 IX86_BUILTIN_PSRLVV4SI,
27889
27890 IX86_BUILTIN_GATHERSIV2DF,
27891 IX86_BUILTIN_GATHERSIV4DF,
27892 IX86_BUILTIN_GATHERDIV2DF,
27893 IX86_BUILTIN_GATHERDIV4DF,
27894 IX86_BUILTIN_GATHERSIV4SF,
27895 IX86_BUILTIN_GATHERSIV8SF,
27896 IX86_BUILTIN_GATHERDIV4SF,
27897 IX86_BUILTIN_GATHERDIV8SF,
27898 IX86_BUILTIN_GATHERSIV2DI,
27899 IX86_BUILTIN_GATHERSIV4DI,
27900 IX86_BUILTIN_GATHERDIV2DI,
27901 IX86_BUILTIN_GATHERDIV4DI,
27902 IX86_BUILTIN_GATHERSIV4SI,
27903 IX86_BUILTIN_GATHERSIV8SI,
27904 IX86_BUILTIN_GATHERDIV4SI,
27905 IX86_BUILTIN_GATHERDIV8SI,
27906
27907 /* AVX512F */
27908 IX86_BUILTIN_ADDPD512,
27909 IX86_BUILTIN_ADDPS512,
27910 IX86_BUILTIN_ADDSD_ROUND,
27911 IX86_BUILTIN_ADDSS_ROUND,
27912 IX86_BUILTIN_ALIGND512,
27913 IX86_BUILTIN_ALIGNQ512,
27914 IX86_BUILTIN_BLENDMD512,
27915 IX86_BUILTIN_BLENDMPD512,
27916 IX86_BUILTIN_BLENDMPS512,
27917 IX86_BUILTIN_BLENDMQ512,
27918 IX86_BUILTIN_BROADCASTF32X4_512,
27919 IX86_BUILTIN_BROADCASTF64X4_512,
27920 IX86_BUILTIN_BROADCASTI32X4_512,
27921 IX86_BUILTIN_BROADCASTI64X4_512,
27922 IX86_BUILTIN_BROADCASTSD512,
27923 IX86_BUILTIN_BROADCASTSS512,
27924 IX86_BUILTIN_CMPD512,
27925 IX86_BUILTIN_CMPPD512,
27926 IX86_BUILTIN_CMPPS512,
27927 IX86_BUILTIN_CMPQ512,
27928 IX86_BUILTIN_CMPSD_MASK,
27929 IX86_BUILTIN_CMPSS_MASK,
27930 IX86_BUILTIN_COMIDF,
27931 IX86_BUILTIN_COMISF,
27932 IX86_BUILTIN_COMPRESSPD512,
27933 IX86_BUILTIN_COMPRESSPDSTORE512,
27934 IX86_BUILTIN_COMPRESSPS512,
27935 IX86_BUILTIN_COMPRESSPSSTORE512,
27936 IX86_BUILTIN_CVTDQ2PD512,
27937 IX86_BUILTIN_CVTDQ2PS512,
27938 IX86_BUILTIN_CVTPD2DQ512,
27939 IX86_BUILTIN_CVTPD2PS512,
27940 IX86_BUILTIN_CVTPD2UDQ512,
27941 IX86_BUILTIN_CVTPH2PS512,
27942 IX86_BUILTIN_CVTPS2DQ512,
27943 IX86_BUILTIN_CVTPS2PD512,
27944 IX86_BUILTIN_CVTPS2PH512,
27945 IX86_BUILTIN_CVTPS2UDQ512,
27946 IX86_BUILTIN_CVTSD2SS_ROUND,
27947 IX86_BUILTIN_CVTSI2SD64,
27948 IX86_BUILTIN_CVTSI2SS32,
27949 IX86_BUILTIN_CVTSI2SS64,
27950 IX86_BUILTIN_CVTSS2SD_ROUND,
27951 IX86_BUILTIN_CVTTPD2DQ512,
27952 IX86_BUILTIN_CVTTPD2UDQ512,
27953 IX86_BUILTIN_CVTTPS2DQ512,
27954 IX86_BUILTIN_CVTTPS2UDQ512,
27955 IX86_BUILTIN_CVTUDQ2PD512,
27956 IX86_BUILTIN_CVTUDQ2PS512,
27957 IX86_BUILTIN_CVTUSI2SD32,
27958 IX86_BUILTIN_CVTUSI2SD64,
27959 IX86_BUILTIN_CVTUSI2SS32,
27960 IX86_BUILTIN_CVTUSI2SS64,
27961 IX86_BUILTIN_DIVPD512,
27962 IX86_BUILTIN_DIVPS512,
27963 IX86_BUILTIN_DIVSD_ROUND,
27964 IX86_BUILTIN_DIVSS_ROUND,
27965 IX86_BUILTIN_EXPANDPD512,
27966 IX86_BUILTIN_EXPANDPD512Z,
27967 IX86_BUILTIN_EXPANDPDLOAD512,
27968 IX86_BUILTIN_EXPANDPDLOAD512Z,
27969 IX86_BUILTIN_EXPANDPS512,
27970 IX86_BUILTIN_EXPANDPS512Z,
27971 IX86_BUILTIN_EXPANDPSLOAD512,
27972 IX86_BUILTIN_EXPANDPSLOAD512Z,
27973 IX86_BUILTIN_EXTRACTF32X4,
27974 IX86_BUILTIN_EXTRACTF64X4,
27975 IX86_BUILTIN_EXTRACTI32X4,
27976 IX86_BUILTIN_EXTRACTI64X4,
27977 IX86_BUILTIN_FIXUPIMMPD512_MASK,
27978 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
27979 IX86_BUILTIN_FIXUPIMMPS512_MASK,
27980 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
27981 IX86_BUILTIN_FIXUPIMMSD128_MASK,
27982 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
27983 IX86_BUILTIN_FIXUPIMMSS128_MASK,
27984 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
27985 IX86_BUILTIN_GETEXPPD512,
27986 IX86_BUILTIN_GETEXPPS512,
27987 IX86_BUILTIN_GETEXPSD128,
27988 IX86_BUILTIN_GETEXPSS128,
27989 IX86_BUILTIN_GETMANTPD512,
27990 IX86_BUILTIN_GETMANTPS512,
27991 IX86_BUILTIN_GETMANTSD128,
27992 IX86_BUILTIN_GETMANTSS128,
27993 IX86_BUILTIN_INSERTF32X4,
27994 IX86_BUILTIN_INSERTF64X4,
27995 IX86_BUILTIN_INSERTI32X4,
27996 IX86_BUILTIN_INSERTI64X4,
27997 IX86_BUILTIN_LOADAPD512,
27998 IX86_BUILTIN_LOADAPS512,
27999 IX86_BUILTIN_LOADDQUDI512,
28000 IX86_BUILTIN_LOADDQUSI512,
28001 IX86_BUILTIN_LOADUPD512,
28002 IX86_BUILTIN_LOADUPS512,
28003 IX86_BUILTIN_MAXPD512,
28004 IX86_BUILTIN_MAXPS512,
28005 IX86_BUILTIN_MAXSD_ROUND,
28006 IX86_BUILTIN_MAXSS_ROUND,
28007 IX86_BUILTIN_MINPD512,
28008 IX86_BUILTIN_MINPS512,
28009 IX86_BUILTIN_MINSD_ROUND,
28010 IX86_BUILTIN_MINSS_ROUND,
28011 IX86_BUILTIN_MOVAPD512,
28012 IX86_BUILTIN_MOVAPS512,
28013 IX86_BUILTIN_MOVDDUP512,
28014 IX86_BUILTIN_MOVDQA32LOAD512,
28015 IX86_BUILTIN_MOVDQA32STORE512,
28016 IX86_BUILTIN_MOVDQA32_512,
28017 IX86_BUILTIN_MOVDQA64LOAD512,
28018 IX86_BUILTIN_MOVDQA64STORE512,
28019 IX86_BUILTIN_MOVDQA64_512,
28020 IX86_BUILTIN_MOVNTDQ512,
28021 IX86_BUILTIN_MOVNTDQA512,
28022 IX86_BUILTIN_MOVNTPD512,
28023 IX86_BUILTIN_MOVNTPS512,
28024 IX86_BUILTIN_MOVSHDUP512,
28025 IX86_BUILTIN_MOVSLDUP512,
28026 IX86_BUILTIN_MULPD512,
28027 IX86_BUILTIN_MULPS512,
28028 IX86_BUILTIN_MULSD_ROUND,
28029 IX86_BUILTIN_MULSS_ROUND,
28030 IX86_BUILTIN_PABSD512,
28031 IX86_BUILTIN_PABSQ512,
28032 IX86_BUILTIN_PADDD512,
28033 IX86_BUILTIN_PADDQ512,
28034 IX86_BUILTIN_PANDD512,
28035 IX86_BUILTIN_PANDND512,
28036 IX86_BUILTIN_PANDNQ512,
28037 IX86_BUILTIN_PANDQ512,
28038 IX86_BUILTIN_PBROADCASTD512,
28039 IX86_BUILTIN_PBROADCASTD512_GPR,
28040 IX86_BUILTIN_PBROADCASTMB512,
28041 IX86_BUILTIN_PBROADCASTMW512,
28042 IX86_BUILTIN_PBROADCASTQ512,
28043 IX86_BUILTIN_PBROADCASTQ512_GPR,
28044 IX86_BUILTIN_PBROADCASTQ512_MEM,
28045 IX86_BUILTIN_PCMPEQD512_MASK,
28046 IX86_BUILTIN_PCMPEQQ512_MASK,
28047 IX86_BUILTIN_PCMPGTD512_MASK,
28048 IX86_BUILTIN_PCMPGTQ512_MASK,
28049 IX86_BUILTIN_PCOMPRESSD512,
28050 IX86_BUILTIN_PCOMPRESSDSTORE512,
28051 IX86_BUILTIN_PCOMPRESSQ512,
28052 IX86_BUILTIN_PCOMPRESSQSTORE512,
28053 IX86_BUILTIN_PEXPANDD512,
28054 IX86_BUILTIN_PEXPANDD512Z,
28055 IX86_BUILTIN_PEXPANDDLOAD512,
28056 IX86_BUILTIN_PEXPANDDLOAD512Z,
28057 IX86_BUILTIN_PEXPANDQ512,
28058 IX86_BUILTIN_PEXPANDQ512Z,
28059 IX86_BUILTIN_PEXPANDQLOAD512,
28060 IX86_BUILTIN_PEXPANDQLOAD512Z,
28061 IX86_BUILTIN_PMAXSD512,
28062 IX86_BUILTIN_PMAXSQ512,
28063 IX86_BUILTIN_PMAXUD512,
28064 IX86_BUILTIN_PMAXUQ512,
28065 IX86_BUILTIN_PMINSD512,
28066 IX86_BUILTIN_PMINSQ512,
28067 IX86_BUILTIN_PMINUD512,
28068 IX86_BUILTIN_PMINUQ512,
28069 IX86_BUILTIN_PMOVDB512,
28070 IX86_BUILTIN_PMOVDW512,
28071 IX86_BUILTIN_PMOVQB512,
28072 IX86_BUILTIN_PMOVQD512,
28073 IX86_BUILTIN_PMOVQW512,
28074 IX86_BUILTIN_PMOVSDB512,
28075 IX86_BUILTIN_PMOVSDW512,
28076 IX86_BUILTIN_PMOVSQB512,
28077 IX86_BUILTIN_PMOVSQD512,
28078 IX86_BUILTIN_PMOVSQW512,
28079 IX86_BUILTIN_PMOVSXBD512,
28080 IX86_BUILTIN_PMOVSXBQ512,
28081 IX86_BUILTIN_PMOVSXDQ512,
28082 IX86_BUILTIN_PMOVSXWD512,
28083 IX86_BUILTIN_PMOVSXWQ512,
28084 IX86_BUILTIN_PMOVUSDB512,
28085 IX86_BUILTIN_PMOVUSDW512,
28086 IX86_BUILTIN_PMOVUSQB512,
28087 IX86_BUILTIN_PMOVUSQD512,
28088 IX86_BUILTIN_PMOVUSQW512,
28089 IX86_BUILTIN_PMOVZXBD512,
28090 IX86_BUILTIN_PMOVZXBQ512,
28091 IX86_BUILTIN_PMOVZXDQ512,
28092 IX86_BUILTIN_PMOVZXWD512,
28093 IX86_BUILTIN_PMOVZXWQ512,
28094 IX86_BUILTIN_PMULDQ512,
28095 IX86_BUILTIN_PMULLD512,
28096 IX86_BUILTIN_PMULUDQ512,
28097 IX86_BUILTIN_PORD512,
28098 IX86_BUILTIN_PORQ512,
28099 IX86_BUILTIN_PROLD512,
28100 IX86_BUILTIN_PROLQ512,
28101 IX86_BUILTIN_PROLVD512,
28102 IX86_BUILTIN_PROLVQ512,
28103 IX86_BUILTIN_PRORD512,
28104 IX86_BUILTIN_PRORQ512,
28105 IX86_BUILTIN_PRORVD512,
28106 IX86_BUILTIN_PRORVQ512,
28107 IX86_BUILTIN_PSHUFD512,
28108 IX86_BUILTIN_PSLLD512,
28109 IX86_BUILTIN_PSLLDI512,
28110 IX86_BUILTIN_PSLLQ512,
28111 IX86_BUILTIN_PSLLQI512,
28112 IX86_BUILTIN_PSLLVV16SI,
28113 IX86_BUILTIN_PSLLVV8DI,
28114 IX86_BUILTIN_PSRAD512,
28115 IX86_BUILTIN_PSRADI512,
28116 IX86_BUILTIN_PSRAQ512,
28117 IX86_BUILTIN_PSRAQI512,
28118 IX86_BUILTIN_PSRAVV16SI,
28119 IX86_BUILTIN_PSRAVV8DI,
28120 IX86_BUILTIN_PSRLD512,
28121 IX86_BUILTIN_PSRLDI512,
28122 IX86_BUILTIN_PSRLQ512,
28123 IX86_BUILTIN_PSRLQI512,
28124 IX86_BUILTIN_PSRLVV16SI,
28125 IX86_BUILTIN_PSRLVV8DI,
28126 IX86_BUILTIN_PSUBD512,
28127 IX86_BUILTIN_PSUBQ512,
28128 IX86_BUILTIN_PTESTMD512,
28129 IX86_BUILTIN_PTESTMQ512,
28130 IX86_BUILTIN_PTESTNMD512,
28131 IX86_BUILTIN_PTESTNMQ512,
28132 IX86_BUILTIN_PUNPCKHDQ512,
28133 IX86_BUILTIN_PUNPCKHQDQ512,
28134 IX86_BUILTIN_PUNPCKLDQ512,
28135 IX86_BUILTIN_PUNPCKLQDQ512,
28136 IX86_BUILTIN_PXORD512,
28137 IX86_BUILTIN_PXORQ512,
28138 IX86_BUILTIN_RCP14PD512,
28139 IX86_BUILTIN_RCP14PS512,
28140 IX86_BUILTIN_RCP14SD,
28141 IX86_BUILTIN_RCP14SS,
28142 IX86_BUILTIN_RNDSCALEPD,
28143 IX86_BUILTIN_RNDSCALEPS,
28144 IX86_BUILTIN_RNDSCALESD,
28145 IX86_BUILTIN_RNDSCALESS,
28146 IX86_BUILTIN_RSQRT14PD512,
28147 IX86_BUILTIN_RSQRT14PS512,
28148 IX86_BUILTIN_RSQRT14SD,
28149 IX86_BUILTIN_RSQRT14SS,
28150 IX86_BUILTIN_SCALEFPD512,
28151 IX86_BUILTIN_SCALEFPS512,
28152 IX86_BUILTIN_SCALEFSD,
28153 IX86_BUILTIN_SCALEFSS,
28154 IX86_BUILTIN_SHUFPD512,
28155 IX86_BUILTIN_SHUFPS512,
28156 IX86_BUILTIN_SHUF_F32x4,
28157 IX86_BUILTIN_SHUF_F64x2,
28158 IX86_BUILTIN_SHUF_I32x4,
28159 IX86_BUILTIN_SHUF_I64x2,
28160 IX86_BUILTIN_SQRTPD512,
28161 IX86_BUILTIN_SQRTPD512_MASK,
28162 IX86_BUILTIN_SQRTPS512_MASK,
28163 IX86_BUILTIN_SQRTPS_NR512,
28164 IX86_BUILTIN_SQRTSD_ROUND,
28165 IX86_BUILTIN_SQRTSS_ROUND,
28166 IX86_BUILTIN_STOREAPD512,
28167 IX86_BUILTIN_STOREAPS512,
28168 IX86_BUILTIN_STOREDQUDI512,
28169 IX86_BUILTIN_STOREDQUSI512,
28170 IX86_BUILTIN_STOREUPD512,
28171 IX86_BUILTIN_STOREUPS512,
28172 IX86_BUILTIN_SUBPD512,
28173 IX86_BUILTIN_SUBPS512,
28174 IX86_BUILTIN_SUBSD_ROUND,
28175 IX86_BUILTIN_SUBSS_ROUND,
28176 IX86_BUILTIN_UCMPD512,
28177 IX86_BUILTIN_UCMPQ512,
28178 IX86_BUILTIN_UNPCKHPD512,
28179 IX86_BUILTIN_UNPCKHPS512,
28180 IX86_BUILTIN_UNPCKLPD512,
28181 IX86_BUILTIN_UNPCKLPS512,
28182 IX86_BUILTIN_VCVTSD2SI32,
28183 IX86_BUILTIN_VCVTSD2SI64,
28184 IX86_BUILTIN_VCVTSD2USI32,
28185 IX86_BUILTIN_VCVTSD2USI64,
28186 IX86_BUILTIN_VCVTSS2SI32,
28187 IX86_BUILTIN_VCVTSS2SI64,
28188 IX86_BUILTIN_VCVTSS2USI32,
28189 IX86_BUILTIN_VCVTSS2USI64,
28190 IX86_BUILTIN_VCVTTSD2SI32,
28191 IX86_BUILTIN_VCVTTSD2SI64,
28192 IX86_BUILTIN_VCVTTSD2USI32,
28193 IX86_BUILTIN_VCVTTSD2USI64,
28194 IX86_BUILTIN_VCVTTSS2SI32,
28195 IX86_BUILTIN_VCVTTSS2SI64,
28196 IX86_BUILTIN_VCVTTSS2USI32,
28197 IX86_BUILTIN_VCVTTSS2USI64,
28198 IX86_BUILTIN_VFMADDPD512_MASK,
28199 IX86_BUILTIN_VFMADDPD512_MASK3,
28200 IX86_BUILTIN_VFMADDPD512_MASKZ,
28201 IX86_BUILTIN_VFMADDPS512_MASK,
28202 IX86_BUILTIN_VFMADDPS512_MASK3,
28203 IX86_BUILTIN_VFMADDPS512_MASKZ,
28204 IX86_BUILTIN_VFMADDSD3_ROUND,
28205 IX86_BUILTIN_VFMADDSS3_ROUND,
28206 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28207 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28208 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28209 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28210 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28211 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28212 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28213 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28214 IX86_BUILTIN_VFMSUBPD512_MASK3,
28215 IX86_BUILTIN_VFMSUBPS512_MASK3,
28216 IX86_BUILTIN_VFMSUBSD3_MASK3,
28217 IX86_BUILTIN_VFMSUBSS3_MASK3,
28218 IX86_BUILTIN_VFNMADDPD512_MASK,
28219 IX86_BUILTIN_VFNMADDPS512_MASK,
28220 IX86_BUILTIN_VFNMSUBPD512_MASK,
28221 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28222 IX86_BUILTIN_VFNMSUBPS512_MASK,
28223 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28224 IX86_BUILTIN_VPCLZCNTD512,
28225 IX86_BUILTIN_VPCLZCNTQ512,
28226 IX86_BUILTIN_VPCONFLICTD512,
28227 IX86_BUILTIN_VPCONFLICTQ512,
28228 IX86_BUILTIN_VPERMDF512,
28229 IX86_BUILTIN_VPERMDI512,
28230 IX86_BUILTIN_VPERMI2VARD512,
28231 IX86_BUILTIN_VPERMI2VARPD512,
28232 IX86_BUILTIN_VPERMI2VARPS512,
28233 IX86_BUILTIN_VPERMI2VARQ512,
28234 IX86_BUILTIN_VPERMILPD512,
28235 IX86_BUILTIN_VPERMILPS512,
28236 IX86_BUILTIN_VPERMILVARPD512,
28237 IX86_BUILTIN_VPERMILVARPS512,
28238 IX86_BUILTIN_VPERMT2VARD512,
28239 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28240 IX86_BUILTIN_VPERMT2VARPD512,
28241 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28242 IX86_BUILTIN_VPERMT2VARPS512,
28243 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28244 IX86_BUILTIN_VPERMT2VARQ512,
28245 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28246 IX86_BUILTIN_VPERMVARDF512,
28247 IX86_BUILTIN_VPERMVARDI512,
28248 IX86_BUILTIN_VPERMVARSF512,
28249 IX86_BUILTIN_VPERMVARSI512,
28250 IX86_BUILTIN_VTERNLOGD512_MASK,
28251 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28252 IX86_BUILTIN_VTERNLOGQ512_MASK,
28253 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28254
28255 /* Mask arithmetic operations */
28256 IX86_BUILTIN_KAND16,
28257 IX86_BUILTIN_KANDN16,
28258 IX86_BUILTIN_KNOT16,
28259 IX86_BUILTIN_KOR16,
28260 IX86_BUILTIN_KORTESTC16,
28261 IX86_BUILTIN_KORTESTZ16,
28262 IX86_BUILTIN_KUNPCKBW,
28263 IX86_BUILTIN_KXNOR16,
28264 IX86_BUILTIN_KXOR16,
28265
28266 /* Alternate 4 and 8 element gather/scatter for the vectorizer
28267 where all operands are 32 or 64 bytes wide, respectively. */
28268 IX86_BUILTIN_GATHERALTSIV4DF,
28269 IX86_BUILTIN_GATHERALTDIV8SF,
28270 IX86_BUILTIN_GATHERALTSIV4DI,
28271 IX86_BUILTIN_GATHERALTDIV8SI,
28272 IX86_BUILTIN_GATHER3ALTDIV16SF,
28273 IX86_BUILTIN_GATHER3ALTDIV16SI,
28274 IX86_BUILTIN_GATHER3ALTSIV8DF,
28275 IX86_BUILTIN_GATHER3ALTSIV8DI,
28276 IX86_BUILTIN_GATHER3DIV16SF,
28277 IX86_BUILTIN_GATHER3DIV16SI,
28278 IX86_BUILTIN_GATHER3DIV8DF,
28279 IX86_BUILTIN_GATHER3DIV8DI,
28280 IX86_BUILTIN_GATHER3SIV16SF,
28281 IX86_BUILTIN_GATHER3SIV16SI,
28282 IX86_BUILTIN_GATHER3SIV8DF,
28283 IX86_BUILTIN_GATHER3SIV8DI,
28284 IX86_BUILTIN_SCATTERDIV16SF,
28285 IX86_BUILTIN_SCATTERDIV16SI,
28286 IX86_BUILTIN_SCATTERDIV8DF,
28287 IX86_BUILTIN_SCATTERDIV8DI,
28288 IX86_BUILTIN_SCATTERSIV16SF,
28289 IX86_BUILTIN_SCATTERSIV16SI,
28290 IX86_BUILTIN_SCATTERSIV8DF,
28291 IX86_BUILTIN_SCATTERSIV8DI,
28292
28293 /* AVX512PF */
28294 IX86_BUILTIN_GATHERPFDPS,
28295 IX86_BUILTIN_GATHERPFQPS,
28296 IX86_BUILTIN_SCATTERPFDPS,
28297 IX86_BUILTIN_SCATTERPFQPS,
28298
28299 /* AVX-512ER */
28300 IX86_BUILTIN_EXP2PD_MASK,
28301 IX86_BUILTIN_EXP2PS_MASK,
28302 IX86_BUILTIN_EXP2PS,
28303 IX86_BUILTIN_RCP28PD,
28304 IX86_BUILTIN_RCP28PS,
28305 IX86_BUILTIN_RCP28SD,
28306 IX86_BUILTIN_RCP28SS,
28307 IX86_BUILTIN_RSQRT28PD,
28308 IX86_BUILTIN_RSQRT28PS,
28309 IX86_BUILTIN_RSQRT28SD,
28310 IX86_BUILTIN_RSQRT28SS,
28311
28312 /* SHA builtins. */
28313 IX86_BUILTIN_SHA1MSG1,
28314 IX86_BUILTIN_SHA1MSG2,
28315 IX86_BUILTIN_SHA1NEXTE,
28316 IX86_BUILTIN_SHA1RNDS4,
28317 IX86_BUILTIN_SHA256MSG1,
28318 IX86_BUILTIN_SHA256MSG2,
28319 IX86_BUILTIN_SHA256RNDS2,
28320
28321 /* TFmode support builtins. */
28322 IX86_BUILTIN_INFQ,
28323 IX86_BUILTIN_HUGE_VALQ,
28324 IX86_BUILTIN_FABSQ,
28325 IX86_BUILTIN_COPYSIGNQ,
28326
28327 /* Vectorizer support builtins. */
28328 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28329 IX86_BUILTIN_CPYSGNPS,
28330 IX86_BUILTIN_CPYSGNPD,
28331 IX86_BUILTIN_CPYSGNPS256,
28332 IX86_BUILTIN_CPYSGNPS512,
28333 IX86_BUILTIN_CPYSGNPD256,
28334 IX86_BUILTIN_CPYSGNPD512,
28335 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28336 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28337
28338
28339 /* FMA4 instructions. */
28340 IX86_BUILTIN_VFMADDSS,
28341 IX86_BUILTIN_VFMADDSD,
28342 IX86_BUILTIN_VFMADDPS,
28343 IX86_BUILTIN_VFMADDPD,
28344 IX86_BUILTIN_VFMADDPS256,
28345 IX86_BUILTIN_VFMADDPD256,
28346 IX86_BUILTIN_VFMADDSUBPS,
28347 IX86_BUILTIN_VFMADDSUBPD,
28348 IX86_BUILTIN_VFMADDSUBPS256,
28349 IX86_BUILTIN_VFMADDSUBPD256,
28350
28351 /* FMA3 instructions. */
28352 IX86_BUILTIN_VFMADDSS3,
28353 IX86_BUILTIN_VFMADDSD3,
28354
28355 /* XOP instructions. */
28356 IX86_BUILTIN_VPCMOV,
28357 IX86_BUILTIN_VPCMOV_V2DI,
28358 IX86_BUILTIN_VPCMOV_V4SI,
28359 IX86_BUILTIN_VPCMOV_V8HI,
28360 IX86_BUILTIN_VPCMOV_V16QI,
28361 IX86_BUILTIN_VPCMOV_V4SF,
28362 IX86_BUILTIN_VPCMOV_V2DF,
28363 IX86_BUILTIN_VPCMOV256,
28364 IX86_BUILTIN_VPCMOV_V4DI256,
28365 IX86_BUILTIN_VPCMOV_V8SI256,
28366 IX86_BUILTIN_VPCMOV_V16HI256,
28367 IX86_BUILTIN_VPCMOV_V32QI256,
28368 IX86_BUILTIN_VPCMOV_V8SF256,
28369 IX86_BUILTIN_VPCMOV_V4DF256,
28370
28371 IX86_BUILTIN_VPPERM,
28372
28373 IX86_BUILTIN_VPMACSSWW,
28374 IX86_BUILTIN_VPMACSWW,
28375 IX86_BUILTIN_VPMACSSWD,
28376 IX86_BUILTIN_VPMACSWD,
28377 IX86_BUILTIN_VPMACSSDD,
28378 IX86_BUILTIN_VPMACSDD,
28379 IX86_BUILTIN_VPMACSSDQL,
28380 IX86_BUILTIN_VPMACSSDQH,
28381 IX86_BUILTIN_VPMACSDQL,
28382 IX86_BUILTIN_VPMACSDQH,
28383 IX86_BUILTIN_VPMADCSSWD,
28384 IX86_BUILTIN_VPMADCSWD,
28385
28386 IX86_BUILTIN_VPHADDBW,
28387 IX86_BUILTIN_VPHADDBD,
28388 IX86_BUILTIN_VPHADDBQ,
28389 IX86_BUILTIN_VPHADDWD,
28390 IX86_BUILTIN_VPHADDWQ,
28391 IX86_BUILTIN_VPHADDDQ,
28392 IX86_BUILTIN_VPHADDUBW,
28393 IX86_BUILTIN_VPHADDUBD,
28394 IX86_BUILTIN_VPHADDUBQ,
28395 IX86_BUILTIN_VPHADDUWD,
28396 IX86_BUILTIN_VPHADDUWQ,
28397 IX86_BUILTIN_VPHADDUDQ,
28398 IX86_BUILTIN_VPHSUBBW,
28399 IX86_BUILTIN_VPHSUBWD,
28400 IX86_BUILTIN_VPHSUBDQ,
28401
28402 IX86_BUILTIN_VPROTB,
28403 IX86_BUILTIN_VPROTW,
28404 IX86_BUILTIN_VPROTD,
28405 IX86_BUILTIN_VPROTQ,
28406 IX86_BUILTIN_VPROTB_IMM,
28407 IX86_BUILTIN_VPROTW_IMM,
28408 IX86_BUILTIN_VPROTD_IMM,
28409 IX86_BUILTIN_VPROTQ_IMM,
28410
28411 IX86_BUILTIN_VPSHLB,
28412 IX86_BUILTIN_VPSHLW,
28413 IX86_BUILTIN_VPSHLD,
28414 IX86_BUILTIN_VPSHLQ,
28415 IX86_BUILTIN_VPSHAB,
28416 IX86_BUILTIN_VPSHAW,
28417 IX86_BUILTIN_VPSHAD,
28418 IX86_BUILTIN_VPSHAQ,
28419
28420 IX86_BUILTIN_VFRCZSS,
28421 IX86_BUILTIN_VFRCZSD,
28422 IX86_BUILTIN_VFRCZPS,
28423 IX86_BUILTIN_VFRCZPD,
28424 IX86_BUILTIN_VFRCZPS256,
28425 IX86_BUILTIN_VFRCZPD256,
28426
28427 IX86_BUILTIN_VPCOMEQUB,
28428 IX86_BUILTIN_VPCOMNEUB,
28429 IX86_BUILTIN_VPCOMLTUB,
28430 IX86_BUILTIN_VPCOMLEUB,
28431 IX86_BUILTIN_VPCOMGTUB,
28432 IX86_BUILTIN_VPCOMGEUB,
28433 IX86_BUILTIN_VPCOMFALSEUB,
28434 IX86_BUILTIN_VPCOMTRUEUB,
28435
28436 IX86_BUILTIN_VPCOMEQUW,
28437 IX86_BUILTIN_VPCOMNEUW,
28438 IX86_BUILTIN_VPCOMLTUW,
28439 IX86_BUILTIN_VPCOMLEUW,
28440 IX86_BUILTIN_VPCOMGTUW,
28441 IX86_BUILTIN_VPCOMGEUW,
28442 IX86_BUILTIN_VPCOMFALSEUW,
28443 IX86_BUILTIN_VPCOMTRUEUW,
28444
28445 IX86_BUILTIN_VPCOMEQUD,
28446 IX86_BUILTIN_VPCOMNEUD,
28447 IX86_BUILTIN_VPCOMLTUD,
28448 IX86_BUILTIN_VPCOMLEUD,
28449 IX86_BUILTIN_VPCOMGTUD,
28450 IX86_BUILTIN_VPCOMGEUD,
28451 IX86_BUILTIN_VPCOMFALSEUD,
28452 IX86_BUILTIN_VPCOMTRUEUD,
28453
28454 IX86_BUILTIN_VPCOMEQUQ,
28455 IX86_BUILTIN_VPCOMNEUQ,
28456 IX86_BUILTIN_VPCOMLTUQ,
28457 IX86_BUILTIN_VPCOMLEUQ,
28458 IX86_BUILTIN_VPCOMGTUQ,
28459 IX86_BUILTIN_VPCOMGEUQ,
28460 IX86_BUILTIN_VPCOMFALSEUQ,
28461 IX86_BUILTIN_VPCOMTRUEUQ,
28462
28463 IX86_BUILTIN_VPCOMEQB,
28464 IX86_BUILTIN_VPCOMNEB,
28465 IX86_BUILTIN_VPCOMLTB,
28466 IX86_BUILTIN_VPCOMLEB,
28467 IX86_BUILTIN_VPCOMGTB,
28468 IX86_BUILTIN_VPCOMGEB,
28469 IX86_BUILTIN_VPCOMFALSEB,
28470 IX86_BUILTIN_VPCOMTRUEB,
28471
28472 IX86_BUILTIN_VPCOMEQW,
28473 IX86_BUILTIN_VPCOMNEW,
28474 IX86_BUILTIN_VPCOMLTW,
28475 IX86_BUILTIN_VPCOMLEW,
28476 IX86_BUILTIN_VPCOMGTW,
28477 IX86_BUILTIN_VPCOMGEW,
28478 IX86_BUILTIN_VPCOMFALSEW,
28479 IX86_BUILTIN_VPCOMTRUEW,
28480
28481 IX86_BUILTIN_VPCOMEQD,
28482 IX86_BUILTIN_VPCOMNED,
28483 IX86_BUILTIN_VPCOMLTD,
28484 IX86_BUILTIN_VPCOMLED,
28485 IX86_BUILTIN_VPCOMGTD,
28486 IX86_BUILTIN_VPCOMGED,
28487 IX86_BUILTIN_VPCOMFALSED,
28488 IX86_BUILTIN_VPCOMTRUED,
28489
28490 IX86_BUILTIN_VPCOMEQQ,
28491 IX86_BUILTIN_VPCOMNEQ,
28492 IX86_BUILTIN_VPCOMLTQ,
28493 IX86_BUILTIN_VPCOMLEQ,
28494 IX86_BUILTIN_VPCOMGTQ,
28495 IX86_BUILTIN_VPCOMGEQ,
28496 IX86_BUILTIN_VPCOMFALSEQ,
28497 IX86_BUILTIN_VPCOMTRUEQ,
28498
28499 /* LWP instructions. */
28500 IX86_BUILTIN_LLWPCB,
28501 IX86_BUILTIN_SLWPCB,
28502 IX86_BUILTIN_LWPVAL32,
28503 IX86_BUILTIN_LWPVAL64,
28504 IX86_BUILTIN_LWPINS32,
28505 IX86_BUILTIN_LWPINS64,
28506
28507 IX86_BUILTIN_CLZS,
28508
28509 /* RTM */
28510 IX86_BUILTIN_XBEGIN,
28511 IX86_BUILTIN_XEND,
28512 IX86_BUILTIN_XABORT,
28513 IX86_BUILTIN_XTEST,
28514
28515 /* BMI instructions. */
28516 IX86_BUILTIN_BEXTR32,
28517 IX86_BUILTIN_BEXTR64,
28518 IX86_BUILTIN_CTZS,
28519
28520 /* TBM instructions. */
28521 IX86_BUILTIN_BEXTRI32,
28522 IX86_BUILTIN_BEXTRI64,
28523
28524 /* BMI2 instructions. */
28525 IX86_BUILTIN_BZHI32,
28526 IX86_BUILTIN_BZHI64,
28527 IX86_BUILTIN_PDEP32,
28528 IX86_BUILTIN_PDEP64,
28529 IX86_BUILTIN_PEXT32,
28530 IX86_BUILTIN_PEXT64,
28531
28532 /* ADX instructions. */
28533 IX86_BUILTIN_ADDCARRYX32,
28534 IX86_BUILTIN_ADDCARRYX64,
28535
28536 /* FSGSBASE instructions. */
28537 IX86_BUILTIN_RDFSBASE32,
28538 IX86_BUILTIN_RDFSBASE64,
28539 IX86_BUILTIN_RDGSBASE32,
28540 IX86_BUILTIN_RDGSBASE64,
28541 IX86_BUILTIN_WRFSBASE32,
28542 IX86_BUILTIN_WRFSBASE64,
28543 IX86_BUILTIN_WRGSBASE32,
28544 IX86_BUILTIN_WRGSBASE64,
28545
28546 /* RDRND instructions. */
28547 IX86_BUILTIN_RDRAND16_STEP,
28548 IX86_BUILTIN_RDRAND32_STEP,
28549 IX86_BUILTIN_RDRAND64_STEP,
28550
28551 /* RDSEED instructions. */
28552 IX86_BUILTIN_RDSEED16_STEP,
28553 IX86_BUILTIN_RDSEED32_STEP,
28554 IX86_BUILTIN_RDSEED64_STEP,
28555
28556 /* F16C instructions. */
28557 IX86_BUILTIN_CVTPH2PS,
28558 IX86_BUILTIN_CVTPH2PS256,
28559 IX86_BUILTIN_CVTPS2PH,
28560 IX86_BUILTIN_CVTPS2PH256,
28561
28562 /* CFString built-in for darwin */
28563 IX86_BUILTIN_CFSTRING,
28564
28565 /* Builtins to get CPU type and supported features. */
28566 IX86_BUILTIN_CPU_INIT,
28567 IX86_BUILTIN_CPU_IS,
28568 IX86_BUILTIN_CPU_SUPPORTS,
28569
28570 /* Read/write FLAGS register built-ins. */
28571 IX86_BUILTIN_READ_FLAGS,
28572 IX86_BUILTIN_WRITE_FLAGS,
28573
28574 IX86_BUILTIN_MAX
28575 };
28576
28577 /* Table for the ix86 builtin decls. */
28578 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28579
28580 /* Table of all the builtin functions that are possible with different ISAs
28581 but are waiting to be built until a function is declared to use that
28582 ISA. */
28583 struct builtin_isa {
28584 const char *name; /* function name */
28585 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28586 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28587 bool const_p; /* true if the declaration is constant */
28588 bool set_and_not_built_p;
28589 };
28590
28591 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28592
28593
28594 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28595 of isa_flags to use in the ix86_builtins_isa array. Store the
28596 function decl in the ix86_builtins array. Return the function decl, or
28597 NULL_TREE if the builtin was not added.
28598
28599 If the front end has a special hook for builtin functions, delay adding
28600 builtin functions that aren't in the current ISA until the ISA is changed
28601 with function-specific optimization. Doing so can save about 300K for the
28602 default compiler. When the builtin is expanded, check at that time whether
28603 it is valid.
28604
28605 If the front end doesn't have a special hook, record all builtins, even if
28606 they aren't in the current ISA, in case the user uses
28607 function-specific options for a different ISA, so that we don't get scope
28608 errors if a builtin is added in the middle of a function scope. */
28609
28610 static inline tree
28611 def_builtin (HOST_WIDE_INT mask, const char *name,
28612 enum ix86_builtin_func_type tcode,
28613 enum ix86_builtins code)
28614 {
28615 tree decl = NULL_TREE;
28616
28617 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28618 {
28619 ix86_builtins_isa[(int) code].isa = mask;
28620
28621 mask &= ~OPTION_MASK_ISA_64BIT;
28622 if (mask == 0
28623 || (mask & ix86_isa_flags) != 0
28624 || (lang_hooks.builtin_function
28625 == lang_hooks.builtin_function_ext_scope))
28626
28627 {
28628 tree type = ix86_get_builtin_func_type (tcode);
28629 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28630 NULL, NULL_TREE);
28631 ix86_builtins[(int) code] = decl;
28632 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28633 }
28634 else
28635 {
28636 ix86_builtins[(int) code] = NULL_TREE;
28637 ix86_builtins_isa[(int) code].tcode = tcode;
28638 ix86_builtins_isa[(int) code].name = name;
28639 ix86_builtins_isa[(int) code].const_p = false;
28640 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28641 }
28642 }
28643
28644 return decl;
28645 }
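
/* A typical use, as a sketch (the mask, name and type code here are
   illustrative values of the kind seen in the tables below):

       def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi",
                    UINT_FTYPE_UINT_UCHAR, IX86_BUILTIN_CRC32QI);

   If SSE4.2 is already enabled in ix86_isa_flags (or the front end supports
   extension-scope builtins), the decl is created right away; otherwise the
   request is only recorded in ix86_builtins_isa and materialized later by
   ix86_add_new_builtins.  */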
28646
28647 /* Like def_builtin, but also marks the function decl "const". */
28648
28649 static inline tree
28650 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28651 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28652 {
28653 tree decl = def_builtin (mask, name, tcode, code);
28654 if (decl)
28655 TREE_READONLY (decl) = 1;
28656 else
28657 ix86_builtins_isa[(int) code].const_p = true;
28658
28659 return decl;
28660 }
28661
28662 /* Add any new builtin functions for a given ISA that may not have been
28663 declared. This saves a bit of space compared to adding all of the
28664 declarations to the tree up front, even those we never use. */
28665
28666 static void
28667 ix86_add_new_builtins (HOST_WIDE_INT isa)
28668 {
28669 int i;
28670
28671 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28672 {
28673 if ((ix86_builtins_isa[i].isa & isa) != 0
28674 && ix86_builtins_isa[i].set_and_not_built_p)
28675 {
28676 tree decl, type;
28677
28678 /* Don't define the builtin again. */
28679 ix86_builtins_isa[i].set_and_not_built_p = false;
28680
28681 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28682 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28683 type, i, BUILT_IN_MD, NULL,
28684 NULL_TREE);
28685
28686 ix86_builtins[i] = decl;
28687 if (ix86_builtins_isa[i].const_p)
28688 TREE_READONLY (decl) = 1;
28689 }
28690 }
28691 }
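
/* A sketch of the deferred-declaration flow (the attribute and function
   below are only an illustration):

       def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_...", ...);
           - AVX2 is not in ix86_isa_flags, so the builtin is only recorded.

       __attribute__((target ("avx2"))) void f (void);
           - switching the ISA flags for f eventually reaches
             ix86_add_new_builtins with the new flag set, which creates the
             pending AVX2 builtin decls at that point.  */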
28692
28693 /* Bits for builtin_description.flag. */
28694
28695 /* Set when we don't support the comparison natively, and should
28696 swap the comparison operands in order to support it. */
28697 #define BUILTIN_DESC_SWAP_OPERANDS 1
28698
28699 struct builtin_description
28700 {
28701 const HOST_WIDE_INT mask;
28702 const enum insn_code icode;
28703 const char *const name;
28704 const enum ix86_builtins code;
28705 const enum rtx_code comparison;
28706 const int flag;
28707 };
28708
28709 static const struct builtin_description bdesc_comi[] =
28710 {
28711 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28712 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28713 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28714 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28715 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28716 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28717 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28718 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28719 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28720 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28721 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28722 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28723 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28725 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28726 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28727 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28728 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28729 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28730 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28731 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28732 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28734 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28735 };
28736
28737 static const struct builtin_description bdesc_pcmpestr[] =
28738 {
28739 /* SSE4.2 */
28740 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28741 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28742 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28743 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28744 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28745 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28746 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28747 };
28748
28749 static const struct builtin_description bdesc_pcmpistr[] =
28750 {
28751 /* SSE4.2 */
28752 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28753 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28754 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28755 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28756 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28757 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28758 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28759 };
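/* Note on the two tables above: for the pcmpestri{a,c,o,s,z}128 and
   pcmpistri{a,c,o,s,z}128 variants the FLAG field is not a
   BUILTIN_DESC_* bit but an (int)-cast CC mode (CCAmode, CCCmode, ...),
   presumably recovered by the expander along the lines of

     enum machine_mode flags_mode = (enum machine_mode) d->flag;

   (D being the hypothetical matching entry) to pick the condition read
   from the FLAGS register, while the plain ...i128 / ...m128 variants
   leave the field 0.  */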
28760
28761 /* Special builtins with variable number of arguments. */
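/* In the table below the FLAG field carries an (int)-cast
   ix86_builtin_func_type value naming the builtin's prototype; for
   instance VOID_FTYPE_PVOID stands for "void f (void *)" and
   UINT64_FTYPE_VOID for "unsigned long long f (void)".  A hypothetical
   user-level snippet exercising two of the entries:

     unsigned long long tsc = __builtin_ia32_rdtsc ();
     __builtin_ia32_pause ();  */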
28762 static const struct builtin_description bdesc_special_args[] =
28763 {
28764 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28765 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28766 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28767
28768   /* 80387 (used internally for atomic compound assignment).  */
28769 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28770 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28771 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28772 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28773
28774 /* MMX */
28775 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28776
28777 /* 3DNow! */
28778 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28779
28780 /* FXSR, XSAVE and XSAVEOPT */
28781 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
28782 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
28783 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28784 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28785 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28786
28787 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28788 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28789 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28790 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28791 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28792
28793 /* SSE */
28794 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28795 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28796 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28797
28798 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28799 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28800 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28801 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28802
28803 /* SSE or 3DNow!A */
28804 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28805 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
28806
28807 /* SSE2 */
28808 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28809 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28810 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28811 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
28812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28813 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
28814 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
28815 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
28816 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
28817 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28818
28819 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28820 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28821
28822 /* SSE3 */
28823 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28824
28825 /* SSE4.1 */
28826 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
28827
28828 /* SSE4A */
28829 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28830 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28831
28832 /* AVX */
28833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
28834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
28835
28836 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28837 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
28838 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
28839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
28840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
28841
28842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
28843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
28844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
28845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
28846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
28847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
28848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
28849
28850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
28851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
28852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
28853
28854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
28855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
28856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
28857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
28858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
28859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
28860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
28861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
28862
28863 /* AVX2 */
28864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
28865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
28866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
28867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
28868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
28869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
28870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
28871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
28872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
28873
28874 /* AVX512F */
28875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
28876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
28877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
28878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
28879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
28880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
28881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
28882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
28883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
28884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
28885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
28886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
28887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
28888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
28889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
28890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
28891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
28892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
28893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
28894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
28895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
28896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
28897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
28898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
28899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
28900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
28901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
28902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
28903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
28904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
28905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
28906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
28907
28908 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
28909 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
28910 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
28911 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
28912 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
28913 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
28914
28915 /* FSGSBASE */
28916 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
28917 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
28918 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
28919 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
28920 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
28921 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
28922 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
28923 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
28924
28925 /* RTM */
28926 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
28927 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
28928 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
28929 };
28930
28931 /* Builtins with variable number of arguments. */
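/* Here too the FLAG field encodes the prototype; the V*_FTYPE_* names
   spell out the vector return and argument types (GCC's <xmmintrin.h>
   and friends wrap these builtins behind the _mm_* intrinsics).  A
   hypothetical user-level snippet for the SSE addps entry below:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     __v4sf
     example_addps (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_addps (a, b);
     }
*/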
28932 static const struct builtin_description bdesc_args[] =
28933 {
28934 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
28935 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
28936 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
28937 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
28938 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
28939 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
28940 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
28941
28942 /* MMX */
28943 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
28944 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
28945 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
28946 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
28947 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
28948 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
28949
28950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
28951 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
28952 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
28953 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
28954 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
28955 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
28956 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
28957 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
28958
28959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
28960 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
28961
28962 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
28963 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
28964 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
28965 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
28966
28967 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
28968 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
28969 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
28970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
28971 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
28972 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
28973
28974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
28975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
28976 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
28977 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
28978   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
28979   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
28980
28981 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
28982 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
28983 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
28984
28985 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
28986
28987 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
28988 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
28989 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
28990 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
28991 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
28992 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
28993
28994 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
28995 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
28996 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
28997 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
28998 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
28999 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29000
29001 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29002 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29003 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29004 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29005
29006 /* 3DNow! */
29007 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29008 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29009 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29010 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29011
29012 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29013 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29014 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29015 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29016 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29017 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29018 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29019 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29020 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29021 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29022 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29023 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29024 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29025 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29026 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29027
29028 /* 3DNow!A */
29029 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29030 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29031 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29032 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29033 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29034 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29035
29036 /* SSE */
29037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29039 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29040 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29041 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29042 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29043 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29044 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29045 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29046 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29047 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29048 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29049
29050 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29051
29052 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29053 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29054 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29055 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29057 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29060
29061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29064 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29069 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29070 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29071   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29072 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29073 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29074 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29075 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29076 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29077 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29080 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29081
29082 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29083 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29086
29087 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29088 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29089 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29090 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29091
29092 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29093
29094 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29096 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29097 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29098 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29099
29100 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29102   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29103
29104 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29105
29106 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29108 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29109
29110 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29111 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29112
29113   /* SSE MMX or 3DNow!A */
29114 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29115 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29116 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29117
29118 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29119 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29120 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29121 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29122
29123 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29124 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29125
29126 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29127
29128 /* SSE2 */
29129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29130
29131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29135 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29136
29137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29142
29143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29144
29145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29147 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29148 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29149
29150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29152 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29153
29154 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29155 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29156 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29157 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29162
29163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29167   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29183
29184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29185 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29188
29189 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29191 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29192 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29193
29194 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29195
29196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29197 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29198 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29199
29200 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29201
29202 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29203 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29204 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29205 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29206 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29207 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29208 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29209 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29210
29211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29219
29220 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29221   { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29222
29223 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29225 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29226 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29227
29228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29230
29231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29237
29238 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29239 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29240 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29241 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29242
29243 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29244 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29245 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29246 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29247 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29248 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29249 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29250 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29251
29252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29253 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29254 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29255
29256 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29257 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29258
29259 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29260 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29261
29262 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29263
29264 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29265 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29266 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29267 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29268
29269 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29270 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29271 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29272 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29273 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29274 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29275 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29276
29277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29278 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29279 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29280 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29281 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29282 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29283 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29284
29285 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29286 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29287 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29288 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29289
29290 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29291 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29292 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29293
29294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29295
29296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29297
29298 /* SSE2 MMX */
29299 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29300 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29301
29302 /* SSE3 */
29303 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29304 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29305
29306 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29307 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29308 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29309 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29310 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29311 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29312
29313 /* SSSE3 */
29314 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29315 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29316 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29317 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29318 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29319 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29320
29321 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29322 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29323 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29324 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29325 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29326 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29327 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29328 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29329 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29330 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29331 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29332 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29333 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29334 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29335 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29336 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29337 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29338 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29339 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29340 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29341 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29342 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29343 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29344 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29345
29346 /* SSSE3. */
29347 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29348 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29349
29350 /* SSE4.1 */
29351 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29352 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29353 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29354 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29355 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29356 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29357 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29358 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29359 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29360 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29361
29362 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29363 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29364 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29365 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29366 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29367 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29368 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29369 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29370 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29371 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29372 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29373 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29374 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29375
29376 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29377 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29378 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29379 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29380 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29381 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29382 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29383 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29384 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29385 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29386 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29387 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29388
29389 /* SSE4.1 */
29390 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29391 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29392 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29393 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29394
29395 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29396 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29397 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29398 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29399
29400 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29401 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29402
29403 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29404 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29405
29406 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29407 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29408 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29409 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29410
29411 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29412 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29413
29414 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29415 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29416
29417 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29418 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29419 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29420
29421 /* SSE4.2 */
29422 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29423 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29424 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29425 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29426 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29427
29428 /* SSE4A */
29429 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29430 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29431 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29432 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29433
29434 /* AES */
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29437
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29442
29443 /* PCLMUL */
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29445
29446 /* AVX */
29447 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29448 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29449 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29450 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29451 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29452 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29453 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29455 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29456 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29457 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29458 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29461 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29462 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29463 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29464 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29465 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29466 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29467 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29468 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29469 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29470 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29471 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29472 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29473
29474 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29475 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29476 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29477 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29478
29479 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29480 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29481 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29482 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29483 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29484 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29485 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29486 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29487 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29488 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29489 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29490 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29491 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29492 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29493 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29494 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29495 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29496 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29497 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29498 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29499 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29500 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29501 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29502 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29503 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29504 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29505 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29506 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29507 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29508 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29509 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29510 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29511 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29512 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29513
29514 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29515 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29516 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29517
29518 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29519 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29520 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29521 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29522 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29523
29524 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29525
29526 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29527 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29528
29529 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29530 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29531 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29532 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29533
29534 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29535 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29536
29537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29538 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29539
29540 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29543 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29544
29545 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29547
29548 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29549 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29550
29551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29555
29556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29559 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29560 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29561 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29562
29563 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29564 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29565 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29566 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29567 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29568 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29569 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29570 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29571 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29572 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29573 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29574 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29575 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29576 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29577 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29578
29579 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29580 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29581
29582 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29583 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29584
29585 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29586
29587 /* AVX2 */
29588 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29589 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29590 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29591 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29592 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29593 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29594 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29595 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29596 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29597 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29598 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29599 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29600 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29601 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29602 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29603 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29605 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29606 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29607 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29608 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29609 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29610 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29611 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29612 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29613 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29614 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29615 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29616 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29617 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29618 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29619 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29620 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29621 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29626 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29627 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29628 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29629 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29630 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29631 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29632 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29633 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29634 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29635 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29636 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29637 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29638 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29639 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29640 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29641 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29642 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29643 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29644 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29645 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29646 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29647 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29648 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29649 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29650 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29651 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29652 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29653 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29654 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29655 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29656 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29657 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29658 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29659 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29660 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29661 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29662 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29663 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29664 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29665 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29666 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29667 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29668 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29669 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29670 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29671 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29672 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29673 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29674 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29675 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29676 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29677 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29678 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29679 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29680 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29681 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29682 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29683 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29684 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29685 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29686 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29687 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29688 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29689 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29690 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29691 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29692 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29693 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29694 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29695 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29696 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29697 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29698 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29699 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29700 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29701 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29702 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29703 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29705 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29707 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29708 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29710 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29711 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29712 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29713 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29714 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29715 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29716 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29717 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29718 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29719 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29720 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29721 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29722 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29723 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29724 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29725 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29726 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29727 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29728 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29729 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29730 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29731 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29732 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29733 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29734
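  /* LZCNT.  __builtin_clzs counts leading zeros of a 16-bit value via the
     lzcnt encoding (clzhi2_lzcnt), so, unlike plain __builtin_clz, it
     presumably yields a well-defined result (16) for a zero input.  */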
29735 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29736
29737 /* BMI */
29738 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29739 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29740 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29741
29742 /* TBM */
29743 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29744 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29745
29746 /* F16C */
29747 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29748 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29749 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29750 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29751
29752 /* BMI2 */
29753 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29754 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29755 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29756 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29757 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29758 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29759
29760 /* AVX512F */
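  /* For the masked entries below, the trailing ftype operands are, by
     convention, the merge source vector and the write mask; e.g.
     V16SI_FTYPE_V16SI_V16SI_V16SI_HI is roughly
     dst = mask_merge (op (a, b), src, k).  The _maskz variants zero the
     masked-out elements instead of merging with src.  */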
29761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
29762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
29763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
29769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
29771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
29772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
29774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
29775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
29779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
29781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
29786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
29787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
29788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
29789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
29790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
29791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
29792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
29793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
29810 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
29811 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
29812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
29813 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
29814 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
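  /* The two pbroadcastq512 GPR/memory variants above appear to exist
     because a 64-bit scalar fits in a single GPR only on 64-bit targets;
     the _mem form, enabled when OPTION_MASK_ISA_64BIT is absent, instead
     broadcasts the DImode value from memory.  */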
29815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
29834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
29835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
29836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
29837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
29838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
29839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
29840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
29841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
29842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
29843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
29844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
29845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
29846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
29847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
29848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
29849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
29850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
29851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
29852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
29853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
29854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
29855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
29856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
29857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
29858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
29859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
29861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
29864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
29865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
29868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
29869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
29872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
29873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
29874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
29875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
29876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
29879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
29880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
29881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
29882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
29885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
29886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
29887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
29888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29894 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29895 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
29911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
29912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
29913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
29914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
29915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
29916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
29917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
29918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
29919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
29920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
29921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
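  /* AVX512CD: per-element leading-zero count (vplzcnt) and conflict
     detection (vpconflict) builtins.  */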
29922 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29923 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29924 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29925 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
29927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
29928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
29930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
29931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
29933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
29934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
29935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
29936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
29939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
29940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
29941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
29942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
29945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
29947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
29949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
29950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
29951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
29952
29953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
29954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
29955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
29956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
29957 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
29958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
29959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
29960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
29961
29962 /* Mask arithmetic operations */
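  /* These operate on 16-bit mask (k) registers: HI_FTYPE_HI_HI maps a
     HImode value onto a mask register, so __builtin_ia32_kandhi is the
     likely expansion target of the _mm512_kand intrinsic and should emit
     kandw, and similarly for the other k-register logic operations.  */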
29963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
29964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
29965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
29966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
29967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
29968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
29969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
29970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
29971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
29972
29973 /* SHA */
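  /* The SHA entries carry a null name field, so the table-driven
     registration loop presumably skips them and the __builtin_ia32_sha*
     names are declared separately; the rows here are still used to expand
     the builtins through their IX86_BUILTIN_* codes.  */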
29974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
29981 };
29982
29983 /* Builtins with rounding support. */
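/* Each entry takes a trailing INT operand that selects the rounding
   mode / SAE control.  As a sketch (not a call copied from this file),
   __builtin_ia32_addpd512_mask (a, b, src, mask,
				 _MM_FROUND_TO_NEAREST_INT
				 | _MM_FROUND_NO_EXC)
   adds with round-to-nearest and exceptions suppressed, while passing
   _MM_FROUND_CUR_DIRECTION requests the current MXCSR rounding mode.  */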
29984 static const struct builtin_description bdesc_round_args[] =
29985 {
29986 /* AVX512F */
29987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
29988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
29989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
29992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
29993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
29994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
29995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
29996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
29997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
29998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
29999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30006 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30008 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30015 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30017 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30067 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30069 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30071 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30073 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30075 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30077 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30079 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30081 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30106
30107 /* AVX512ER */
30108 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30109 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30110 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30111 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30112 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30113 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30114 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30115 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30116 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30117 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30118 };
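/* For illustration only (an editorial sketch, not part of GCC itself): each
   entry above pairs an ISA mask, an insn pattern, a builtin name and a
   function type.  Assuming the usual vector_size attribute and the
   _MM_FROUND_* rounding immediates from <immintrin.h>, the
   "__builtin_ia32_divpd512_mask" entry (V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT)
   could be invoked directly along these lines:

       typedef double v8df __attribute__ ((__vector_size__ (64)));

       v8df
       div_round_nearest (v8df a, v8df b)
       {
         return __builtin_ia32_divpd512_mask
           (a, b, a, (unsigned char) -1,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
       }

   The third operand is the merge source, the fourth the write mask (-1 keeps
   every lane) and the last the rounding-mode immediate consumed by the
   *_round insn patterns.  */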
30119
30120 /* FMA4 and XOP. */
30121 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30122 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30123 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30124 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30125 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30126 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30127 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30128 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30129 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30130 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30131 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30132 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30133 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30134 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30135 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30136 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30137 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30138 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30139 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30140 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30141 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30142 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30143 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30144 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30145 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30146 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30147 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30148 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30149 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30150 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30151 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30152 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30153 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30154 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30155 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30156 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30157 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30158 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30159 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30160 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30161 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30162 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30163 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30164 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30165 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30166 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30167 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30168 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30169 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30170 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30171 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30172 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30173
30174 static const struct builtin_description bdesc_multi_arg[] =
30175 {
30176 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30177 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30178 UNKNOWN, (int)MULTI_ARG_3_SF },
30179 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30180 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30181 UNKNOWN, (int)MULTI_ARG_3_DF },
30182
30183 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30184 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30185 UNKNOWN, (int)MULTI_ARG_3_SF },
30186 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30187 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30188 UNKNOWN, (int)MULTI_ARG_3_DF },
30189
30190 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30191 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30192 UNKNOWN, (int)MULTI_ARG_3_SF },
30193 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30194 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30195 UNKNOWN, (int)MULTI_ARG_3_DF },
30196 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30197 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30198 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30199 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30200 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30201 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30202
30203 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30204 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30205 UNKNOWN, (int)MULTI_ARG_3_SF },
30206 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30207 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30208 UNKNOWN, (int)MULTI_ARG_3_DF },
30209 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30210 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30211 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30212 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30213 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30214 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30215
30216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30223
30224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30231
30232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30233
30234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30246
30247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30263
30264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30270
30271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30278 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30280 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30281 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30286
30287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30289 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30294
30295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30297 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30299 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30300 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30302
30303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30310
30311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30318
30319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30326
30327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30334
30335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30339 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30342
30343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30348 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30350
30351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30352 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30356 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30357 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30358 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30359
30360 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30361 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30363 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30364 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30365 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30368
30369 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30370 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30373
30374 };
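/* For illustration only (an editorial sketch, not part of GCC itself): the
   MULTI_ARG_3_DI entry "__builtin_ia32_vpcmov_v2di" above implements the XOP
   bitwise select.  Assuming the usual vector_size attribute, a direct call
   could look like:

       typedef long long v2di __attribute__ ((__vector_size__ (16)));

       v2di
       bitwise_select (v2di a, v2di b, v2di sel)
       {
         return __builtin_ia32_vpcmov_v2di (a, b, sel);
       }

   Bits that are set in SEL are taken from A, cleared bits from B.  */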
30375 \f
30376 /* TM vector builtins. */
30377
30378 /* Reuse the existing x86-specific `struct builtin_description' rather than
30379 introducing a new structure; add casts to make the TM built-in codes fit. */
30380 static const struct builtin_description bdesc_tm[] =
30381 {
30382 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30383 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30384 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30385 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30386 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30387 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30388 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30389
30390 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30391 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30392 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30393 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30394 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30395 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30396 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30397
30398 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30399 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30400 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30401 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30402 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30403 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30404 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30405
30406 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30407 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30408 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30409 };
30410
30411 /* TM callbacks. */
30412
30413 /* Return the builtin decl needed to load a vector of TYPE. */
30414
30415 static tree
30416 ix86_builtin_tm_load (tree type)
30417 {
30418 if (TREE_CODE (type) == VECTOR_TYPE)
30419 {
30420 switch (tree_to_uhwi (TYPE_SIZE (type)))
30421 {
30422 case 64:
30423 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30424 case 128:
30425 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30426 case 256:
30427 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30428 }
30429 }
30430 return NULL_TREE;
30431 }
30432
30433 /* Return the builtin decl needed to store a vector of TYPE. */
30434
30435 static tree
30436 ix86_builtin_tm_store (tree type)
30437 {
30438 if (TREE_CODE (type) == VECTOR_TYPE)
30439 {
30440 switch (tree_to_uhwi (TYPE_SIZE (type)))
30441 {
30442 case 64:
30443 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30444 case 128:
30445 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30446 case 256:
30447 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30448 }
30449 }
30450 return NULL_TREE;
30451 }
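/* For illustration only (an editorial sketch, not part of GCC itself): given
   a 128-bit vector type, the two helpers above resolve to the M128 TM
   builtins registered by ix86_init_tm_builtins below, roughly:

       tree v2df_type = build_vector_type (double_type_node, 2);
       tree load_fn  = ix86_builtin_tm_load (v2df_type);
       tree store_fn = ix86_builtin_tm_store (v2df_type);

   LOAD_FN and STORE_FN are then the decls recorded for BUILT_IN_TM_LOAD_M128
   and BUILT_IN_TM_STORE_M128, or NULL_TREE if the TM builtins were never
   initialized.  */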
30452 \f
30453 /* Initialize the transactional memory vector load/store builtins. */
30454
30455 static void
30456 ix86_init_tm_builtins (void)
30457 {
30458 enum ix86_builtin_func_type ftype;
30459 const struct builtin_description *d;
30460 size_t i;
30461 tree decl;
30462 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30463 tree attrs_log, attrs_type_log;
30464
30465 if (!flag_tm)
30466 return;
30467
30468 /* If there are no builtins defined, we must be compiling in a
30469 language without trans-mem support. */
30470 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30471 return;
30472
30473 /* Use whatever attributes a normal TM load has. */
30474 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30475 attrs_load = DECL_ATTRIBUTES (decl);
30476 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30477 /* Use whatever attributes a normal TM store has. */
30478 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30479 attrs_store = DECL_ATTRIBUTES (decl);
30480 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30481 /* Use whatever attributes a normal TM log has. */
30482 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30483 attrs_log = DECL_ATTRIBUTES (decl);
30484 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30485
30486 for (i = 0, d = bdesc_tm;
30487 i < ARRAY_SIZE (bdesc_tm);
30488 i++, d++)
30489 {
30490 if ((d->mask & ix86_isa_flags) != 0
30491 || (lang_hooks.builtin_function
30492 == lang_hooks.builtin_function_ext_scope))
30493 {
30494 tree type, attrs, attrs_type;
30495 enum built_in_function code = (enum built_in_function) d->code;
30496
30497 ftype = (enum ix86_builtin_func_type) d->flag;
30498 type = ix86_get_builtin_func_type (ftype);
30499
30500 if (BUILTIN_TM_LOAD_P (code))
30501 {
30502 attrs = attrs_load;
30503 attrs_type = attrs_type_load;
30504 }
30505 else if (BUILTIN_TM_STORE_P (code))
30506 {
30507 attrs = attrs_store;
30508 attrs_type = attrs_type_store;
30509 }
30510 else
30511 {
30512 attrs = attrs_log;
30513 attrs_type = attrs_type_log;
30514 }
30515 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30516 /* The library name is the builtin name without the
30517 "__builtin_" prefix, so it can also be called directly. */
30518 d->name + strlen ("__builtin_"),
30519 attrs);
30520 /* add_builtin_function() has set the DECL_ATTRIBUTES; now
30521 set the TYPE_ATTRIBUTES. */
30522 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30523
30524 set_builtin_decl (code, decl, false);
30525 }
30526 }
30527 }
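/* For illustration only (an editorial sketch, not part of GCC itself): for
   the "__builtin__ITM_WM128" entry of bdesc_tm, the loop above registers a
   builtin whose library name is the same string minus the "__builtin_"
   prefix, so it can also be called directly under a prototype roughly
   equivalent to

       typedef float v4sf __attribute__ ((__vector_size__ (16)));
       void _ITM_WM128 (v4sf *ptr, v4sf val);

   matching the VOID_FTYPE_PV4SF_V4SF function type listed for it.  */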
30528
30529 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30530 in the current target ISA, so that the user can compile particular modules
30531 with target-specific options that differ from the command-line
30532 options. */
30533 static void
30534 ix86_init_mmx_sse_builtins (void)
30535 {
30536 const struct builtin_description * d;
30537 enum ix86_builtin_func_type ftype;
30538 size_t i;
30539
30540 /* Add all special builtins with variable number of operands. */
30541 for (i = 0, d = bdesc_special_args;
30542 i < ARRAY_SIZE (bdesc_special_args);
30543 i++, d++)
30544 {
30545 if (d->name == 0)
30546 continue;
30547
30548 ftype = (enum ix86_builtin_func_type) d->flag;
30549 def_builtin (d->mask, d->name, ftype, d->code);
30550 }
30551
30552 /* Add all builtins with variable number of operands. */
30553 for (i = 0, d = bdesc_args;
30554 i < ARRAY_SIZE (bdesc_args);
30555 i++, d++)
30556 {
30557 if (d->name == 0)
30558 continue;
30559
30560 ftype = (enum ix86_builtin_func_type) d->flag;
30561 def_builtin_const (d->mask, d->name, ftype, d->code);
30562 }
30563
30564 /* Add all builtins with rounding. */
30565 for (i = 0, d = bdesc_round_args;
30566 i < ARRAY_SIZE (bdesc_round_args);
30567 i++, d++)
30568 {
30569 if (d->name == 0)
30570 continue;
30571
30572 ftype = (enum ix86_builtin_func_type) d->flag;
30573 def_builtin_const (d->mask, d->name, ftype, d->code);
30574 }
30575
30576 /* pcmpestr[im] insns. */
30577 for (i = 0, d = bdesc_pcmpestr;
30578 i < ARRAY_SIZE (bdesc_pcmpestr);
30579 i++, d++)
30580 {
30581 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30582 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30583 else
30584 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30585 def_builtin_const (d->mask, d->name, ftype, d->code);
30586 }
30587
30588 /* pcmpistr[im] insns. */
30589 for (i = 0, d = bdesc_pcmpistr;
30590 i < ARRAY_SIZE (bdesc_pcmpistr);
30591 i++, d++)
30592 {
30593 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30594 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30595 else
30596 ftype = INT_FTYPE_V16QI_V16QI_INT;
30597 def_builtin_const (d->mask, d->name, ftype, d->code);
30598 }
30599
30600 /* comi/ucomi insns. */
30601 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30602 {
30603 if (d->mask == OPTION_MASK_ISA_SSE2)
30604 ftype = INT_FTYPE_V2DF_V2DF;
30605 else
30606 ftype = INT_FTYPE_V4SF_V4SF;
30607 def_builtin_const (d->mask, d->name, ftype, d->code);
30608 }
30609
30610 /* SSE */
30611 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30612 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30613 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30614 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30615
30616 /* SSE or 3DNow!A */
30617 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30618 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30619 IX86_BUILTIN_MASKMOVQ);
30620
30621 /* SSE2 */
30622 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30623 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30624
30625 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30626 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30627 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30628 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30629
30630 /* SSE3. */
30631 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30632 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30633 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30634 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30635
30636 /* AES */
30637 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30638 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30639 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30640 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30641 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30642 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30643 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30644 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30645 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30646 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30647 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30648 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30649
30650 /* PCLMUL */
30651 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30652 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30653
30654 /* RDRND */
30655 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30656 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30657 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30658 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30659 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30660 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30661 IX86_BUILTIN_RDRAND64_STEP);
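/* For illustration only (an editorial sketch, not part of GCC itself): the
   RDRND step builtins store a hardware random value through their pointer
   argument and return nonzero only when entropy was available, e.g.:

       unsigned int
       get_random_u32 (void)
       {
         unsigned int val;
         while (!__builtin_ia32_rdrand32_step (&val))
           ;
         return val;
       }

   A zero return means no random value was delivered, so callers typically
   retry.  */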
30662
30663 /* AVX2 */
30664 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30665 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30666 IX86_BUILTIN_GATHERSIV2DF);
30667
30668 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30669 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30670 IX86_BUILTIN_GATHERSIV4DF);
30671
30672 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30673 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30674 IX86_BUILTIN_GATHERDIV2DF);
30675
30676 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30677 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30678 IX86_BUILTIN_GATHERDIV4DF);
30679
30680 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30681 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30682 IX86_BUILTIN_GATHERSIV4SF);
30683
30684 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30685 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30686 IX86_BUILTIN_GATHERSIV8SF);
30687
30688 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30689 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30690 IX86_BUILTIN_GATHERDIV4SF);
30691
30692 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30693 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30694 IX86_BUILTIN_GATHERDIV8SF);
30695
30696 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30697 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30698 IX86_BUILTIN_GATHERSIV2DI);
30699
30700 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30701 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30702 IX86_BUILTIN_GATHERSIV4DI);
30703
30704 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30705 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30706 IX86_BUILTIN_GATHERDIV2DI);
30707
30708 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30709 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30710 IX86_BUILTIN_GATHERDIV4DI);
30711
30712 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30713 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30714 IX86_BUILTIN_GATHERSIV4SI);
30715
30716 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30717 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30718 IX86_BUILTIN_GATHERSIV8SI);
30719
30720 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30721 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30722 IX86_BUILTIN_GATHERDIV4SI);
30723
30724 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30725 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30726 IX86_BUILTIN_GATHERDIV8SI);
30727
30728 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
30729 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30730 IX86_BUILTIN_GATHERALTSIV4DF);
30731
30732 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
30733 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30734 IX86_BUILTIN_GATHERALTDIV8SF);
30735
30736 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
30737 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30738 IX86_BUILTIN_GATHERALTSIV4DI);
30739
30740 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
30741 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30742 IX86_BUILTIN_GATHERALTDIV8SI);
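/* For illustration only (an editorial sketch, not part of GCC itself): the
   AVX2 gather builtins above take a pass-through source, a base pointer, an
   index vector, an element mask and a byte scale.  Assuming the usual
   vector_size typedefs, "__builtin_ia32_gathersiv4df" could be used roughly
   as:

       typedef double v4df __attribute__ ((__vector_size__ (32)));
       typedef int v4si __attribute__ ((__vector_size__ (16)));

       v4df
       gather_doubles (const double *base, v4si idx)
       {
         v4df src  = { 0.0, 0.0, 0.0, 0.0 };
         v4df mask = { -1.0, -1.0, -1.0, -1.0 };
         return __builtin_ia32_gathersiv4df (src, base, idx, mask, 8);
       }

   Only lanes whose mask element has its most significant bit set are loaded;
   the remaining lanes keep the corresponding SRC value.  The final operand
   is the scale applied to each index, in bytes.  */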
30743
30744 /* AVX512F */
30745 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30746 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30747 IX86_BUILTIN_GATHER3SIV16SF);
30748
30749 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30750 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30751 IX86_BUILTIN_GATHER3SIV8DF);
30752
30753 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30754 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30755 IX86_BUILTIN_GATHER3DIV16SF);
30756
30757 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30758 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
30759 IX86_BUILTIN_GATHER3DIV8DF);
30760
30761 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30762 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
30763 IX86_BUILTIN_GATHER3SIV16SI);
30764
30765 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30766 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
30767 IX86_BUILTIN_GATHER3SIV8DI);
30768
30769 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30770 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
30771 IX86_BUILTIN_GATHER3DIV16SI);
30772
30773 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30774 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
30775 IX86_BUILTIN_GATHER3DIV8DI);
30776
30777 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df",
30778 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30779 IX86_BUILTIN_GATHER3ALTSIV8DF);
30780
30781 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf",
30782 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30783 IX86_BUILTIN_GATHER3ALTDIV16SF);
30784
30785 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
30786 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30787 IX86_BUILTIN_GATHER3ALTSIV8DI);
30788
30789 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
30790 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30791 IX86_BUILTIN_GATHER3ALTDIV16SI);
30792
30793 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30794 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
30795 IX86_BUILTIN_SCATTERSIV16SF);
30796
30797 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30798 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
30799 IX86_BUILTIN_SCATTERSIV8DF);
30800
30801 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
30802 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
30803 IX86_BUILTIN_SCATTERDIV16SF);
30804
30805 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
30806 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
30807 IX86_BUILTIN_SCATTERDIV8DF);
30808
30809 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
30810 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
30811 IX86_BUILTIN_SCATTERSIV16SI);
30812
30813 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
30814 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
30815 IX86_BUILTIN_SCATTERSIV8DI);
30816
30817 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
30818 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
30819 IX86_BUILTIN_SCATTERDIV16SI);
30820
30821 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
30822 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
30823 IX86_BUILTIN_SCATTERDIV8DI);
30824
30825 /* AVX512PF */
30826 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
30827 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
30828 IX86_BUILTIN_GATHERPFDPS);
30829 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
30830 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
30831 IX86_BUILTIN_GATHERPFQPS);
30832 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
30833 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
30834 IX86_BUILTIN_SCATTERPFDPS);
30835 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
30836 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
30837 IX86_BUILTIN_SCATTERPFQPS);
30838
30839 /* SHA */
30840 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
30841 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
30842 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
30843 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
30844 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
30845 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
30846 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
30847 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
30848 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
30849 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
30850 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
30851 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
30852 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
30853 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
30854
30855 /* RTM. */
30856 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
30857 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
30858
30859 /* MMX access to the vec_init patterns. */
30860 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
30861 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
30862
30863 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
30864 V4HI_FTYPE_HI_HI_HI_HI,
30865 IX86_BUILTIN_VEC_INIT_V4HI);
30866
30867 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
30868 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
30869 IX86_BUILTIN_VEC_INIT_V8QI);
30870
30871 /* Access to the vec_extract patterns. */
30872 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
30873 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
30874 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
30875 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
30876 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
30877 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
30878 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
30879 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
30880 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
30881 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
30882
30883 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30884 "__builtin_ia32_vec_ext_v4hi",
30885 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
30886
30887 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
30888 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
30889
30890 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
30891 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
30892
30893 /* Access to the vec_set patterns. */
30894 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
30895 "__builtin_ia32_vec_set_v2di",
30896 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
30897
30898 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
30899 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
30900
30901 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
30902 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
30903
30904 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
30905 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
30906
30907 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30908 "__builtin_ia32_vec_set_v4hi",
30909 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
30910
30911 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
30912 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
30913
30914 /* RDSEED */
30915 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
30916 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
30917 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
30918 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
30919 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
30920 "__builtin_ia32_rdseed_di_step",
30921 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
30922
30923 /* ADCX */
30924 def_builtin (0, "__builtin_ia32_addcarryx_u32",
30925 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
30926 def_builtin (OPTION_MASK_ISA_64BIT,
30927 "__builtin_ia32_addcarryx_u64",
30928 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
30929 IX86_BUILTIN_ADDCARRYX64);
30930
30931 /* Read/write FLAGS. */
30932 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
30933 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
30934 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
30935 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
30936 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
30937 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
30938 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
30939 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
30940
30941
30942 /* Add FMA4 multi-arg argument instructions */
30943 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
30944 {
30945 if (d->name == 0)
30946 continue;
30947
30948 ftype = (enum ix86_builtin_func_type) d->flag;
30949 def_builtin_const (d->mask, d->name, ftype, d->code);
30950 }
30951 }
30952
30953 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
30954 to return a pointer to VERSION_DECL if the outcome of the expression
30955 formed by PREDICATE_CHAIN is true. This function will be called during
30956 version dispatch to decide which function version to execute. It returns
30957 the basic block at the end, to which more conditions can be added. */
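/* Roughly, for a version guarded by two predicates, the GIMPLE added to
   NEW_BB looks like the following (an illustrative sketch, not literal
   output of this function):

     cond_1 = __builtin_cpu_is ("...");
     cond_2 = __builtin_cpu_supports ("...");
     cond_1 = MIN_EXPR <cond_2, cond_1>;
     if (cond_1 > 0)
       {
         tmp = (void *) &VERSION_DECL;
         return tmp;
       }
     ... control falls through to the basic block that is returned ...  */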
30958
30959 static basic_block
30960 add_condition_to_bb (tree function_decl, tree version_decl,
30961 tree predicate_chain, basic_block new_bb)
30962 {
30963 gimple return_stmt;
30964 tree convert_expr, result_var;
30965 gimple convert_stmt;
30966 gimple call_cond_stmt;
30967 gimple if_else_stmt;
30968
30969 basic_block bb1, bb2, bb3;
30970 edge e12, e23;
30971
30972 tree cond_var, and_expr_var = NULL_TREE;
30973 gimple_seq gseq;
30974
30975 tree predicate_decl, predicate_arg;
30976
30977 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
30978
30979 gcc_assert (new_bb != NULL);
30980 gseq = bb_seq (new_bb);
30981
30982
30983 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
30984 build_fold_addr_expr (version_decl));
30985 result_var = create_tmp_var (ptr_type_node, NULL);
30986 convert_stmt = gimple_build_assign (result_var, convert_expr);
30987 return_stmt = gimple_build_return (result_var);
30988
30989 if (predicate_chain == NULL_TREE)
30990 {
30991 gimple_seq_add_stmt (&gseq, convert_stmt);
30992 gimple_seq_add_stmt (&gseq, return_stmt);
30993 set_bb_seq (new_bb, gseq);
30994 gimple_set_bb (convert_stmt, new_bb);
30995 gimple_set_bb (return_stmt, new_bb);
30996 pop_cfun ();
30997 return new_bb;
30998 }
30999
31000 while (predicate_chain != NULL)
31001 {
31002 cond_var = create_tmp_var (integer_type_node, NULL);
31003 predicate_decl = TREE_PURPOSE (predicate_chain);
31004 predicate_arg = TREE_VALUE (predicate_chain);
31005 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31006 gimple_call_set_lhs (call_cond_stmt, cond_var);
31007
31008 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31009 gimple_set_bb (call_cond_stmt, new_bb);
31010 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31011
31012 predicate_chain = TREE_CHAIN (predicate_chain);
31013
31014 if (and_expr_var == NULL)
31015 and_expr_var = cond_var;
31016 else
31017 {
31018 gimple assign_stmt;
31019 /* Use MIN_EXPR to check whether any of the integers is zero:
31020 and_expr_var = min_expr <cond_var, and_expr_var>. */
31021 assign_stmt = gimple_build_assign (and_expr_var,
31022 build2 (MIN_EXPR, integer_type_node,
31023 cond_var, and_expr_var));
31024
31025 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31026 gimple_set_bb (assign_stmt, new_bb);
31027 gimple_seq_add_stmt (&gseq, assign_stmt);
31028 }
31029 }
31030
31031 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31032 integer_zero_node,
31033 NULL_TREE, NULL_TREE);
31034 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31035 gimple_set_bb (if_else_stmt, new_bb);
31036 gimple_seq_add_stmt (&gseq, if_else_stmt);
31037
31038 gimple_seq_add_stmt (&gseq, convert_stmt);
31039 gimple_seq_add_stmt (&gseq, return_stmt);
31040 set_bb_seq (new_bb, gseq);
31041
31042 bb1 = new_bb;
31043 e12 = split_block (bb1, if_else_stmt);
31044 bb2 = e12->dest;
31045 e12->flags &= ~EDGE_FALLTHRU;
31046 e12->flags |= EDGE_TRUE_VALUE;
31047
31048 e23 = split_block (bb2, return_stmt);
31049
31050 gimple_set_bb (convert_stmt, bb2);
31051 gimple_set_bb (return_stmt, bb2);
31052
31053 bb3 = e23->dest;
31054 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31055
31056 remove_edge (e23);
31057 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31058
31059 pop_cfun ();
31060
31061 return bb3;
31062 }
31063
31064 /* This parses the attribute arguments to target in DECL and determines
31065 the right builtin to use to match the platform specification.
31066 It returns the priority value for this version decl. If PREDICATE_LIST
31067 is not NULL, it stores the list of cpu features that need to be checked
31068 before dispatching this function. */
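/* For example (an illustrative sketch), a version declared with
   __attribute__ ((target ("arch=core2"))) produces the predicate
   __builtin_cpu_is ("core2") and priority P_PROC_SSSE3, while
   target ("sse4.2") produces __builtin_cpu_supports ("sse4.2") and
   priority P_SSE4_2.  */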
31069
31070 static unsigned int
31071 get_builtin_code_for_version (tree decl, tree *predicate_list)
31072 {
31073 tree attrs;
31074 struct cl_target_option cur_target;
31075 tree target_node;
31076 struct cl_target_option *new_target;
31077 const char *arg_str = NULL;
31078 const char *attrs_str = NULL;
31079 char *tok_str = NULL;
31080 char *token;
31081
31082 /* Priority of i386 features; a greater value means a higher priority. This is
31083 used to decide the order in which function dispatch must happen. For
31084 instance, a version specialized for SSE4.2 should be checked for dispatch
31085 before a version for SSE3, as SSE4.2 implies SSE3. */
31086 enum feature_priority
31087 {
31088 P_ZERO = 0,
31089 P_MMX,
31090 P_SSE,
31091 P_SSE2,
31092 P_SSE3,
31093 P_SSSE3,
31094 P_PROC_SSSE3,
31095 P_SSE4_A,
31096 P_PROC_SSE4_A,
31097 P_SSE4_1,
31098 P_SSE4_2,
31099 P_PROC_SSE4_2,
31100 P_POPCNT,
31101 P_AVX,
31102 P_PROC_AVX,
31103 P_FMA4,
31104 P_XOP,
31105 P_PROC_XOP,
31106 P_FMA,
31107 P_PROC_FMA,
31108 P_AVX2,
31109 P_PROC_AVX2
31110 };
31111
31112 enum feature_priority priority = P_ZERO;
31113
31114 /* These are the target attribute strings for which a dispatcher is
31115 available, from fold_builtin_cpu. */
31116
31117 static struct _feature_list
31118 {
31119 const char *const name;
31120 const enum feature_priority priority;
31121 }
31122 const feature_list[] =
31123 {
31124 {"mmx", P_MMX},
31125 {"sse", P_SSE},
31126 {"sse2", P_SSE2},
31127 {"sse3", P_SSE3},
31128 {"sse4a", P_SSE4_A},
31129 {"ssse3", P_SSSE3},
31130 {"sse4.1", P_SSE4_1},
31131 {"sse4.2", P_SSE4_2},
31132 {"popcnt", P_POPCNT},
31133 {"avx", P_AVX},
31134 {"fma4", P_FMA4},
31135 {"xop", P_XOP},
31136 {"fma", P_FMA},
31137 {"avx2", P_AVX2}
31138 };
31139
31140
31141 static unsigned int NUM_FEATURES
31142 = sizeof (feature_list) / sizeof (struct _feature_list);
31143
31144 unsigned int i;
31145
31146 tree predicate_chain = NULL_TREE;
31147 tree predicate_decl, predicate_arg;
31148
31149 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31150 gcc_assert (attrs != NULL);
31151
31152 attrs = TREE_VALUE (TREE_VALUE (attrs));
31153
31154 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31155 attrs_str = TREE_STRING_POINTER (attrs);
31156
31157 /* Return priority zero for default function. */
31158 if (strcmp (attrs_str, "default") == 0)
31159 return 0;
31160
31161 /* Handle arch= if specified. For priority, set it to be 1 more than
31162 the best instruction set the processor can handle. For instance, if
31163 there is a version for atom and a version for ssse3 (the highest ISA
31164 priority for atom), the atom version must be checked for dispatch
31165 before the ssse3 version. */
31166 if (strstr (attrs_str, "arch=") != NULL)
31167 {
31168 cl_target_option_save (&cur_target, &global_options);
31169 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31170 &global_options_set);
31171
31172 gcc_assert (target_node);
31173 new_target = TREE_TARGET_OPTION (target_node);
31174 gcc_assert (new_target);
31175
31176 if (new_target->arch_specified && new_target->arch > 0)
31177 {
31178 switch (new_target->arch)
31179 {
31180 case PROCESSOR_CORE2:
31181 arg_str = "core2";
31182 priority = P_PROC_SSSE3;
31183 break;
31184 case PROCESSOR_NEHALEM:
31185 /* We translate "arch=corei7" and "arch=nehalem" to
31186 "corei7" so that it will be mapped to M_INTEL_COREI7
31187 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31188 arg_str = "corei7";
31189 priority = P_PROC_SSE4_2;
31190 break;
31191 case PROCESSOR_SANDYBRIDGE:
31192 arg_str = "sandybridge";
31193 priority = P_PROC_AVX;
31194 break;
31195 case PROCESSOR_HASWELL:
31196 arg_str = "haswell";
31197 priority = P_PROC_AVX2;
31198 break;
31199 case PROCESSOR_BONNELL:
31200 arg_str = "bonnell";
31201 priority = P_PROC_SSSE3;
31202 break;
31203 case PROCESSOR_SILVERMONT:
31204 arg_str = "silvermont";
31205 priority = P_PROC_SSE4_2;
31206 break;
31207 case PROCESSOR_AMDFAM10:
31208 arg_str = "amdfam10h";
31209 priority = P_PROC_SSE4_A;
31210 break;
31211 case PROCESSOR_BTVER1:
31212 arg_str = "btver1";
31213 priority = P_PROC_SSE4_A;
31214 break;
31215 case PROCESSOR_BTVER2:
31216 arg_str = "btver2";
31217 priority = P_PROC_AVX;
31218 break;
31219 case PROCESSOR_BDVER1:
31220 arg_str = "bdver1";
31221 priority = P_PROC_XOP;
31222 break;
31223 case PROCESSOR_BDVER2:
31224 arg_str = "bdver2";
31225 priority = P_PROC_FMA;
31226 break;
31227 case PROCESSOR_BDVER3:
31228 arg_str = "bdver3";
31229 priority = P_PROC_FMA;
31230 break;
31231 case PROCESSOR_BDVER4:
31232 arg_str = "bdver4";
31233 priority = P_PROC_AVX2;
31234 break;
31235 }
31236 }
31237
31238 cl_target_option_restore (&global_options, &cur_target);
31239
31240 if (predicate_list && arg_str == NULL)
31241 {
31242 error_at (DECL_SOURCE_LOCATION (decl),
31243 "No dispatcher found for the versioning attributes");
31244 return 0;
31245 }
31246
31247 if (predicate_list)
31248 {
31249 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31250 /* For a C string literal the length includes the trailing NULL. */
31251 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31252 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31253 predicate_chain);
31254 }
31255 }
31256
31257 /* Process feature name. */
31258 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31259 strcpy (tok_str, attrs_str);
31260 token = strtok (tok_str, ",");
31261 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31262
31263 while (token != NULL)
31264 {
31265 /* Do not process "arch=" */
31266 if (strncmp (token, "arch=", 5) == 0)
31267 {
31268 token = strtok (NULL, ",");
31269 continue;
31270 }
31271 for (i = 0; i < NUM_FEATURES; ++i)
31272 {
31273 if (strcmp (token, feature_list[i].name) == 0)
31274 {
31275 if (predicate_list)
31276 {
31277 predicate_arg = build_string_literal (
31278 strlen (feature_list[i].name) + 1,
31279 feature_list[i].name);
31280 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31281 predicate_chain);
31282 }
31283 /* Find the maximum priority feature. */
31284 if (feature_list[i].priority > priority)
31285 priority = feature_list[i].priority;
31286
31287 break;
31288 }
31289 }
31290 if (predicate_list && i == NUM_FEATURES)
31291 {
31292 error_at (DECL_SOURCE_LOCATION (decl),
31293 "No dispatcher found for %s", token);
31294 return 0;
31295 }
31296 token = strtok (NULL, ",");
31297 }
31298 free (tok_str);
31299
31300 if (predicate_list && predicate_chain == NULL_TREE)
31301 {
31302 error_at (DECL_SOURCE_LOCATION (decl),
31303 "No dispatcher found for the versioning attributes : %s",
31304 attrs_str);
31305 return 0;
31306 }
31307 else if (predicate_list)
31308 {
31309 predicate_chain = nreverse (predicate_chain);
31310 *predicate_list = predicate_chain;
31311 }
31312
31313 return priority;
31314 }
31315
31316 /* This compares the priority of target features in function DECL1
31317 and DECL2. It returns positive value if DECL1 is higher priority,
31318 negative value if DECL2 is higher priority and 0 if they are the
31319 same. */
31320
31321 static int
31322 ix86_compare_version_priority (tree decl1, tree decl2)
31323 {
31324 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31325 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31326
31327 return (int)priority1 - (int)priority2;
31328 }
31329
31330 /* V1 and V2 point to function versions with different priorities
31331 based on the target ISA. This function compares their priorities. */
31332
31333 static int
31334 feature_compare (const void *v1, const void *v2)
31335 {
31336 typedef struct _function_version_info
31337 {
31338 tree version_decl;
31339 tree predicate_chain;
31340 unsigned int dispatch_priority;
31341 } function_version_info;
31342
31343 const function_version_info c1 = *(const function_version_info *)v1;
31344 const function_version_info c2 = *(const function_version_info *)v2;
31345 return (c2.dispatch_priority - c1.dispatch_priority);
31346 }
31347
31348 /* This function generates the dispatch function for
31349 multi-versioned functions. DISPATCH_DECL is the function which will
31350 contain the dispatch logic. FNDECLS are the function choices for
31351 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31352 in DISPATCH_DECL in which the dispatch code is generated. */
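/* The resolver body generated here is roughly equivalent to (an
   illustrative sketch):

     __builtin_cpu_init ();
     if (<predicates of the highest-priority version hold>)
       return &version_1;
     ...
     return &default_version;  */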
31353
31354 static int
31355 dispatch_function_versions (tree dispatch_decl,
31356 void *fndecls_p,
31357 basic_block *empty_bb)
31358 {
31359 tree default_decl;
31360 gimple ifunc_cpu_init_stmt;
31361 gimple_seq gseq;
31362 int ix;
31363 tree ele;
31364 vec<tree> *fndecls;
31365 unsigned int num_versions = 0;
31366 unsigned int actual_versions = 0;
31367 unsigned int i;
31368
31369 struct _function_version_info
31370 {
31371 tree version_decl;
31372 tree predicate_chain;
31373 unsigned int dispatch_priority;
31374 }*function_version_info;
31375
31376 gcc_assert (dispatch_decl != NULL
31377 && fndecls_p != NULL
31378 && empty_bb != NULL);
31379
31380 /* fndecls_p is actually a vector. */
31381 fndecls = static_cast<vec<tree> *> (fndecls_p);
31382
31383 /* At least one more version other than the default. */
31384 num_versions = fndecls->length ();
31385 gcc_assert (num_versions >= 2);
31386
31387 function_version_info = (struct _function_version_info *)
31388 XNEWVEC (struct _function_version_info, (num_versions - 1));
31389
31390 /* The first version in the vector is the default decl. */
31391 default_decl = (*fndecls)[0];
31392
31393 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31394
31395 gseq = bb_seq (*empty_bb);
31396 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31397 constructors, so explicitly call __builtin_cpu_init here. */
31398 ifunc_cpu_init_stmt = gimple_build_call_vec (
31399 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31400 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31401 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31402 set_bb_seq (*empty_bb, gseq);
31403
31404 pop_cfun ();
31405
31406
31407 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31408 {
31409 tree version_decl = ele;
31410 tree predicate_chain = NULL_TREE;
31411 unsigned int priority;
31412 /* Get attribute string, parse it and find the right predicate decl.
31413 The predicate function could be a lengthy combination of many
31414 features, like arch-type and various isa-variants. */
31415 priority = get_builtin_code_for_version (version_decl,
31416 &predicate_chain);
31417
31418 if (predicate_chain == NULL_TREE)
31419 continue;
31420
31421 function_version_info [actual_versions].version_decl = version_decl;
31422 function_version_info [actual_versions].predicate_chain
31423 = predicate_chain;
31424 function_version_info [actual_versions].dispatch_priority = priority;
31425 actual_versions++;
31426 }
31427
31428 /* Sort the versions according to descending order of dispatch priority. The
31429 priority is based on the ISA. This is not a perfect solution. There
31430 could still be ambiguity. If more than one function version is suitable
31431 to execute, which one should be dispatched? In future, allow the user
31432 to specify a dispatch priority next to the version. */
31433 qsort (function_version_info, actual_versions,
31434 sizeof (struct _function_version_info), feature_compare);
31435
31436 for (i = 0; i < actual_versions; ++i)
31437 *empty_bb = add_condition_to_bb (dispatch_decl,
31438 function_version_info[i].version_decl,
31439 function_version_info[i].predicate_chain,
31440 *empty_bb);
31441
31442 /* Dispatch the default version at the end. */
31443 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31444 NULL, *empty_bb);
31445
31446 free (function_version_info);
31447 return 0;
31448 }
31449
31450 /* Comparator function to be used in qsort routine to sort attribute
31451 specification strings to "target". */
31452
31453 static int
31454 attr_strcmp (const void *v1, const void *v2)
31455 {
31456 const char *c1 = *(char *const*)v1;
31457 const char *c2 = *(char *const*)v2;
31458 return strcmp (c1, c2);
31459 }
31460
31461 /* ARGLIST is the argument to target attribute. This function tokenizes
31462 the comma separated arguments, sorts them and returns a string which
31463 is a unique identifier for the comma separated arguments. It also
31464 replaces non-identifier characters "=,-" with "_". */
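/* For example (illustrative), the argument list "sse4.2,arch=slm" is
   rewritten to "sse4.2" and "arch_slm", sorted, and joined into
   "arch_slm_sse4.2".  */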
31465
31466 static char *
31467 sorted_attr_string (tree arglist)
31468 {
31469 tree arg;
31470 size_t str_len_sum = 0;
31471 char **args = NULL;
31472 char *attr_str, *ret_str;
31473 char *attr = NULL;
31474 unsigned int argnum = 1;
31475 unsigned int i;
31476
31477 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31478 {
31479 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31480 size_t len = strlen (str);
31481 str_len_sum += len + 1;
31482 if (arg != arglist)
31483 argnum++;
31484 for (i = 0; i < strlen (str); i++)
31485 if (str[i] == ',')
31486 argnum++;
31487 }
31488
31489 attr_str = XNEWVEC (char, str_len_sum);
31490 str_len_sum = 0;
31491 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31492 {
31493 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31494 size_t len = strlen (str);
31495 memcpy (attr_str + str_len_sum, str, len);
31496 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31497 str_len_sum += len + 1;
31498 }
31499
31500 /* Replace "=,-" with "_". */
31501 for (i = 0; i < strlen (attr_str); i++)
31502 if (attr_str[i] == '=' || attr_str[i]== '-')
31503 attr_str[i] = '_';
31504
31505 if (argnum == 1)
31506 return attr_str;
31507
31508 args = XNEWVEC (char *, argnum);
31509
31510 i = 0;
31511 attr = strtok (attr_str, ",");
31512 while (attr != NULL)
31513 {
31514 args[i] = attr;
31515 i++;
31516 attr = strtok (NULL, ",");
31517 }
31518
31519 qsort (args, argnum, sizeof (char *), attr_strcmp);
31520
31521 ret_str = XNEWVEC (char, str_len_sum);
31522 str_len_sum = 0;
31523 for (i = 0; i < argnum; i++)
31524 {
31525 size_t len = strlen (args[i]);
31526 memcpy (ret_str + str_len_sum, args[i], len);
31527 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31528 str_len_sum += len + 1;
31529 }
31530
31531 XDELETEVEC (args);
31532 XDELETEVEC (attr_str);
31533 return ret_str;
31534 }
31535
31536 /* This function changes the assembler name for functions that are
31537 versions. If DECL is a function version and has a "target"
31538 attribute, it appends the attribute string to its assembler name. */
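/* For example (illustrative), a version of "foo" declared with
   __attribute__ ((target ("avx2"))) gets the assembler name "foo.avx2";
   the "default" version keeps its original name.  */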
31539
31540 static tree
31541 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31542 {
31543 tree version_attr;
31544 const char *orig_name, *version_string;
31545 char *attr_str, *assembler_name;
31546
31547 if (DECL_DECLARED_INLINE_P (decl)
31548 && lookup_attribute ("gnu_inline",
31549 DECL_ATTRIBUTES (decl)))
31550 error_at (DECL_SOURCE_LOCATION (decl),
31551 "Function versions cannot be marked as gnu_inline,"
31552 " bodies have to be generated");
31553
31554 if (DECL_VIRTUAL_P (decl)
31555 || DECL_VINDEX (decl))
31556 sorry ("Virtual function multiversioning not supported");
31557
31558 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31559
31560 /* target attribute string cannot be NULL. */
31561 gcc_assert (version_attr != NULL_TREE);
31562
31563 orig_name = IDENTIFIER_POINTER (id);
31564 version_string
31565 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31566
31567 if (strcmp (version_string, "default") == 0)
31568 return id;
31569
31570 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31571 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31572
31573 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31574
31575 /* Allow assembler name to be modified if already set. */
31576 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31577 SET_DECL_RTL (decl, NULL);
31578
31579 tree ret = get_identifier (assembler_name);
31580 XDELETEVEC (attr_str);
31581 XDELETEVEC (assembler_name);
31582 return ret;
31583 }
31584
31585 /* This function returns true if FN1 and FN2 are versions of the same function,
31586 that is, the target strings of the function decls are different. This assumes
31587 that FN1 and FN2 have the same signature. */
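/* For example (illustrative), two declarations of the same function with
   target ("avx") and target ("sse4.2") are versions of each other, while
   two declarations whose sorted target strings compare equal are not.  */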
31588
31589 static bool
31590 ix86_function_versions (tree fn1, tree fn2)
31591 {
31592 tree attr1, attr2;
31593 char *target1, *target2;
31594 bool result;
31595
31596 if (TREE_CODE (fn1) != FUNCTION_DECL
31597 || TREE_CODE (fn2) != FUNCTION_DECL)
31598 return false;
31599
31600 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31601 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31602
31603 /* At least one function decl should have the target attribute specified. */
31604 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31605 return false;
31606
31607 /* Diagnose missing target attribute if one of the decls is already
31608 multi-versioned. */
31609 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31610 {
31611 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31612 {
31613 if (attr2 != NULL_TREE)
31614 {
31615 tree tem = fn1;
31616 fn1 = fn2;
31617 fn2 = tem;
31618 attr1 = attr2;
31619 }
31620 error_at (DECL_SOURCE_LOCATION (fn2),
31621 "missing %<target%> attribute for multi-versioned %D",
31622 fn2);
31623 inform (DECL_SOURCE_LOCATION (fn1),
31624 "previous declaration of %D", fn1);
31625 /* Prevent diagnosing of the same error multiple times. */
31626 DECL_ATTRIBUTES (fn2)
31627 = tree_cons (get_identifier ("target"),
31628 copy_node (TREE_VALUE (attr1)),
31629 DECL_ATTRIBUTES (fn2));
31630 }
31631 return false;
31632 }
31633
31634 target1 = sorted_attr_string (TREE_VALUE (attr1));
31635 target2 = sorted_attr_string (TREE_VALUE (attr2));
31636
31637 /* The sorted target strings must be different for fn1 and fn2
31638 to be versions. */
31639 if (strcmp (target1, target2) == 0)
31640 result = false;
31641 else
31642 result = true;
31643
31644 XDELETEVEC (target1);
31645 XDELETEVEC (target2);
31646
31647 return result;
31648 }
31649
31650 static tree
31651 ix86_mangle_decl_assembler_name (tree decl, tree id)
31652 {
31653 /* For function version, add the target suffix to the assembler name. */
31654 if (TREE_CODE (decl) == FUNCTION_DECL
31655 && DECL_FUNCTION_VERSIONED (decl))
31656 id = ix86_mangle_function_version_assembler_name (decl, id);
31657 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31658 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31659 #endif
31660
31661 return id;
31662 }
31663
31664 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31665 is true, append the full path name of the source file. */
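/* For example (illustrative), make_name (foo, "resolver", false) yields
   "foo.resolver"; with MAKE_UNIQUE a file-specific component is added,
   giving something like "foo.<unique>.resolver".  */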
31666
31667 static char *
31668 make_name (tree decl, const char *suffix, bool make_unique)
31669 {
31670 char *global_var_name;
31671 int name_len;
31672 const char *name;
31673 const char *unique_name = NULL;
31674
31675 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31676
31677 /* Get a unique name that can be used globally without any chances
31678 of collision at link time. */
31679 if (make_unique)
31680 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31681
31682 name_len = strlen (name) + strlen (suffix) + 2;
31683
31684 if (make_unique)
31685 name_len += strlen (unique_name) + 1;
31686 global_var_name = XNEWVEC (char, name_len);
31687
31688 /* Use '.' to concatenate names as it is demangler friendly. */
31689 if (make_unique)
31690 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31691 suffix);
31692 else
31693 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31694
31695 return global_var_name;
31696 }
31697
31698 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31699
31700 /* Make a dispatcher declaration for the multi-versioned function DECL.
31701 Calls to DECL function will be replaced with calls to the dispatcher
31702 by the front-end. Return the decl created. */
31703
31704 static tree
31705 make_dispatcher_decl (const tree decl)
31706 {
31707 tree func_decl;
31708 char *func_name;
31709 tree fn_type, func_type;
31710 bool is_uniq = false;
31711
31712 if (TREE_PUBLIC (decl) == 0)
31713 is_uniq = true;
31714
31715 func_name = make_name (decl, "ifunc", is_uniq);
31716
31717 fn_type = TREE_TYPE (decl);
31718 func_type = build_function_type (TREE_TYPE (fn_type),
31719 TYPE_ARG_TYPES (fn_type));
31720
31721 func_decl = build_fn_decl (func_name, func_type);
31722 XDELETEVEC (func_name);
31723 TREE_USED (func_decl) = 1;
31724 DECL_CONTEXT (func_decl) = NULL_TREE;
31725 DECL_INITIAL (func_decl) = error_mark_node;
31726 DECL_ARTIFICIAL (func_decl) = 1;
31727 /* Mark this func as external; the resolver will flip it again if
31728 it gets generated. */
31729 DECL_EXTERNAL (func_decl) = 1;
31730 /* IFUNCs have to be externally visible. */
31731 TREE_PUBLIC (func_decl) = 1;
31732
31733 return func_decl;
31734 }
31735
31736 #endif
31737
31738 /* Returns true if DECL is multi-versioned and is the default function,
31739 that is, it is not tagged with a target-specific optimization. */
31740
31741 static bool
31742 is_function_default_version (const tree decl)
31743 {
31744 if (TREE_CODE (decl) != FUNCTION_DECL
31745 || !DECL_FUNCTION_VERSIONED (decl))
31746 return false;
31747 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31748 gcc_assert (attr);
31749 attr = TREE_VALUE (TREE_VALUE (attr));
31750 return (TREE_CODE (attr) == STRING_CST
31751 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
31752 }
31753
31754 /* Make a dispatcher declaration for the multi-versioned function DECL.
31755 Calls to DECL function will be replaced with calls to the dispatcher
31756 by the front-end. Returns the decl of the dispatcher function. */
31757
31758 static tree
31759 ix86_get_function_versions_dispatcher (void *decl)
31760 {
31761 tree fn = (tree) decl;
31762 struct cgraph_node *node = NULL;
31763 struct cgraph_node *default_node = NULL;
31764 struct cgraph_function_version_info *node_v = NULL;
31765 struct cgraph_function_version_info *first_v = NULL;
31766
31767 tree dispatch_decl = NULL;
31768
31769 struct cgraph_function_version_info *default_version_info = NULL;
31770
31771 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31772
31773 node = cgraph_get_node (fn);
31774 gcc_assert (node != NULL);
31775
31776 node_v = get_cgraph_node_version (node);
31777 gcc_assert (node_v != NULL);
31778
31779 if (node_v->dispatcher_resolver != NULL)
31780 return node_v->dispatcher_resolver;
31781
31782 /* Find the default version and make it the first node. */
31783 first_v = node_v;
31784 /* Go to the beginning of the chain. */
31785 while (first_v->prev != NULL)
31786 first_v = first_v->prev;
31787 default_version_info = first_v;
31788 while (default_version_info != NULL)
31789 {
31790 if (is_function_default_version
31791 (default_version_info->this_node->decl))
31792 break;
31793 default_version_info = default_version_info->next;
31794 }
31795
31796 /* If there is no default node, just return NULL. */
31797 if (default_version_info == NULL)
31798 return NULL;
31799
31800 /* Make default info the first node. */
31801 if (first_v != default_version_info)
31802 {
31803 default_version_info->prev->next = default_version_info->next;
31804 if (default_version_info->next)
31805 default_version_info->next->prev = default_version_info->prev;
31806 first_v->prev = default_version_info;
31807 default_version_info->next = first_v;
31808 default_version_info->prev = NULL;
31809 }
31810
31811 default_node = default_version_info->this_node;
31812
31813 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31814 if (targetm.has_ifunc_p ())
31815 {
31816 struct cgraph_function_version_info *it_v = NULL;
31817 struct cgraph_node *dispatcher_node = NULL;
31818 struct cgraph_function_version_info *dispatcher_version_info = NULL;
31819
31820 /* Right now, the dispatching is done via ifunc. */
31821 dispatch_decl = make_dispatcher_decl (default_node->decl);
31822
31823 dispatcher_node = cgraph_get_create_node (dispatch_decl);
31824 gcc_assert (dispatcher_node != NULL);
31825 dispatcher_node->dispatcher_function = 1;
31826 dispatcher_version_info
31827 = insert_new_cgraph_node_version (dispatcher_node);
31828 dispatcher_version_info->next = default_version_info;
31829 dispatcher_node->definition = 1;
31830
31831 /* Set the dispatcher for all the versions. */
31832 it_v = default_version_info;
31833 while (it_v != NULL)
31834 {
31835 it_v->dispatcher_resolver = dispatch_decl;
31836 it_v = it_v->next;
31837 }
31838 }
31839 else
31840 #endif
31841 {
31842 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31843 "multiversioning needs ifunc which is not supported "
31844 "on this target");
31845 }
31846
31847 return dispatch_decl;
31848 }
31849
31850 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
31851 it to CHAIN. */
31852
31853 static tree
31854 make_attribute (const char *name, const char *arg_name, tree chain)
31855 {
31856 tree attr_name;
31857 tree attr_arg_name;
31858 tree attr_args;
31859 tree attr;
31860
31861 attr_name = get_identifier (name);
31862 attr_arg_name = build_string (strlen (arg_name), arg_name);
31863 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
31864 attr = tree_cons (attr_name, attr_args, chain);
31865 return attr;
31866 }
31867
31868 /* Make the resolver function decl to dispatch the versions of
31869 a multi-versioned function, DEFAULT_DECL. Create an
31870 empty basic block in the resolver and store the pointer in
31871 EMPTY_BB. Return the decl of the resolver function. */
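/* In effect, for a versioned function "foo" (an illustrative sketch):
   the resolver created here is named "foo.resolver" and returns a pointer
   to the selected version, and DISPATCH_DECL (the "foo.ifunc" decl) is
   tagged with attribute ifunc ("foo.resolver") so that calls bind through
   the resolver at load time.  */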
31872
31873 static tree
31874 make_resolver_func (const tree default_decl,
31875 const tree dispatch_decl,
31876 basic_block *empty_bb)
31877 {
31878 char *resolver_name;
31879 tree decl, type, decl_name, t;
31880 bool is_uniq = false;
31881
31882 /* IFUNCs have to be globally visible. So, if the default_decl is
31883 not, then the name of the IFUNC should be made unique. */
31884 if (TREE_PUBLIC (default_decl) == 0)
31885 is_uniq = true;
31886
31887 /* Append the filename to the resolver function if the versions are
31888 not externally visible. This is because the resolver function has
31889 to be externally visible for the loader to find it. So, appending
31890 the filename will prevent conflicts with a resolver function from
31891 another module which is based on the same version name. */
31892 resolver_name = make_name (default_decl, "resolver", is_uniq);
31893
31894 /* The resolver function should return a (void *). */
31895 type = build_function_type_list (ptr_type_node, NULL_TREE);
31896
31897 decl = build_fn_decl (resolver_name, type);
31898 decl_name = get_identifier (resolver_name);
31899 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
31900
31901 DECL_NAME (decl) = decl_name;
31902 TREE_USED (decl) = 1;
31903 DECL_ARTIFICIAL (decl) = 1;
31904 DECL_IGNORED_P (decl) = 0;
31905 /* IFUNC resolvers have to be externally visible. */
31906 TREE_PUBLIC (decl) = 1;
31907 DECL_UNINLINABLE (decl) = 1;
31908
31909 /* Resolver is not external; its body is generated. */
31910 DECL_EXTERNAL (decl) = 0;
31911 DECL_EXTERNAL (dispatch_decl) = 0;
31912
31913 DECL_CONTEXT (decl) = NULL_TREE;
31914 DECL_INITIAL (decl) = make_node (BLOCK);
31915 DECL_STATIC_CONSTRUCTOR (decl) = 0;
31916
31917 if (DECL_COMDAT_GROUP (default_decl)
31918 || TREE_PUBLIC (default_decl))
31919 {
31920 /* In this case, each translation unit with a call to this
31921 versioned function will put out a resolver. Ensure it
31922 is comdat to keep just one copy. */
31923 DECL_COMDAT (decl) = 1;
31924 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
31925 }
31926 /* Build result decl and add to function_decl. */
31927 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
31928 DECL_ARTIFICIAL (t) = 1;
31929 DECL_IGNORED_P (t) = 1;
31930 DECL_RESULT (decl) = t;
31931
31932 gimplify_function_tree (decl);
31933 push_cfun (DECL_STRUCT_FUNCTION (decl));
31934 *empty_bb = init_lowered_empty_function (decl, false);
31935
31936 cgraph_add_new_function (decl, true);
31937 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
31938
31939 pop_cfun ();
31940
31941 gcc_assert (dispatch_decl != NULL);
31942 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
31943 DECL_ATTRIBUTES (dispatch_decl)
31944 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
31945
31946 /* Create the alias for dispatch to resolver here. */
31947 /*cgraph_create_function_alias (dispatch_decl, decl);*/
31948 cgraph_same_body_alias (NULL, dispatch_decl, decl);
31949 XDELETEVEC (resolver_name);
31950 return decl;
31951 }
31952
31953 /* Generate the dispatching code body to dispatch multi-versioned function
31954 DECL. The target hook is called to process the "target" attributes and
31955 provide the code to dispatch the right function at run-time. NODE points
31956 to the dispatcher decl whose body will be created. */
31957
31958 static tree
31959 ix86_generate_version_dispatcher_body (void *node_p)
31960 {
31961 tree resolver_decl;
31962 basic_block empty_bb;
31963 tree default_ver_decl;
31964 struct cgraph_node *versn;
31965 struct cgraph_node *node;
31966
31967 struct cgraph_function_version_info *node_version_info = NULL;
31968 struct cgraph_function_version_info *versn_info = NULL;
31969
31970 node = (cgraph_node *)node_p;
31971
31972 node_version_info = get_cgraph_node_version (node);
31973 gcc_assert (node->dispatcher_function
31974 && node_version_info != NULL);
31975
31976 if (node_version_info->dispatcher_resolver)
31977 return node_version_info->dispatcher_resolver;
31978
31979 /* The first version in the chain corresponds to the default version. */
31980 default_ver_decl = node_version_info->next->this_node->decl;
31981
31982 /* node is going to be an alias, so remove the finalized bit. */
31983 node->definition = false;
31984
31985 resolver_decl = make_resolver_func (default_ver_decl,
31986 node->decl, &empty_bb);
31987
31988 node_version_info->dispatcher_resolver = resolver_decl;
31989
31990 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
31991
31992 auto_vec<tree, 2> fn_ver_vec;
31993
31994 for (versn_info = node_version_info->next; versn_info;
31995 versn_info = versn_info->next)
31996 {
31997 versn = versn_info->this_node;
31998 /* Check for virtual functions here again, as by this time it should
31999 have been determined if this function needs a vtable index or
32000 not. This happens for methods in derived classes that override
32001 virtual methods in base classes but are not explicitly marked as
32002 virtual. */
32003 if (DECL_VINDEX (versn->decl))
32004 sorry ("Virtual function multiversioning not supported");
32005
32006 fn_ver_vec.safe_push (versn->decl);
32007 }
32008
32009 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32010 rebuild_cgraph_edges ();
32011 pop_cfun ();
32012 return resolver_decl;
32013 }
32014 /* This builds the processor_model struct type defined in
32015 libgcc/config/i386/cpuinfo.c */
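/* The mirrored layout is roughly (illustrative):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */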
32016
32017 static tree
32018 build_processor_model_struct (void)
32019 {
32020 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32021 "__cpu_features"};
32022 tree field = NULL_TREE, field_chain = NULL_TREE;
32023 int i;
32024 tree type = make_node (RECORD_TYPE);
32025
32026 /* The first 3 fields are unsigned int. */
32027 for (i = 0; i < 3; ++i)
32028 {
32029 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32030 get_identifier (field_name[i]), unsigned_type_node);
32031 if (field_chain != NULL_TREE)
32032 DECL_CHAIN (field) = field_chain;
32033 field_chain = field;
32034 }
32035
32036 /* The last field is an array of unsigned integers of size one. */
32037 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32038 get_identifier (field_name[3]),
32039 build_array_type (unsigned_type_node,
32040 build_index_type (size_one_node)));
32041 if (field_chain != NULL_TREE)
32042 DECL_CHAIN (field) = field_chain;
32043 field_chain = field;
32044
32045 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32046 return type;
32047 }
32048
32049 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32050
32051 static tree
32052 make_var_decl (tree type, const char *name)
32053 {
32054 tree new_decl;
32055
32056 new_decl = build_decl (UNKNOWN_LOCATION,
32057 VAR_DECL,
32058 get_identifier(name),
32059 type);
32060
32061 DECL_EXTERNAL (new_decl) = 1;
32062 TREE_STATIC (new_decl) = 1;
32063 TREE_PUBLIC (new_decl) = 1;
32064 DECL_INITIAL (new_decl) = 0;
32065 DECL_ARTIFICIAL (new_decl) = 0;
32066 DECL_PRESERVE_P (new_decl) = 1;
32067
32068 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32069 assemble_variable (new_decl, 0, 0, 0);
32070
32071 return new_decl;
32072 }
32073
32074 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32075 into an integer defined in libgcc/config/i386/cpuinfo.c */
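/* For example (an illustrative sketch of the resulting folds):

     __builtin_cpu_is ("amd")
       -> (int) (__cpu_model.__cpu_vendor == M_AMD)
     __builtin_cpu_supports ("avx2")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))  */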
32076
32077 static tree
32078 fold_builtin_cpu (tree fndecl, tree *args)
32079 {
32080 unsigned int i;
32081 enum ix86_builtins fn_code = (enum ix86_builtins)
32082 DECL_FUNCTION_CODE (fndecl);
32083 tree param_string_cst = NULL;
32084
32085 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32086 enum processor_features
32087 {
32088 F_CMOV = 0,
32089 F_MMX,
32090 F_POPCNT,
32091 F_SSE,
32092 F_SSE2,
32093 F_SSE3,
32094 F_SSSE3,
32095 F_SSE4_1,
32096 F_SSE4_2,
32097 F_AVX,
32098 F_AVX2,
32099 F_SSE4_A,
32100 F_FMA4,
32101 F_XOP,
32102 F_FMA,
32103 F_MAX
32104 };
32105
32106 /* These are the values for vendor types and cpu types and subtypes
32107 in cpuinfo.c. For cpu types and subtypes, the corresponding start
32108 value must be subtracted. */
32109 enum processor_model
32110 {
32111 M_INTEL = 1,
32112 M_AMD,
32113 M_CPU_TYPE_START,
32114 M_INTEL_BONNELL,
32115 M_INTEL_CORE2,
32116 M_INTEL_COREI7,
32117 M_AMDFAM10H,
32118 M_AMDFAM15H,
32119 M_INTEL_SILVERMONT,
32120 M_AMD_BTVER1,
32121 M_AMD_BTVER2,
32122 M_CPU_SUBTYPE_START,
32123 M_INTEL_COREI7_NEHALEM,
32124 M_INTEL_COREI7_WESTMERE,
32125 M_INTEL_COREI7_SANDYBRIDGE,
32126 M_AMDFAM10H_BARCELONA,
32127 M_AMDFAM10H_SHANGHAI,
32128 M_AMDFAM10H_ISTANBUL,
32129 M_AMDFAM15H_BDVER1,
32130 M_AMDFAM15H_BDVER2,
32131 M_AMDFAM15H_BDVER3,
32132 M_AMDFAM15H_BDVER4,
32133 M_INTEL_COREI7_IVYBRIDGE,
32134 M_INTEL_COREI7_HASWELL
32135 };
32136
32137 static struct _arch_names_table
32138 {
32139 const char *const name;
32140 const enum processor_model model;
32141 }
32142 const arch_names_table[] =
32143 {
32144 {"amd", M_AMD},
32145 {"intel", M_INTEL},
32146 {"atom", M_INTEL_BONNELL},
32147 {"slm", M_INTEL_SILVERMONT},
32148 {"core2", M_INTEL_CORE2},
32149 {"corei7", M_INTEL_COREI7},
32150 {"nehalem", M_INTEL_COREI7_NEHALEM},
32151 {"westmere", M_INTEL_COREI7_WESTMERE},
32152 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32153 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32154 {"haswell", M_INTEL_COREI7_HASWELL},
32155 {"bonnell", M_INTEL_BONNELL},
32156 {"silvermont", M_INTEL_SILVERMONT},
32157 {"amdfam10h", M_AMDFAM10H},
32158 {"barcelona", M_AMDFAM10H_BARCELONA},
32159 {"shanghai", M_AMDFAM10H_SHANGHAI},
32160 {"istanbul", M_AMDFAM10H_ISTANBUL},
32161 {"btver1", M_AMD_BTVER1},
32162 {"amdfam15h", M_AMDFAM15H},
32163 {"bdver1", M_AMDFAM15H_BDVER1},
32164 {"bdver2", M_AMDFAM15H_BDVER2},
32165 {"bdver3", M_AMDFAM15H_BDVER3},
32166 {"bdver4", M_AMDFAM15H_BDVER4},
32167 {"btver2", M_AMD_BTVER2},
32168 };
32169
32170 static struct _isa_names_table
32171 {
32172 const char *const name;
32173 const enum processor_features feature;
32174 }
32175 const isa_names_table[] =
32176 {
32177 {"cmov", F_CMOV},
32178 {"mmx", F_MMX},
32179 {"popcnt", F_POPCNT},
32180 {"sse", F_SSE},
32181 {"sse2", F_SSE2},
32182 {"sse3", F_SSE3},
32183 {"ssse3", F_SSSE3},
32184 {"sse4a", F_SSE4_A},
32185 {"sse4.1", F_SSE4_1},
32186 {"sse4.2", F_SSE4_2},
32187 {"avx", F_AVX},
32188 {"fma4", F_FMA4},
32189 {"xop", F_XOP},
32190 {"fma", F_FMA},
32191 {"avx2", F_AVX2}
32192 };
32193
32194 tree __processor_model_type = build_processor_model_struct ();
32195 tree __cpu_model_var = make_var_decl (__processor_model_type,
32196 "__cpu_model");
32197
32198
32199 varpool_add_new_variable (__cpu_model_var);
32200
32201 gcc_assert ((args != NULL) && (*args != NULL));
32202
32203 param_string_cst = *args;
32204 while (param_string_cst
32205 && TREE_CODE (param_string_cst) != STRING_CST)
32206 {
32207 /* *args must be an expr that can contain other EXPRs leading to a
32208 STRING_CST. */
32209 if (!EXPR_P (param_string_cst))
32210 {
32211 error ("Parameter to builtin must be a string constant or literal");
32212 return integer_zero_node;
32213 }
32214 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32215 }
32216
32217 gcc_assert (param_string_cst);
32218
32219 if (fn_code == IX86_BUILTIN_CPU_IS)
32220 {
32221 tree ref;
32222 tree field;
32223 tree final;
32224
32225 unsigned int field_val = 0;
32226 unsigned int NUM_ARCH_NAMES
32227 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32228
32229 for (i = 0; i < NUM_ARCH_NAMES; i++)
32230 if (strcmp (arch_names_table[i].name,
32231 TREE_STRING_POINTER (param_string_cst)) == 0)
32232 break;
32233
32234 if (i == NUM_ARCH_NAMES)
32235 {
32236 error ("Parameter to builtin not valid: %s",
32237 TREE_STRING_POINTER (param_string_cst));
32238 return integer_zero_node;
32239 }
32240
32241 field = TYPE_FIELDS (__processor_model_type);
32242 field_val = arch_names_table[i].model;
32243
32244 /* CPU types are stored in the next field. */
32245 if (field_val > M_CPU_TYPE_START
32246 && field_val < M_CPU_SUBTYPE_START)
32247 {
32248 field = DECL_CHAIN (field);
32249 field_val -= M_CPU_TYPE_START;
32250 }
32251
32252 /* CPU subtypes are stored in the next field. */
32253 if (field_val > M_CPU_SUBTYPE_START)
32254 {
32255 field = DECL_CHAIN ( DECL_CHAIN (field));
32256 field_val -= M_CPU_SUBTYPE_START;
32257 }
32258
32259 /* Get the appropriate field in __cpu_model. */
32260 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32261 field, NULL_TREE);
32262
32263 /* Check the value. */
32264 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32265 build_int_cstu (unsigned_type_node, field_val));
32266 return build1 (CONVERT_EXPR, integer_type_node, final);
32267 }
32268 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32269 {
32270 tree ref;
32271 tree array_elt;
32272 tree field;
32273 tree final;
32274
32275 unsigned int field_val = 0;
32276 unsigned int NUM_ISA_NAMES
32277 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32278
32279 for (i = 0; i < NUM_ISA_NAMES; i++)
32280 if (strcmp (isa_names_table[i].name,
32281 TREE_STRING_POINTER (param_string_cst)) == 0)
32282 break;
32283
32284 if (i == NUM_ISA_NAMES)
32285 {
32286 error ("Parameter to builtin not valid: %s",
32287 TREE_STRING_POINTER (param_string_cst));
32288 return integer_zero_node;
32289 }
32290
32291 field = TYPE_FIELDS (__processor_model_type);
32292 /* Get the last field, which is __cpu_features. */
32293 while (DECL_CHAIN (field))
32294 field = DECL_CHAIN (field);
32295
32296 /* Get the appropriate field: __cpu_model.__cpu_features */
32297 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32298 field, NULL_TREE);
32299
32300 /* Access the 0th element of __cpu_features array. */
32301 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32302 integer_zero_node, NULL_TREE, NULL_TREE);
32303
32304 field_val = (1 << isa_names_table[i].feature);
32305 /* Return __cpu_model.__cpu_features[0] & field_val */
32306 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32307 build_int_cstu (unsigned_type_node, field_val));
32308 return build1 (CONVERT_EXPR, integer_type_node, final);
32309 }
32310 gcc_unreachable ();
32311 }
32312
32313 static tree
32314 ix86_fold_builtin (tree fndecl, int n_args,
32315 tree *args, bool ignore ATTRIBUTE_UNUSED)
32316 {
32317 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32318 {
32319 enum ix86_builtins fn_code = (enum ix86_builtins)
32320 DECL_FUNCTION_CODE (fndecl);
32321 if (fn_code == IX86_BUILTIN_CPU_IS
32322 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32323 {
32324 gcc_assert (n_args == 1);
32325 return fold_builtin_cpu (fndecl, args);
32326 }
32327 }
32328
32329 #ifdef SUBTARGET_FOLD_BUILTIN
32330 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32331 #endif
32332
32333 return NULL_TREE;
32334 }
32335
32336 /* Make builtins to detect cpu type and features supported. NAME is
32337 the builtin name, CODE is the builtin code, FTYPE is the function type
32338 of the builtin, and IS_CONST marks the builtin as const (TREE_READONLY). */
32339
32340 static void
32341 make_cpu_type_builtin (const char* name, int code,
32342 enum ix86_builtin_func_type ftype, bool is_const)
32343 {
32344 tree decl;
32345 tree type;
32346
32347 type = ix86_get_builtin_func_type (ftype);
32348 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32349 NULL, NULL_TREE);
32350 gcc_assert (decl != NULL_TREE);
32351 ix86_builtins[(int) code] = decl;
32352 TREE_READONLY (decl) = is_const;
32353 }
32354
32355 /* Make builtins to get CPU type and features supported. The created
32356 builtins are:
32357
32358 __builtin_cpu_init (), to detect cpu type and features,
32359 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32360 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32361 */
32362
32363 static void
32364 ix86_init_platform_type_builtins (void)
32365 {
32366 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32367 INT_FTYPE_VOID, false);
32368 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32369 INT_FTYPE_PCCHAR, true);
32370 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32371 INT_FTYPE_PCCHAR, true);
32372 }
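
/* User code can then select an implementation at run time, e.g. (an
   illustrative sketch, not part of this file):

     if (__builtin_cpu_is ("corei7") && __builtin_cpu_supports ("avx2"))
       avx2_impl ();
     else
       generic_impl ();

   __builtin_cpu_init () is normally run early by a libgcc constructor;
   the IFUNC resolvers generated in this file call it explicitly because
   they may run before constructors.  */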
32373
32374 /* Internal method for ix86_init_builtins. */
32375
32376 static void
32377 ix86_init_builtins_va_builtins_abi (void)
32378 {
32379 tree ms_va_ref, sysv_va_ref;
32380 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32381 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32382 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32383 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32384
32385 if (!TARGET_64BIT)
32386 return;
32387 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32388 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32389 ms_va_ref = build_reference_type (ms_va_list_type_node);
32390 sysv_va_ref =
32391 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32392
32393 fnvoid_va_end_ms =
32394 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32395 fnvoid_va_start_ms =
32396 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32397 fnvoid_va_end_sysv =
32398 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32399 fnvoid_va_start_sysv =
32400 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32401 NULL_TREE);
32402 fnvoid_va_copy_ms =
32403 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32404 NULL_TREE);
32405 fnvoid_va_copy_sysv =
32406 build_function_type_list (void_type_node, sysv_va_ref,
32407 sysv_va_ref, NULL_TREE);
32408
32409 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32410 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32411 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32412 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32413 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32414 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32415 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32416 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32417 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32418 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32419 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32420 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32421 }
32422
32423 static void
32424 ix86_init_builtin_types (void)
32425 {
32426 tree float128_type_node, float80_type_node;
32427
32428 /* The __float80 type. */
32429 float80_type_node = long_double_type_node;
32430 if (TYPE_MODE (float80_type_node) != XFmode)
32431 {
32432 /* The __float80 type. */
32433 float80_type_node = make_node (REAL_TYPE);
32434
32435 TYPE_PRECISION (float80_type_node) = 80;
32436 layout_type (float80_type_node);
32437 }
32438 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32439
32440 /* The __float128 type. */
32441 float128_type_node = make_node (REAL_TYPE);
32442 TYPE_PRECISION (float128_type_node) = 128;
32443 layout_type (float128_type_node);
32444 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32445
32446 /* This macro is built by i386-builtin-types.awk. */
32447 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32448 }
32449
32450 static void
32451 ix86_init_builtins (void)
32452 {
32453 tree t;
32454
32455 ix86_init_builtin_types ();
32456
32457 /* Builtins to get CPU type and features. */
32458 ix86_init_platform_type_builtins ();
32459
32460 /* TFmode support builtins. */
32461 def_builtin_const (0, "__builtin_infq",
32462 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32463 def_builtin_const (0, "__builtin_huge_valq",
32464 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32465
32466 /* We will expand them to a normal call if SSE isn't available, since
32467 they are used by libgcc. */
32468 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32469 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32470 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32471 TREE_READONLY (t) = 1;
32472 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32473
32474 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32475 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32476 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32477 TREE_READONLY (t) = 1;
32478 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32479
32480 ix86_init_tm_builtins ();
32481 ix86_init_mmx_sse_builtins ();
32482
32483 if (TARGET_LP64)
32484 ix86_init_builtins_va_builtins_abi ();
32485
32486 #ifdef SUBTARGET_INIT_BUILTINS
32487 SUBTARGET_INIT_BUILTINS;
32488 #endif
32489 }
32490
32491 /* Return the ix86 builtin for CODE. */
32492
32493 static tree
32494 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32495 {
32496 if (code >= IX86_BUILTIN_MAX)
32497 return error_mark_node;
32498
32499 return ix86_builtins[code];
32500 }
32501
32502 /* Errors in the source file can cause expand_expr to return const0_rtx
32503 where we expect a vector. To avoid crashing, use one of the vector
32504 clear instructions. */
32505 static rtx
32506 safe_vector_operand (rtx x, enum machine_mode mode)
32507 {
32508 if (x == const0_rtx)
32509 x = CONST0_RTX (mode);
32510 return x;
32511 }
32512
32513 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
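/* Illustrative only: a plain two-operand builtin such as
   __builtin_ia32_paddw128 (a, b) is routed here by ix86_expand_args_builtin
   when d->comparison is UNKNOWN; ICODE names the add pattern, the expanded
   arguments become operands 1 and 2, and TARGET is reused as operand 0 when
   its mode and predicate allow.  */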
32514
32515 static rtx
32516 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32517 {
32518 rtx pat;
32519 tree arg0 = CALL_EXPR_ARG (exp, 0);
32520 tree arg1 = CALL_EXPR_ARG (exp, 1);
32521 rtx op0 = expand_normal (arg0);
32522 rtx op1 = expand_normal (arg1);
32523 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32524 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32525 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32526
32527 if (VECTOR_MODE_P (mode0))
32528 op0 = safe_vector_operand (op0, mode0);
32529 if (VECTOR_MODE_P (mode1))
32530 op1 = safe_vector_operand (op1, mode1);
32531
32532 if (optimize || !target
32533 || GET_MODE (target) != tmode
32534 || !insn_data[icode].operand[0].predicate (target, tmode))
32535 target = gen_reg_rtx (tmode);
32536
32537 if (GET_MODE (op1) == SImode && mode1 == TImode)
32538 {
32539 rtx x = gen_reg_rtx (V4SImode);
32540 emit_insn (gen_sse2_loadd (x, op1));
32541 op1 = gen_lowpart (TImode, x);
32542 }
32543
32544 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32545 op0 = copy_to_mode_reg (mode0, op0);
32546 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32547 op1 = copy_to_mode_reg (mode1, op1);
32548
32549 pat = GEN_FCN (icode) (target, op0, op1);
32550 if (! pat)
32551 return 0;
32552
32553 emit_insn (pat);
32554
32555 return target;
32556 }
32557
32558 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
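/* Illustrative only (builtin names as in the multi-arg tables elsewhere in
   this file): the FMA4/XOP builtins are expanded here, e.g.
   __builtin_ia32_vfmaddps (a, b, c) as a MULTI_ARG_3_SF form and
   __builtin_ia32_vpcomeqd (x, y) as MULTI_ARG_2_SI_CMP, where SUB_CODE
   carries the comparison (EQ) that gets wrapped into the insn below.  */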
32559
32560 static rtx
32561 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32562 enum ix86_builtin_func_type m_type,
32563 enum rtx_code sub_code)
32564 {
32565 rtx pat;
32566 int i;
32567 int nargs;
32568 bool comparison_p = false;
32569 bool tf_p = false;
32570 bool last_arg_constant = false;
32571 int num_memory = 0;
32572 struct {
32573 rtx op;
32574 enum machine_mode mode;
32575 } args[4];
32576
32577 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32578
32579 switch (m_type)
32580 {
32581 case MULTI_ARG_4_DF2_DI_I:
32582 case MULTI_ARG_4_DF2_DI_I1:
32583 case MULTI_ARG_4_SF2_SI_I:
32584 case MULTI_ARG_4_SF2_SI_I1:
32585 nargs = 4;
32586 last_arg_constant = true;
32587 break;
32588
32589 case MULTI_ARG_3_SF:
32590 case MULTI_ARG_3_DF:
32591 case MULTI_ARG_3_SF2:
32592 case MULTI_ARG_3_DF2:
32593 case MULTI_ARG_3_DI:
32594 case MULTI_ARG_3_SI:
32595 case MULTI_ARG_3_SI_DI:
32596 case MULTI_ARG_3_HI:
32597 case MULTI_ARG_3_HI_SI:
32598 case MULTI_ARG_3_QI:
32599 case MULTI_ARG_3_DI2:
32600 case MULTI_ARG_3_SI2:
32601 case MULTI_ARG_3_HI2:
32602 case MULTI_ARG_3_QI2:
32603 nargs = 3;
32604 break;
32605
32606 case MULTI_ARG_2_SF:
32607 case MULTI_ARG_2_DF:
32608 case MULTI_ARG_2_DI:
32609 case MULTI_ARG_2_SI:
32610 case MULTI_ARG_2_HI:
32611 case MULTI_ARG_2_QI:
32612 nargs = 2;
32613 break;
32614
32615 case MULTI_ARG_2_DI_IMM:
32616 case MULTI_ARG_2_SI_IMM:
32617 case MULTI_ARG_2_HI_IMM:
32618 case MULTI_ARG_2_QI_IMM:
32619 nargs = 2;
32620 last_arg_constant = true;
32621 break;
32622
32623 case MULTI_ARG_1_SF:
32624 case MULTI_ARG_1_DF:
32625 case MULTI_ARG_1_SF2:
32626 case MULTI_ARG_1_DF2:
32627 case MULTI_ARG_1_DI:
32628 case MULTI_ARG_1_SI:
32629 case MULTI_ARG_1_HI:
32630 case MULTI_ARG_1_QI:
32631 case MULTI_ARG_1_SI_DI:
32632 case MULTI_ARG_1_HI_DI:
32633 case MULTI_ARG_1_HI_SI:
32634 case MULTI_ARG_1_QI_DI:
32635 case MULTI_ARG_1_QI_SI:
32636 case MULTI_ARG_1_QI_HI:
32637 nargs = 1;
32638 break;
32639
32640 case MULTI_ARG_2_DI_CMP:
32641 case MULTI_ARG_2_SI_CMP:
32642 case MULTI_ARG_2_HI_CMP:
32643 case MULTI_ARG_2_QI_CMP:
32644 nargs = 2;
32645 comparison_p = true;
32646 break;
32647
32648 case MULTI_ARG_2_SF_TF:
32649 case MULTI_ARG_2_DF_TF:
32650 case MULTI_ARG_2_DI_TF:
32651 case MULTI_ARG_2_SI_TF:
32652 case MULTI_ARG_2_HI_TF:
32653 case MULTI_ARG_2_QI_TF:
32654 nargs = 2;
32655 tf_p = true;
32656 break;
32657
32658 default:
32659 gcc_unreachable ();
32660 }
32661
32662 if (optimize || !target
32663 || GET_MODE (target) != tmode
32664 || !insn_data[icode].operand[0].predicate (target, tmode))
32665 target = gen_reg_rtx (tmode);
32666
32667 gcc_assert (nargs <= 4);
32668
32669 for (i = 0; i < nargs; i++)
32670 {
32671 tree arg = CALL_EXPR_ARG (exp, i);
32672 rtx op = expand_normal (arg);
32673 int adjust = (comparison_p) ? 1 : 0;
32674 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32675
32676 if (last_arg_constant && i == nargs - 1)
32677 {
32678 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32679 {
32680 enum insn_code new_icode = icode;
32681 switch (icode)
32682 {
32683 case CODE_FOR_xop_vpermil2v2df3:
32684 case CODE_FOR_xop_vpermil2v4sf3:
32685 case CODE_FOR_xop_vpermil2v4df3:
32686 case CODE_FOR_xop_vpermil2v8sf3:
32687 error ("the last argument must be a 2-bit immediate");
32688 return gen_reg_rtx (tmode);
32689 case CODE_FOR_xop_rotlv2di3:
32690 new_icode = CODE_FOR_rotlv2di3;
32691 goto xop_rotl;
32692 case CODE_FOR_xop_rotlv4si3:
32693 new_icode = CODE_FOR_rotlv4si3;
32694 goto xop_rotl;
32695 case CODE_FOR_xop_rotlv8hi3:
32696 new_icode = CODE_FOR_rotlv8hi3;
32697 goto xop_rotl;
32698 case CODE_FOR_xop_rotlv16qi3:
32699 new_icode = CODE_FOR_rotlv16qi3;
32700 xop_rotl:
32701 if (CONST_INT_P (op))
32702 {
32703 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32704 op = GEN_INT (INTVAL (op) & mask);
32705 gcc_checking_assert
32706 (insn_data[icode].operand[i + 1].predicate (op, mode));
32707 }
32708 else
32709 {
32710 gcc_checking_assert
32711 (nargs == 2
32712 && insn_data[new_icode].operand[0].mode == tmode
32713 && insn_data[new_icode].operand[1].mode == tmode
32714 && insn_data[new_icode].operand[2].mode == mode
32715 && insn_data[new_icode].operand[0].predicate
32716 == insn_data[icode].operand[0].predicate
32717 && insn_data[new_icode].operand[1].predicate
32718 == insn_data[icode].operand[1].predicate);
32719 icode = new_icode;
32720 goto non_constant;
32721 }
32722 break;
32723 default:
32724 gcc_unreachable ();
32725 }
32726 }
32727 }
32728 else
32729 {
32730 non_constant:
32731 if (VECTOR_MODE_P (mode))
32732 op = safe_vector_operand (op, mode);
32733
32734 /* If we aren't optimizing, only allow one memory operand to be
32735 generated. */
32736 if (memory_operand (op, mode))
32737 num_memory++;
32738
32739 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32740
32741 if (optimize
32742 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32743 || num_memory > 1)
32744 op = force_reg (mode, op);
32745 }
32746
32747 args[i].op = op;
32748 args[i].mode = mode;
32749 }
32750
32751 switch (nargs)
32752 {
32753 case 1:
32754 pat = GEN_FCN (icode) (target, args[0].op);
32755 break;
32756
32757 case 2:
32758 if (tf_p)
32759 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32760 GEN_INT ((int)sub_code));
32761 else if (! comparison_p)
32762 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32763 else
32764 {
32765 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32766 args[0].op,
32767 args[1].op);
32768
32769 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32770 }
32771 break;
32772
32773 case 3:
32774 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32775 break;
32776
32777 case 4:
32778 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
32779 break;
32780
32781 default:
32782 gcc_unreachable ();
32783 }
32784
32785 if (! pat)
32786 return 0;
32787
32788 emit_insn (pat);
32789 return target;
32790 }
32791
32792 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
32793 insns with vec_merge. */
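/* Illustrative only: the scalar operations that leave the upper vector
   elements untouched, e.g. __builtin_ia32_sqrtss and __builtin_ia32_rcpss
   (V4SF_FTYPE_V4SF_VEC_MERGE), are handled here; the single source is used
   both as the operand to compute on and as the pass-through value merged
   into the result.  */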
32794
32795 static rtx
32796 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
32797 rtx target)
32798 {
32799 rtx pat;
32800 tree arg0 = CALL_EXPR_ARG (exp, 0);
32801 rtx op1, op0 = expand_normal (arg0);
32802 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32803 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32804
32805 if (optimize || !target
32806 || GET_MODE (target) != tmode
32807 || !insn_data[icode].operand[0].predicate (target, tmode))
32808 target = gen_reg_rtx (tmode);
32809
32810 if (VECTOR_MODE_P (mode0))
32811 op0 = safe_vector_operand (op0, mode0);
32812
32813 if ((optimize && !register_operand (op0, mode0))
32814 || !insn_data[icode].operand[1].predicate (op0, mode0))
32815 op0 = copy_to_mode_reg (mode0, op0);
32816
32817 op1 = op0;
32818 if (!insn_data[icode].operand[2].predicate (op1, mode0))
32819 op1 = copy_to_mode_reg (mode0, op1);
32820
32821 pat = GEN_FCN (icode) (target, op0, op1);
32822 if (! pat)
32823 return 0;
32824 emit_insn (pat);
32825 return target;
32826 }
32827
32828 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
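/* Illustrative only: vector compares such as __builtin_ia32_cmpltps (a, b)
   come through here with d->comparison == LT, which becomes the comparison
   operand of the maskcmp pattern; comparisons the hardware lacks (e.g.
   cmpgtps) are encoded as the *_SWAP variants and handled by exchanging the
   operands below.  */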
32829
32830 static rtx
32831 ix86_expand_sse_compare (const struct builtin_description *d,
32832 tree exp, rtx target, bool swap)
32833 {
32834 rtx pat;
32835 tree arg0 = CALL_EXPR_ARG (exp, 0);
32836 tree arg1 = CALL_EXPR_ARG (exp, 1);
32837 rtx op0 = expand_normal (arg0);
32838 rtx op1 = expand_normal (arg1);
32839 rtx op2;
32840 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
32841 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
32842 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
32843 enum rtx_code comparison = d->comparison;
32844
32845 if (VECTOR_MODE_P (mode0))
32846 op0 = safe_vector_operand (op0, mode0);
32847 if (VECTOR_MODE_P (mode1))
32848 op1 = safe_vector_operand (op1, mode1);
32849
32850 /* Swap operands if we have a comparison that isn't available in
32851 hardware. */
32852 if (swap)
32853 {
32854 rtx tmp = gen_reg_rtx (mode1);
32855 emit_move_insn (tmp, op1);
32856 op1 = op0;
32857 op0 = tmp;
32858 }
32859
32860 if (optimize || !target
32861 || GET_MODE (target) != tmode
32862 || !insn_data[d->icode].operand[0].predicate (target, tmode))
32863 target = gen_reg_rtx (tmode);
32864
32865 if ((optimize && !register_operand (op0, mode0))
32866 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
32867 op0 = copy_to_mode_reg (mode0, op0);
32868 if ((optimize && !register_operand (op1, mode1))
32869 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
32870 op1 = copy_to_mode_reg (mode1, op1);
32871
32872 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
32873 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
32874 if (! pat)
32875 return 0;
32876 emit_insn (pat);
32877 return target;
32878 }
32879
32880 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
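/* Illustrative only: the scalar compare-and-return-int builtins such as
   __builtin_ia32_comilt / __builtin_ia32_ucomilt end up here; the comi
   pattern only sets FLAGS_REG, and the code below rebuilds the 0/1 int
   result by zeroing an SImode pseudo and setting its low byte from
   d->comparison applied to the flags.  */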
32881
32882 static rtx
32883 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
32884 rtx target)
32885 {
32886 rtx pat;
32887 tree arg0 = CALL_EXPR_ARG (exp, 0);
32888 tree arg1 = CALL_EXPR_ARG (exp, 1);
32889 rtx op0 = expand_normal (arg0);
32890 rtx op1 = expand_normal (arg1);
32891 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
32892 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
32893 enum rtx_code comparison = d->comparison;
32894
32895 if (VECTOR_MODE_P (mode0))
32896 op0 = safe_vector_operand (op0, mode0);
32897 if (VECTOR_MODE_P (mode1))
32898 op1 = safe_vector_operand (op1, mode1);
32899
32900 /* Swap operands if we have a comparison that isn't available in
32901 hardware. */
32902 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
32903 {
32904 rtx tmp = op1;
32905 op1 = op0;
32906 op0 = tmp;
32907 }
32908
32909 target = gen_reg_rtx (SImode);
32910 emit_move_insn (target, const0_rtx);
32911 target = gen_rtx_SUBREG (QImode, target, 0);
32912
32913 if ((optimize && !register_operand (op0, mode0))
32914 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32915 op0 = copy_to_mode_reg (mode0, op0);
32916 if ((optimize && !register_operand (op1, mode1))
32917 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
32918 op1 = copy_to_mode_reg (mode1, op1);
32919
32920 pat = GEN_FCN (d->icode) (op0, op1);
32921 if (! pat)
32922 return 0;
32923 emit_insn (pat);
32924 emit_insn (gen_rtx_SET (VOIDmode,
32925 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
32926 gen_rtx_fmt_ee (comparison, QImode,
32927 SET_DEST (pat),
32928 const0_rtx)));
32929
32930 return SUBREG_REG (target);
32931 }
32932
32933 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
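/* Illustrative only: the fixed-mode rounding builtins, e.g.
   __builtin_ia32_floorps (V4SF_FTYPE_V4SF_ROUND), are expanded here; the
   d->comparison field is reused to hold the ROUND_* mode immediate rather
   than an rtx comparison code.  */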
32934
32935 static rtx
32936 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
32937 rtx target)
32938 {
32939 rtx pat;
32940 tree arg0 = CALL_EXPR_ARG (exp, 0);
32941 rtx op1, op0 = expand_normal (arg0);
32942 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
32943 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
32944
32945 if (optimize || target == 0
32946 || GET_MODE (target) != tmode
32947 || !insn_data[d->icode].operand[0].predicate (target, tmode))
32948 target = gen_reg_rtx (tmode);
32949
32950 if (VECTOR_MODE_P (mode0))
32951 op0 = safe_vector_operand (op0, mode0);
32952
32953 if ((optimize && !register_operand (op0, mode0))
32954 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32955 op0 = copy_to_mode_reg (mode0, op0);
32956
32957 op1 = GEN_INT (d->comparison);
32958
32959 pat = GEN_FCN (d->icode) (target, op0, op1);
32960 if (! pat)
32961 return 0;
32962 emit_insn (pat);
32963 return target;
32964 }
32965
32966 static rtx
32967 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
32968 tree exp, rtx target)
32969 {
32970 rtx pat;
32971 tree arg0 = CALL_EXPR_ARG (exp, 0);
32972 tree arg1 = CALL_EXPR_ARG (exp, 1);
32973 rtx op0 = expand_normal (arg0);
32974 rtx op1 = expand_normal (arg1);
32975 rtx op2;
32976 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
32977 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
32978 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
32979
32980 if (optimize || target == 0
32981 || GET_MODE (target) != tmode
32982 || !insn_data[d->icode].operand[0].predicate (target, tmode))
32983 target = gen_reg_rtx (tmode);
32984
32985 op0 = safe_vector_operand (op0, mode0);
32986 op1 = safe_vector_operand (op1, mode1);
32987
32988 if ((optimize && !register_operand (op0, mode0))
32989 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32990 op0 = copy_to_mode_reg (mode0, op0);
32991 if ((optimize && !register_operand (op1, mode1))
32992 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
32993 op1 = copy_to_mode_reg (mode1, op1);
32994
32995 op2 = GEN_INT (d->comparison);
32996
32997 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
32998 if (! pat)
32999 return 0;
33000 emit_insn (pat);
33001 return target;
33002 }
33003
33004 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
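/* Illustrative only: __builtin_ia32_ptestz128 and friends (the *_PTEST
   function types in ix86_expand_args_builtin) dispatch here; the ptest insn
   has no vector result, it only sets FLAGS_REG, so the 0/1 return value is
   reconstructed below from d->comparison against the flags.  */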
33005
33006 static rtx
33007 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33008 rtx target)
33009 {
33010 rtx pat;
33011 tree arg0 = CALL_EXPR_ARG (exp, 0);
33012 tree arg1 = CALL_EXPR_ARG (exp, 1);
33013 rtx op0 = expand_normal (arg0);
33014 rtx op1 = expand_normal (arg1);
33015 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33016 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33017 enum rtx_code comparison = d->comparison;
33018
33019 if (VECTOR_MODE_P (mode0))
33020 op0 = safe_vector_operand (op0, mode0);
33021 if (VECTOR_MODE_P (mode1))
33022 op1 = safe_vector_operand (op1, mode1);
33023
33024 target = gen_reg_rtx (SImode);
33025 emit_move_insn (target, const0_rtx);
33026 target = gen_rtx_SUBREG (QImode, target, 0);
33027
33028 if ((optimize && !register_operand (op0, mode0))
33029 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33030 op0 = copy_to_mode_reg (mode0, op0);
33031 if ((optimize && !register_operand (op1, mode1))
33032 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33033 op1 = copy_to_mode_reg (mode1, op1);
33034
33035 pat = GEN_FCN (d->icode) (op0, op1);
33036 if (! pat)
33037 return 0;
33038 emit_insn (pat);
33039 emit_insn (gen_rtx_SET (VOIDmode,
33040 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33041 gen_rtx_fmt_ee (comparison, QImode,
33042 SET_DEST (pat),
33043 const0_rtx)));
33044
33045 return SUBREG_REG (target);
33046 }
33047
33048 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
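/* Illustrative only: the SSE4.2 string-compare intrinsics funnel here;
   _mm_cmpestri / _mm_cmpestrm return the index or mask operand of the insn
   (the IX86_BUILTIN_PCMPESTRI128 / ..._PCMPESTRM128 branches below), while
   the _mm_cmpestr{a,c,o,s,z} forms use the d->flag branch, which reads a
   single EFLAGS condition instead.  */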
33049
33050 static rtx
33051 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33052 tree exp, rtx target)
33053 {
33054 rtx pat;
33055 tree arg0 = CALL_EXPR_ARG (exp, 0);
33056 tree arg1 = CALL_EXPR_ARG (exp, 1);
33057 tree arg2 = CALL_EXPR_ARG (exp, 2);
33058 tree arg3 = CALL_EXPR_ARG (exp, 3);
33059 tree arg4 = CALL_EXPR_ARG (exp, 4);
33060 rtx scratch0, scratch1;
33061 rtx op0 = expand_normal (arg0);
33062 rtx op1 = expand_normal (arg1);
33063 rtx op2 = expand_normal (arg2);
33064 rtx op3 = expand_normal (arg3);
33065 rtx op4 = expand_normal (arg4);
33066 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33067
33068 tmode0 = insn_data[d->icode].operand[0].mode;
33069 tmode1 = insn_data[d->icode].operand[1].mode;
33070 modev2 = insn_data[d->icode].operand[2].mode;
33071 modei3 = insn_data[d->icode].operand[3].mode;
33072 modev4 = insn_data[d->icode].operand[4].mode;
33073 modei5 = insn_data[d->icode].operand[5].mode;
33074 modeimm = insn_data[d->icode].operand[6].mode;
33075
33076 if (VECTOR_MODE_P (modev2))
33077 op0 = safe_vector_operand (op0, modev2);
33078 if (VECTOR_MODE_P (modev4))
33079 op2 = safe_vector_operand (op2, modev4);
33080
33081 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33082 op0 = copy_to_mode_reg (modev2, op0);
33083 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33084 op1 = copy_to_mode_reg (modei3, op1);
33085 if ((optimize && !register_operand (op2, modev4))
33086 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33087 op2 = copy_to_mode_reg (modev4, op2);
33088 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33089 op3 = copy_to_mode_reg (modei5, op3);
33090
33091 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33092 {
33093 error ("the fifth argument must be an 8-bit immediate");
33094 return const0_rtx;
33095 }
33096
33097 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33098 {
33099 if (optimize || !target
33100 || GET_MODE (target) != tmode0
33101 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33102 target = gen_reg_rtx (tmode0);
33103
33104 scratch1 = gen_reg_rtx (tmode1);
33105
33106 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33107 }
33108 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33109 {
33110 if (optimize || !target
33111 || GET_MODE (target) != tmode1
33112 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33113 target = gen_reg_rtx (tmode1);
33114
33115 scratch0 = gen_reg_rtx (tmode0);
33116
33117 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33118 }
33119 else
33120 {
33121 gcc_assert (d->flag);
33122
33123 scratch0 = gen_reg_rtx (tmode0);
33124 scratch1 = gen_reg_rtx (tmode1);
33125
33126 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33127 }
33128
33129 if (! pat)
33130 return 0;
33131
33132 emit_insn (pat);
33133
33134 if (d->flag)
33135 {
33136 target = gen_reg_rtx (SImode);
33137 emit_move_insn (target, const0_rtx);
33138 target = gen_rtx_SUBREG (QImode, target, 0);
33139
33140 emit_insn
33141 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33142 gen_rtx_fmt_ee (EQ, QImode,
33143 gen_rtx_REG ((enum machine_mode) d->flag,
33144 FLAGS_REG),
33145 const0_rtx)));
33146 return SUBREG_REG (target);
33147 }
33148 else
33149 return target;
33150 }
33151
33152
33153 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33154
33155 static rtx
33156 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33157 tree exp, rtx target)
33158 {
33159 rtx pat;
33160 tree arg0 = CALL_EXPR_ARG (exp, 0);
33161 tree arg1 = CALL_EXPR_ARG (exp, 1);
33162 tree arg2 = CALL_EXPR_ARG (exp, 2);
33163 rtx scratch0, scratch1;
33164 rtx op0 = expand_normal (arg0);
33165 rtx op1 = expand_normal (arg1);
33166 rtx op2 = expand_normal (arg2);
33167 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33168
33169 tmode0 = insn_data[d->icode].operand[0].mode;
33170 tmode1 = insn_data[d->icode].operand[1].mode;
33171 modev2 = insn_data[d->icode].operand[2].mode;
33172 modev3 = insn_data[d->icode].operand[3].mode;
33173 modeimm = insn_data[d->icode].operand[4].mode;
33174
33175 if (VECTOR_MODE_P (modev2))
33176 op0 = safe_vector_operand (op0, modev2);
33177 if (VECTOR_MODE_P (modev3))
33178 op1 = safe_vector_operand (op1, modev3);
33179
33180 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33181 op0 = copy_to_mode_reg (modev2, op0);
33182 if ((optimize && !register_operand (op1, modev3))
33183 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33184 op1 = copy_to_mode_reg (modev3, op1);
33185
33186 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33187 {
33188 error ("the third argument must be an 8-bit immediate");
33189 return const0_rtx;
33190 }
33191
33192 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33193 {
33194 if (optimize || !target
33195 || GET_MODE (target) != tmode0
33196 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33197 target = gen_reg_rtx (tmode0);
33198
33199 scratch1 = gen_reg_rtx (tmode1);
33200
33201 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33202 }
33203 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33204 {
33205 if (optimize || !target
33206 || GET_MODE (target) != tmode1
33207 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33208 target = gen_reg_rtx (tmode1);
33209
33210 scratch0 = gen_reg_rtx (tmode0);
33211
33212 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33213 }
33214 else
33215 {
33216 gcc_assert (d->flag);
33217
33218 scratch0 = gen_reg_rtx (tmode0);
33219 scratch1 = gen_reg_rtx (tmode1);
33220
33221 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33222 }
33223
33224 if (! pat)
33225 return 0;
33226
33227 emit_insn (pat);
33228
33229 if (d->flag)
33230 {
33231 target = gen_reg_rtx (SImode);
33232 emit_move_insn (target, const0_rtx);
33233 target = gen_rtx_SUBREG (QImode, target, 0);
33234
33235 emit_insn
33236 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33237 gen_rtx_fmt_ee (EQ, QImode,
33238 gen_rtx_REG ((enum machine_mode) d->flag,
33239 FLAGS_REG),
33240 const0_rtx)));
33241 return SUBREG_REG (target);
33242 }
33243 else
33244 return target;
33245 }
33246
33247 /* Subroutine of ix86_expand_builtin to take care of insns with
33248 variable number of operands. */
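/* Illustrative only: d->flag holds an ix86_builtin_func_type such as
   V4SI_FTYPE_V4SI_V4SI_INT (three operands, the last a constant); the large
   switch below merely decodes that into nargs, nargs_constant and mask_pos
   before the generic operand loop checks predicates and emits the insn.  */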
33249
33250 static rtx
33251 ix86_expand_args_builtin (const struct builtin_description *d,
33252 tree exp, rtx target)
33253 {
33254 rtx pat, real_target;
33255 unsigned int i, nargs;
33256 unsigned int nargs_constant = 0;
33257 unsigned int mask_pos = 0;
33258 int num_memory = 0;
33259 struct
33260 {
33261 rtx op;
33262 enum machine_mode mode;
33263 } args[6];
33264 bool last_arg_count = false;
33265 enum insn_code icode = d->icode;
33266 const struct insn_data_d *insn_p = &insn_data[icode];
33267 enum machine_mode tmode = insn_p->operand[0].mode;
33268 enum machine_mode rmode = VOIDmode;
33269 bool swap = false;
33270 enum rtx_code comparison = d->comparison;
33271
33272 switch ((enum ix86_builtin_func_type) d->flag)
33273 {
33274 case V2DF_FTYPE_V2DF_ROUND:
33275 case V4DF_FTYPE_V4DF_ROUND:
33276 case V4SF_FTYPE_V4SF_ROUND:
33277 case V8SF_FTYPE_V8SF_ROUND:
33278 case V4SI_FTYPE_V4SF_ROUND:
33279 case V8SI_FTYPE_V8SF_ROUND:
33280 return ix86_expand_sse_round (d, exp, target);
33281 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33282 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33283 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33284 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33285 case INT_FTYPE_V8SF_V8SF_PTEST:
33286 case INT_FTYPE_V4DI_V4DI_PTEST:
33287 case INT_FTYPE_V4DF_V4DF_PTEST:
33288 case INT_FTYPE_V4SF_V4SF_PTEST:
33289 case INT_FTYPE_V2DI_V2DI_PTEST:
33290 case INT_FTYPE_V2DF_V2DF_PTEST:
33291 return ix86_expand_sse_ptest (d, exp, target);
33292 case FLOAT128_FTYPE_FLOAT128:
33293 case FLOAT_FTYPE_FLOAT:
33294 case INT_FTYPE_INT:
33295 case UINT64_FTYPE_INT:
33296 case UINT16_FTYPE_UINT16:
33297 case INT64_FTYPE_INT64:
33298 case INT64_FTYPE_V4SF:
33299 case INT64_FTYPE_V2DF:
33300 case INT_FTYPE_V16QI:
33301 case INT_FTYPE_V8QI:
33302 case INT_FTYPE_V8SF:
33303 case INT_FTYPE_V4DF:
33304 case INT_FTYPE_V4SF:
33305 case INT_FTYPE_V2DF:
33306 case INT_FTYPE_V32QI:
33307 case V16QI_FTYPE_V16QI:
33308 case V8SI_FTYPE_V8SF:
33309 case V8SI_FTYPE_V4SI:
33310 case V8HI_FTYPE_V8HI:
33311 case V8HI_FTYPE_V16QI:
33312 case V8QI_FTYPE_V8QI:
33313 case V8SF_FTYPE_V8SF:
33314 case V8SF_FTYPE_V8SI:
33315 case V8SF_FTYPE_V4SF:
33316 case V8SF_FTYPE_V8HI:
33317 case V4SI_FTYPE_V4SI:
33318 case V4SI_FTYPE_V16QI:
33319 case V4SI_FTYPE_V4SF:
33320 case V4SI_FTYPE_V8SI:
33321 case V4SI_FTYPE_V8HI:
33322 case V4SI_FTYPE_V4DF:
33323 case V4SI_FTYPE_V2DF:
33324 case V4HI_FTYPE_V4HI:
33325 case V4DF_FTYPE_V4DF:
33326 case V4DF_FTYPE_V4SI:
33327 case V4DF_FTYPE_V4SF:
33328 case V4DF_FTYPE_V2DF:
33329 case V4SF_FTYPE_V4SF:
33330 case V4SF_FTYPE_V4SI:
33331 case V4SF_FTYPE_V8SF:
33332 case V4SF_FTYPE_V4DF:
33333 case V4SF_FTYPE_V8HI:
33334 case V4SF_FTYPE_V2DF:
33335 case V2DI_FTYPE_V2DI:
33336 case V2DI_FTYPE_V16QI:
33337 case V2DI_FTYPE_V8HI:
33338 case V2DI_FTYPE_V4SI:
33339 case V2DF_FTYPE_V2DF:
33340 case V2DF_FTYPE_V4SI:
33341 case V2DF_FTYPE_V4DF:
33342 case V2DF_FTYPE_V4SF:
33343 case V2DF_FTYPE_V2SI:
33344 case V2SI_FTYPE_V2SI:
33345 case V2SI_FTYPE_V4SF:
33346 case V2SI_FTYPE_V2SF:
33347 case V2SI_FTYPE_V2DF:
33348 case V2SF_FTYPE_V2SF:
33349 case V2SF_FTYPE_V2SI:
33350 case V32QI_FTYPE_V32QI:
33351 case V32QI_FTYPE_V16QI:
33352 case V16HI_FTYPE_V16HI:
33353 case V16HI_FTYPE_V8HI:
33354 case V8SI_FTYPE_V8SI:
33355 case V16HI_FTYPE_V16QI:
33356 case V8SI_FTYPE_V16QI:
33357 case V4DI_FTYPE_V16QI:
33358 case V8SI_FTYPE_V8HI:
33359 case V4DI_FTYPE_V8HI:
33360 case V4DI_FTYPE_V4SI:
33361 case V4DI_FTYPE_V2DI:
33362 case HI_FTYPE_HI:
33363 case UINT_FTYPE_V2DF:
33364 case UINT_FTYPE_V4SF:
33365 case UINT64_FTYPE_V2DF:
33366 case UINT64_FTYPE_V4SF:
33367 case V16QI_FTYPE_V8DI:
33368 case V16HI_FTYPE_V16SI:
33369 case V16SI_FTYPE_HI:
33370 case V16SI_FTYPE_V16SI:
33371 case V16SI_FTYPE_INT:
33372 case V16SF_FTYPE_FLOAT:
33373 case V16SF_FTYPE_V4SF:
33374 case V16SF_FTYPE_V16SF:
33375 case V8HI_FTYPE_V8DI:
33376 case V8UHI_FTYPE_V8UHI:
33377 case V8SI_FTYPE_V8DI:
33378 case V8USI_FTYPE_V8USI:
33379 case V8SF_FTYPE_V8DF:
33380 case V8DI_FTYPE_QI:
33381 case V8DI_FTYPE_INT64:
33382 case V8DI_FTYPE_V4DI:
33383 case V8DI_FTYPE_V8DI:
33384 case V8DF_FTYPE_DOUBLE:
33385 case V8DF_FTYPE_V4DF:
33386 case V8DF_FTYPE_V8DF:
33387 case V8DF_FTYPE_V8SI:
33388 nargs = 1;
33389 break;
33390 case V4SF_FTYPE_V4SF_VEC_MERGE:
33391 case V2DF_FTYPE_V2DF_VEC_MERGE:
33392 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33393 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33394 case V16QI_FTYPE_V16QI_V16QI:
33395 case V16QI_FTYPE_V8HI_V8HI:
33396 case V16SI_FTYPE_V16SI_V16SI:
33397 case V16SF_FTYPE_V16SF_V16SF:
33398 case V16SF_FTYPE_V16SF_V16SI:
33399 case V8QI_FTYPE_V8QI_V8QI:
33400 case V8QI_FTYPE_V4HI_V4HI:
33401 case V8HI_FTYPE_V8HI_V8HI:
33402 case V8HI_FTYPE_V16QI_V16QI:
33403 case V8HI_FTYPE_V4SI_V4SI:
33404 case V8SF_FTYPE_V8SF_V8SF:
33405 case V8SF_FTYPE_V8SF_V8SI:
33406 case V8DI_FTYPE_V8DI_V8DI:
33407 case V8DF_FTYPE_V8DF_V8DF:
33408 case V8DF_FTYPE_V8DF_V8DI:
33409 case V4SI_FTYPE_V4SI_V4SI:
33410 case V4SI_FTYPE_V8HI_V8HI:
33411 case V4SI_FTYPE_V4SF_V4SF:
33412 case V4SI_FTYPE_V2DF_V2DF:
33413 case V4HI_FTYPE_V4HI_V4HI:
33414 case V4HI_FTYPE_V8QI_V8QI:
33415 case V4HI_FTYPE_V2SI_V2SI:
33416 case V4DF_FTYPE_V4DF_V4DF:
33417 case V4DF_FTYPE_V4DF_V4DI:
33418 case V4SF_FTYPE_V4SF_V4SF:
33419 case V4SF_FTYPE_V4SF_V4SI:
33420 case V4SF_FTYPE_V4SF_V2SI:
33421 case V4SF_FTYPE_V4SF_V2DF:
33422 case V4SF_FTYPE_V4SF_UINT:
33423 case V4SF_FTYPE_V4SF_UINT64:
33424 case V4SF_FTYPE_V4SF_DI:
33425 case V4SF_FTYPE_V4SF_SI:
33426 case V2DI_FTYPE_V2DI_V2DI:
33427 case V2DI_FTYPE_V16QI_V16QI:
33428 case V2DI_FTYPE_V4SI_V4SI:
33429 case V2UDI_FTYPE_V4USI_V4USI:
33430 case V2DI_FTYPE_V2DI_V16QI:
33431 case V2DI_FTYPE_V2DF_V2DF:
33432 case V2SI_FTYPE_V2SI_V2SI:
33433 case V2SI_FTYPE_V4HI_V4HI:
33434 case V2SI_FTYPE_V2SF_V2SF:
33435 case V2DF_FTYPE_V2DF_V2DF:
33436 case V2DF_FTYPE_V2DF_V4SF:
33437 case V2DF_FTYPE_V2DF_V2DI:
33438 case V2DF_FTYPE_V2DF_DI:
33439 case V2DF_FTYPE_V2DF_SI:
33440 case V2DF_FTYPE_V2DF_UINT:
33441 case V2DF_FTYPE_V2DF_UINT64:
33442 case V2SF_FTYPE_V2SF_V2SF:
33443 case V1DI_FTYPE_V1DI_V1DI:
33444 case V1DI_FTYPE_V8QI_V8QI:
33445 case V1DI_FTYPE_V2SI_V2SI:
33446 case V32QI_FTYPE_V16HI_V16HI:
33447 case V16HI_FTYPE_V8SI_V8SI:
33448 case V32QI_FTYPE_V32QI_V32QI:
33449 case V16HI_FTYPE_V32QI_V32QI:
33450 case V16HI_FTYPE_V16HI_V16HI:
33451 case V8SI_FTYPE_V4DF_V4DF:
33452 case V8SI_FTYPE_V8SI_V8SI:
33453 case V8SI_FTYPE_V16HI_V16HI:
33454 case V4DI_FTYPE_V4DI_V4DI:
33455 case V4DI_FTYPE_V8SI_V8SI:
33456 case V4UDI_FTYPE_V8USI_V8USI:
33457 case QI_FTYPE_V8DI_V8DI:
33458 case HI_FTYPE_V16SI_V16SI:
33459 if (comparison == UNKNOWN)
33460 return ix86_expand_binop_builtin (icode, exp, target);
33461 nargs = 2;
33462 break;
33463 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33464 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33465 gcc_assert (comparison != UNKNOWN);
33466 nargs = 2;
33467 swap = true;
33468 break;
33469 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33470 case V16HI_FTYPE_V16HI_SI_COUNT:
33471 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33472 case V8SI_FTYPE_V8SI_SI_COUNT:
33473 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33474 case V4DI_FTYPE_V4DI_INT_COUNT:
33475 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33476 case V8HI_FTYPE_V8HI_SI_COUNT:
33477 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33478 case V4SI_FTYPE_V4SI_SI_COUNT:
33479 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33480 case V4HI_FTYPE_V4HI_SI_COUNT:
33481 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33482 case V2DI_FTYPE_V2DI_SI_COUNT:
33483 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33484 case V2SI_FTYPE_V2SI_SI_COUNT:
33485 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33486 case V1DI_FTYPE_V1DI_SI_COUNT:
33487 nargs = 2;
33488 last_arg_count = true;
33489 break;
33490 case UINT64_FTYPE_UINT64_UINT64:
33491 case UINT_FTYPE_UINT_UINT:
33492 case UINT_FTYPE_UINT_USHORT:
33493 case UINT_FTYPE_UINT_UCHAR:
33494 case UINT16_FTYPE_UINT16_INT:
33495 case UINT8_FTYPE_UINT8_INT:
33496 case HI_FTYPE_HI_HI:
33497 case V16SI_FTYPE_V8DF_V8DF:
33498 nargs = 2;
33499 break;
33500 case V2DI_FTYPE_V2DI_INT_CONVERT:
33501 nargs = 2;
33502 rmode = V1TImode;
33503 nargs_constant = 1;
33504 break;
33505 case V4DI_FTYPE_V4DI_INT_CONVERT:
33506 nargs = 2;
33507 rmode = V2TImode;
33508 nargs_constant = 1;
33509 break;
33510 case V8HI_FTYPE_V8HI_INT:
33511 case V8HI_FTYPE_V8SF_INT:
33512 case V16HI_FTYPE_V16SF_INT:
33513 case V8HI_FTYPE_V4SF_INT:
33514 case V8SF_FTYPE_V8SF_INT:
33515 case V4SF_FTYPE_V16SF_INT:
33516 case V16SF_FTYPE_V16SF_INT:
33517 case V4SI_FTYPE_V4SI_INT:
33518 case V4SI_FTYPE_V8SI_INT:
33519 case V4HI_FTYPE_V4HI_INT:
33520 case V4DF_FTYPE_V4DF_INT:
33521 case V4DF_FTYPE_V8DF_INT:
33522 case V4SF_FTYPE_V4SF_INT:
33523 case V4SF_FTYPE_V8SF_INT:
33524 case V2DI_FTYPE_V2DI_INT:
33525 case V2DF_FTYPE_V2DF_INT:
33526 case V2DF_FTYPE_V4DF_INT:
33527 case V16HI_FTYPE_V16HI_INT:
33528 case V8SI_FTYPE_V8SI_INT:
33529 case V16SI_FTYPE_V16SI_INT:
33530 case V4SI_FTYPE_V16SI_INT:
33531 case V4DI_FTYPE_V4DI_INT:
33532 case V2DI_FTYPE_V4DI_INT:
33533 case V4DI_FTYPE_V8DI_INT:
33534 case HI_FTYPE_HI_INT:
33535 nargs = 2;
33536 nargs_constant = 1;
33537 break;
33538 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33539 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33540 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33541 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33542 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33543 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33544 case HI_FTYPE_V16SI_V16SI_HI:
33545 case QI_FTYPE_V8DI_V8DI_QI:
33546 case V16HI_FTYPE_V16SI_V16HI_HI:
33547 case V16QI_FTYPE_V16SI_V16QI_HI:
33548 case V16QI_FTYPE_V8DI_V16QI_QI:
33549 case V16SF_FTYPE_V16SF_V16SF_HI:
33550 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33551 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33552 case V16SF_FTYPE_V16SI_V16SF_HI:
33553 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33554 case V16SF_FTYPE_V4SF_V16SF_HI:
33555 case V16SI_FTYPE_SI_V16SI_HI:
33556 case V16SI_FTYPE_V16HI_V16SI_HI:
33557 case V16SI_FTYPE_V16QI_V16SI_HI:
33558 case V16SI_FTYPE_V16SF_V16SI_HI:
33559 case V16SI_FTYPE_V16SI_V16SI_HI:
33560 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33561 case V16SI_FTYPE_V4SI_V16SI_HI:
33562 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33563 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33564 case V8DF_FTYPE_V2DF_V8DF_QI:
33565 case V8DF_FTYPE_V4DF_V8DF_QI:
33566 case V8DF_FTYPE_V8DF_V8DF_QI:
33567 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33568 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33569 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33570 case V8DF_FTYPE_V8SF_V8DF_QI:
33571 case V8DF_FTYPE_V8SI_V8DF_QI:
33572 case V8DI_FTYPE_DI_V8DI_QI:
33573 case V8DI_FTYPE_V16QI_V8DI_QI:
33574 case V8DI_FTYPE_V2DI_V8DI_QI:
33575 case V8DI_FTYPE_V4DI_V8DI_QI:
33576 case V8DI_FTYPE_V8DI_V8DI_QI:
33577 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33578 case V8DI_FTYPE_V8HI_V8DI_QI:
33579 case V8DI_FTYPE_V8SI_V8DI_QI:
33580 case V8HI_FTYPE_V8DI_V8HI_QI:
33581 case V8SF_FTYPE_V8DF_V8SF_QI:
33582 case V8SI_FTYPE_V8DF_V8SI_QI:
33583 case V8SI_FTYPE_V8DI_V8SI_QI:
33584 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33585 nargs = 3;
33586 break;
33587 case V32QI_FTYPE_V32QI_V32QI_INT:
33588 case V16HI_FTYPE_V16HI_V16HI_INT:
33589 case V16QI_FTYPE_V16QI_V16QI_INT:
33590 case V4DI_FTYPE_V4DI_V4DI_INT:
33591 case V8HI_FTYPE_V8HI_V8HI_INT:
33592 case V8SI_FTYPE_V8SI_V8SI_INT:
33593 case V8SI_FTYPE_V8SI_V4SI_INT:
33594 case V8SF_FTYPE_V8SF_V8SF_INT:
33595 case V8SF_FTYPE_V8SF_V4SF_INT:
33596 case V4SI_FTYPE_V4SI_V4SI_INT:
33597 case V4DF_FTYPE_V4DF_V4DF_INT:
33598 case V16SF_FTYPE_V16SF_V16SF_INT:
33599 case V16SF_FTYPE_V16SF_V4SF_INT:
33600 case V16SI_FTYPE_V16SI_V4SI_INT:
33601 case V4DF_FTYPE_V4DF_V2DF_INT:
33602 case V4SF_FTYPE_V4SF_V4SF_INT:
33603 case V2DI_FTYPE_V2DI_V2DI_INT:
33604 case V4DI_FTYPE_V4DI_V2DI_INT:
33605 case V2DF_FTYPE_V2DF_V2DF_INT:
33606 case QI_FTYPE_V8DI_V8DI_INT:
33607 case QI_FTYPE_V8DF_V8DF_INT:
33608 case QI_FTYPE_V2DF_V2DF_INT:
33609 case QI_FTYPE_V4SF_V4SF_INT:
33610 case HI_FTYPE_V16SI_V16SI_INT:
33611 case HI_FTYPE_V16SF_V16SF_INT:
33612 nargs = 3;
33613 nargs_constant = 1;
33614 break;
33615 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33616 nargs = 3;
33617 rmode = V4DImode;
33618 nargs_constant = 1;
33619 break;
33620 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33621 nargs = 3;
33622 rmode = V2DImode;
33623 nargs_constant = 1;
33624 break;
33625 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33626 nargs = 3;
33627 rmode = DImode;
33628 nargs_constant = 1;
33629 break;
33630 case V2DI_FTYPE_V2DI_UINT_UINT:
33631 nargs = 3;
33632 nargs_constant = 2;
33633 break;
33634 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33635 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33636 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33637 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33638 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33639 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33640 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33641 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33642 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33643 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33644 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33645 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33646 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33647 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33648 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33649 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33650 nargs = 4;
33651 break;
33652 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33653 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33654 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33655 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33656 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33657 nargs = 4;
33658 nargs_constant = 1;
33659 break;
33660 case QI_FTYPE_V2DF_V2DF_INT_QI:
33661 case QI_FTYPE_V4SF_V4SF_INT_QI:
33662 nargs = 4;
33663 mask_pos = 1;
33664 nargs_constant = 1;
33665 break;
33666 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33667 nargs = 4;
33668 nargs_constant = 2;
33669 break;
33670 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33671 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33672 nargs = 4;
33673 break;
33674 case QI_FTYPE_V8DI_V8DI_INT_QI:
33675 case HI_FTYPE_V16SI_V16SI_INT_HI:
33676 case QI_FTYPE_V8DF_V8DF_INT_QI:
33677 case HI_FTYPE_V16SF_V16SF_INT_HI:
33678 mask_pos = 1;
33679 nargs = 4;
33680 nargs_constant = 1;
33681 break;
33682 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33683 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33684 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33685 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33686 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33687 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33688 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33689 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33690 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33691 nargs = 4;
33692 mask_pos = 2;
33693 nargs_constant = 1;
33694 break;
33695 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33696 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33697 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33698 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33699 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33700 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33701 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33702 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33703 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33704 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33705 nargs = 5;
33706 mask_pos = 2;
33707 nargs_constant = 1;
33708 break;
33709 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33710 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33711 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33712 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33713 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33714 nargs = 5;
33715 mask_pos = 1;
33716 nargs_constant = 1;
33717 break;
33718
33719 default:
33720 gcc_unreachable ();
33721 }
33722
33723 gcc_assert (nargs <= ARRAY_SIZE (args));
33724
33725 if (comparison != UNKNOWN)
33726 {
33727 gcc_assert (nargs == 2);
33728 return ix86_expand_sse_compare (d, exp, target, swap);
33729 }
33730
33731 if (rmode == VOIDmode || rmode == tmode)
33732 {
33733 if (optimize
33734 || target == 0
33735 || GET_MODE (target) != tmode
33736 || !insn_p->operand[0].predicate (target, tmode))
33737 target = gen_reg_rtx (tmode);
33738 real_target = target;
33739 }
33740 else
33741 {
33742 real_target = gen_reg_rtx (tmode);
33743 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
33744 }
33745
33746 for (i = 0; i < nargs; i++)
33747 {
33748 tree arg = CALL_EXPR_ARG (exp, i);
33749 rtx op = expand_normal (arg);
33750 enum machine_mode mode = insn_p->operand[i + 1].mode;
33751 bool match = insn_p->operand[i + 1].predicate (op, mode);
33752
33753 if (last_arg_count && (i + 1) == nargs)
33754 {
33755 /* SIMD shift insns take either an 8-bit immediate or a
33756 register as the count, but the builtin functions take an int
33757 as the count. If the count doesn't match, we put it in a register. */
33758 if (!match)
33759 {
33760 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
33761 if (!insn_p->operand[i + 1].predicate (op, mode))
33762 op = copy_to_reg (op);
33763 }
33764 }
33765 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
33766 (!mask_pos && (nargs - i) <= nargs_constant))
33767 {
33768 if (!match)
33769 switch (icode)
33770 {
33771 case CODE_FOR_avx2_inserti128:
33772 case CODE_FOR_avx2_extracti128:
33773 error ("the last argument must be a 1-bit immediate");
33774 return const0_rtx;
33775
33776 case CODE_FOR_avx512f_cmpv8di3_mask:
33777 case CODE_FOR_avx512f_cmpv16si3_mask:
33778 case CODE_FOR_avx512f_ucmpv8di3_mask:
33779 case CODE_FOR_avx512f_ucmpv16si3_mask:
33780 error ("the last argument must be a 3-bit immediate");
33781 return const0_rtx;
33782
33783 case CODE_FOR_sse4_1_roundsd:
33784 case CODE_FOR_sse4_1_roundss:
33785
33786 case CODE_FOR_sse4_1_roundpd:
33787 case CODE_FOR_sse4_1_roundps:
33788 case CODE_FOR_avx_roundpd256:
33789 case CODE_FOR_avx_roundps256:
33790
33791 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
33792 case CODE_FOR_sse4_1_roundps_sfix:
33793 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
33794 case CODE_FOR_avx_roundps_sfix256:
33795
33796 case CODE_FOR_sse4_1_blendps:
33797 case CODE_FOR_avx_blendpd256:
33798 case CODE_FOR_avx_vpermilv4df:
33799 case CODE_FOR_avx512f_getmantv8df_mask:
33800 case CODE_FOR_avx512f_getmantv16sf_mask:
33801 error ("the last argument must be a 4-bit immediate");
33802 return const0_rtx;
33803
33804 case CODE_FOR_sha1rnds4:
33805 case CODE_FOR_sse4_1_blendpd:
33806 case CODE_FOR_avx_vpermilv2df:
33807 case CODE_FOR_xop_vpermil2v2df3:
33808 case CODE_FOR_xop_vpermil2v4sf3:
33809 case CODE_FOR_xop_vpermil2v4df3:
33810 case CODE_FOR_xop_vpermil2v8sf3:
33811 case CODE_FOR_avx512f_vinsertf32x4_mask:
33812 case CODE_FOR_avx512f_vinserti32x4_mask:
33813 case CODE_FOR_avx512f_vextractf32x4_mask:
33814 case CODE_FOR_avx512f_vextracti32x4_mask:
33815 error ("the last argument must be a 2-bit immediate");
33816 return const0_rtx;
33817
33818 case CODE_FOR_avx_vextractf128v4df:
33819 case CODE_FOR_avx_vextractf128v8sf:
33820 case CODE_FOR_avx_vextractf128v8si:
33821 case CODE_FOR_avx_vinsertf128v4df:
33822 case CODE_FOR_avx_vinsertf128v8sf:
33823 case CODE_FOR_avx_vinsertf128v8si:
33824 case CODE_FOR_avx512f_vinsertf64x4_mask:
33825 case CODE_FOR_avx512f_vinserti64x4_mask:
33826 case CODE_FOR_avx512f_vextractf64x4_mask:
33827 case CODE_FOR_avx512f_vextracti64x4_mask:
33828 error ("the last argument must be a 1-bit immediate");
33829 return const0_rtx;
33830
33831 case CODE_FOR_avx_vmcmpv2df3:
33832 case CODE_FOR_avx_vmcmpv4sf3:
33833 case CODE_FOR_avx_cmpv2df3:
33834 case CODE_FOR_avx_cmpv4sf3:
33835 case CODE_FOR_avx_cmpv4df3:
33836 case CODE_FOR_avx_cmpv8sf3:
33837 case CODE_FOR_avx512f_cmpv8df3_mask:
33838 case CODE_FOR_avx512f_cmpv16sf3_mask:
33839 case CODE_FOR_avx512f_vmcmpv2df3_mask:
33840 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
33841 error ("the last argument must be a 5-bit immediate");
33842 return const0_rtx;
33843
33844 default:
33845 switch (nargs_constant)
33846 {
33847 case 2:
33848 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
33849 (!mask_pos && (nargs - i) == nargs_constant))
33850 {
33851 error ("the next to last argument must be an 8-bit immediate");
33852 break;
33853 }
33854 case 1:
33855 error ("the last argument must be an 8-bit immediate");
33856 break;
33857 default:
33858 gcc_unreachable ();
33859 }
33860 return const0_rtx;
33861 }
33862 }
33863 else
33864 {
33865 if (VECTOR_MODE_P (mode))
33866 op = safe_vector_operand (op, mode);
33867
33868 /* If we aren't optimizing, only allow one memory operand to
33869 be generated. */
33870 if (memory_operand (op, mode))
33871 num_memory++;
33872
33873 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
33874 {
33875 if (optimize || !match || num_memory > 1)
33876 op = copy_to_mode_reg (mode, op);
33877 }
33878 else
33879 {
33880 op = copy_to_reg (op);
33881 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
33882 }
33883 }
33884
33885 args[i].op = op;
33886 args[i].mode = mode;
33887 }
33888
33889 switch (nargs)
33890 {
33891 case 1:
33892 pat = GEN_FCN (icode) (real_target, args[0].op);
33893 break;
33894 case 2:
33895 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
33896 break;
33897 case 3:
33898 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
33899 args[2].op);
33900 break;
33901 case 4:
33902 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
33903 args[2].op, args[3].op);
33904 break;
33905 case 5:
33906 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
33907 args[2].op, args[3].op, args[4].op);
break;
33908 case 6:
33909 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
33910 args[2].op, args[3].op, args[4].op,
33911 args[5].op);
33912 break;
33913 default:
33914 gcc_unreachable ();
33915 }
33916
33917 if (! pat)
33918 return 0;
33919
33920 emit_insn (pat);
33921 return target;
33922 }
33923
33924 /* Transform a pattern of the following layout:
33925 (parallel [
33926 (set (A B))
33927 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
33928 ])
33929 into:
33930 (set (A B))
33931
33932 Or:
33933 (parallel [ A B
33934 ...
33935 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
33936 ...
33937 ])
33938 into:
33939 (parallel [ A B ... ]) */
33940
33941 static rtx
33942 ix86_erase_embedded_rounding (rtx pat)
33943 {
33944 if (GET_CODE (pat) == INSN)
33945 pat = PATTERN (pat);
33946
33947 gcc_assert (GET_CODE (pat) == PARALLEL);
33948
33949 if (XVECLEN (pat, 0) == 2)
33950 {
33951 rtx p0 = XVECEXP (pat, 0, 0);
33952 rtx p1 = XVECEXP (pat, 0, 1);
33953
33954 gcc_assert (GET_CODE (p0) == SET
33955 && GET_CODE (p1) == UNSPEC
33956 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
33957
33958 return p0;
33959 }
33960 else
33961 {
33962 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
33963 int i = 0;
33964 int j = 0;
33965
33966 for (; i < XVECLEN (pat, 0); ++i)
33967 {
33968 rtx elem = XVECEXP (pat, 0, i);
33969 if (GET_CODE (elem) != UNSPEC
33970 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
33971 res [j++] = elem;
33972 }
33973
33974 /* No more than 1 occurrence was removed. */
33975 gcc_assert (j >= XVECLEN (pat, 0) - 1);
33976
33977 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
33978 }
33979 }
33980
33981 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
33982 with rounding. */
33983 static rtx
33984 ix86_expand_sse_comi_round (const struct builtin_description *d,
33985 tree exp, rtx target)
33986 {
33987 rtx pat, set_dst;
33988 tree arg0 = CALL_EXPR_ARG (exp, 0);
33989 tree arg1 = CALL_EXPR_ARG (exp, 1);
33990 tree arg2 = CALL_EXPR_ARG (exp, 2);
33991 tree arg3 = CALL_EXPR_ARG (exp, 3);
33992 rtx op0 = expand_normal (arg0);
33993 rtx op1 = expand_normal (arg1);
33994 rtx op2 = expand_normal (arg2);
33995 rtx op3 = expand_normal (arg3);
33996 enum insn_code icode = d->icode;
33997 const struct insn_data_d *insn_p = &insn_data[icode];
33998 enum machine_mode mode0 = insn_p->operand[0].mode;
33999 enum machine_mode mode1 = insn_p->operand[1].mode;
34000 enum rtx_code comparison = UNEQ;
34001 bool need_ucomi = false;
34002
34003 /* See avxintrin.h for values. */
34004 enum rtx_code comi_comparisons[32] =
34005 {
34006 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34007 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34008 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34009 };
34010 bool need_ucomi_values[32] =
34011 {
34012 true, false, false, true, true, false, false, true,
34013 true, false, false, true, true, false, false, true,
34014 false, true, true, false, false, true, true, false,
34015 false, true, true, false, false, true, true, false
34016 };
34017
34018 if (!CONST_INT_P (op2))
34019 {
34020 error ("the third argument must be a comparison constant");
34021 return const0_rtx;
34022 }
34023 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34024 {
34025 error ("incorrect comparison mode");
34026 return const0_rtx;
34027 }
34028
34029 if (!insn_p->operand[2].predicate (op3, SImode))
34030 {
34031 error ("incorrect rounding operand");
34032 return const0_rtx;
34033 }
34034
34035 comparison = comi_comparisons[INTVAL (op2)];
34036 need_ucomi = need_ucomi_values[INTVAL (op2)];
34037
34038 if (VECTOR_MODE_P (mode0))
34039 op0 = safe_vector_operand (op0, mode0);
34040 if (VECTOR_MODE_P (mode1))
34041 op1 = safe_vector_operand (op1, mode1);
34042
34043 target = gen_reg_rtx (SImode);
34044 emit_move_insn (target, const0_rtx);
34045 target = gen_rtx_SUBREG (QImode, target, 0);
34046
34047 if ((optimize && !register_operand (op0, mode0))
34048 || !insn_p->operand[0].predicate (op0, mode0))
34049 op0 = copy_to_mode_reg (mode0, op0);
34050 if ((optimize && !register_operand (op1, mode1))
34051 || !insn_p->operand[1].predicate (op1, mode1))
34052 op1 = copy_to_mode_reg (mode1, op1);
34053
34054 if (need_ucomi)
34055 icode = icode == CODE_FOR_sse_comi_round
34056 ? CODE_FOR_sse_ucomi_round
34057 : CODE_FOR_sse2_ucomi_round;
34058
34059 pat = GEN_FCN (icode) (op0, op1, op3);
34060 if (! pat)
34061 return 0;
34062
34063 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34064 if (INTVAL (op3) == NO_ROUND)
34065 {
34066 pat = ix86_erase_embedded_rounding (pat);
34067 if (! pat)
34068 return 0;
34069
34070 set_dst = SET_DEST (pat);
34071 }
34072 else
34073 {
34074 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34075 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34076 }
34077
34078 emit_insn (pat);
34079 emit_insn (gen_rtx_SET (VOIDmode,
34080 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34081 gen_rtx_fmt_ee (comparison, QImode,
34082 set_dst,
34083 const0_rtx)));
34084
34085 return SUBREG_REG (target);
34086 }
34087
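/* Subroutine of ix86_expand_builtin for the AVX-512 builtins whose last
   argument is an explicit rounding/SAE immediate; when that argument is
   NO_ROUND the embedded-rounding parallel is stripped again via
   ix86_erase_embedded_rounding before the insn is emitted.  */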
34088 static rtx
34089 ix86_expand_round_builtin (const struct builtin_description *d,
34090 tree exp, rtx target)
34091 {
34092 rtx pat;
34093 unsigned int i, nargs;
34094 struct
34095 {
34096 rtx op;
34097 enum machine_mode mode;
34098 } args[6];
34099 enum insn_code icode = d->icode;
34100 const struct insn_data_d *insn_p = &insn_data[icode];
34101 enum machine_mode tmode = insn_p->operand[0].mode;
34102 unsigned int nargs_constant = 0;
34103 unsigned int redundant_embed_rnd = 0;
34104
34105 switch ((enum ix86_builtin_func_type) d->flag)
34106 {
34107 case UINT64_FTYPE_V2DF_INT:
34108 case UINT64_FTYPE_V4SF_INT:
34109 case UINT_FTYPE_V2DF_INT:
34110 case UINT_FTYPE_V4SF_INT:
34111 case INT64_FTYPE_V2DF_INT:
34112 case INT64_FTYPE_V4SF_INT:
34113 case INT_FTYPE_V2DF_INT:
34114 case INT_FTYPE_V4SF_INT:
34115 nargs = 2;
34116 break;
34117 case V4SF_FTYPE_V4SF_UINT_INT:
34118 case V4SF_FTYPE_V4SF_UINT64_INT:
34119 case V2DF_FTYPE_V2DF_UINT64_INT:
34120 case V4SF_FTYPE_V4SF_INT_INT:
34121 case V4SF_FTYPE_V4SF_INT64_INT:
34122 case V2DF_FTYPE_V2DF_INT64_INT:
34123 case V4SF_FTYPE_V4SF_V4SF_INT:
34124 case V2DF_FTYPE_V2DF_V2DF_INT:
34125 case V4SF_FTYPE_V4SF_V2DF_INT:
34126 case V2DF_FTYPE_V2DF_V4SF_INT:
34127 nargs = 3;
34128 break;
34129 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34130 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34131 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34132 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34133 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34134 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34135 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34136 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34137 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34138 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34139 nargs = 4;
34140 break;
34141 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34142 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34143 nargs_constant = 2;
34144 nargs = 4;
34145 break;
34146 case INT_FTYPE_V4SF_V4SF_INT_INT:
34147 case INT_FTYPE_V2DF_V2DF_INT_INT:
34148 return ix86_expand_sse_comi_round (d, exp, target);
34149 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34150 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34151 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34152 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34153 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34154 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34155 nargs = 5;
34156 break;
34157 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34158 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34159 nargs_constant = 4;
34160 nargs = 5;
34161 break;
34162 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34163 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34164 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34165 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34166 nargs_constant = 3;
34167 nargs = 5;
34168 break;
34169 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34170 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34171 nargs = 6;
34172 nargs_constant = 4;
34173 break;
34174 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34175 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34176 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34177 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34178 nargs = 6;
34179 nargs_constant = 3;
34180 break;
34181 default:
34182 gcc_unreachable ();
34183 }
34184 gcc_assert (nargs <= ARRAY_SIZE (args));
34185
34186 if (optimize
34187 || target == 0
34188 || GET_MODE (target) != tmode
34189 || !insn_p->operand[0].predicate (target, tmode))
34190 target = gen_reg_rtx (tmode);
34191
34192 for (i = 0; i < nargs; i++)
34193 {
34194 tree arg = CALL_EXPR_ARG (exp, i);
34195 rtx op = expand_normal (arg);
34196 enum machine_mode mode = insn_p->operand[i + 1].mode;
34197 bool match = insn_p->operand[i + 1].predicate (op, mode);
34198
34199 if (i == nargs - nargs_constant)
34200 {
34201 if (!match)
34202 {
34203 switch (icode)
34204 {
34205 case CODE_FOR_avx512f_getmantv8df_mask_round:
34206 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34207 case CODE_FOR_avx512f_getmantv2df_round:
34208 case CODE_FOR_avx512f_getmantv4sf_round:
34209 error ("the immediate argument must be a 4-bit immediate");
34210 return const0_rtx;
34211 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34212 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34213 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34214 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34215 error ("the immediate argument must be a 5-bit immediate");
34216 return const0_rtx;
34217 default:
34218 error ("the immediate argument must be an 8-bit immediate");
34219 return const0_rtx;
34220 }
34221 }
34222 }
34223 else if (i == nargs-1)
34224 {
34225 if (!insn_p->operand[nargs].predicate (op, SImode))
34226 {
34227 error ("incorrect rounding operand");
34228 return const0_rtx;
34229 }
34230
34231 /* If there is no rounding, use the normal version of the pattern. */
34232 if (INTVAL (op) == NO_ROUND)
34233 redundant_embed_rnd = 1;
34234 }
34235 else
34236 {
34237 if (VECTOR_MODE_P (mode))
34238 op = safe_vector_operand (op, mode);
34239
34240 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34241 {
34242 if (optimize || !match)
34243 op = copy_to_mode_reg (mode, op);
34244 }
34245 else
34246 {
34247 op = copy_to_reg (op);
34248 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34249 }
34250 }
34251
34252 args[i].op = op;
34253 args[i].mode = mode;
34254 }
34255
34256 switch (nargs)
34257 {
34258 case 1:
34259 pat = GEN_FCN (icode) (target, args[0].op);
34260 break;
34261 case 2:
34262 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34263 break;
34264 case 3:
34265 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34266 args[2].op);
34267 break;
34268 case 4:
34269 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34270 args[2].op, args[3].op);
34271 break;
34272 case 5:
34273 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34274 args[2].op, args[3].op, args[4].op);
break;
34275 case 6:
34276 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34277 args[2].op, args[3].op, args[4].op,
34278 args[5].op);
34279 break;
34280 default:
34281 gcc_unreachable ();
34282 }
34283
34284 if (!pat)
34285 return 0;
34286
34287 if (redundant_embed_rnd)
34288 pat = ix86_erase_embedded_rounding (pat);
34289
34290 emit_insn (pat);
34291 return target;
34292 }
34293
34294 /* Subroutine of ix86_expand_builtin to take care of special insns
34295 with variable number of operands. */
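/* Illustrative only: the "special" builtins are the ones with memory or
   machine-state side effects, e.g. __builtin_ia32_loadups (klass == load)
   and __builtin_ia32_movntps (klass == store, whose CODE_FOR_sse_movntv4sf
   entry below additionally forces aligned_mem); the function type again
   determines nargs and which operand is the memory reference.  */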
34296
34297 static rtx
34298 ix86_expand_special_args_builtin (const struct builtin_description *d,
34299 tree exp, rtx target)
34300 {
34301 tree arg;
34302 rtx pat, op;
34303 unsigned int i, nargs, arg_adjust, memory;
34304 bool aligned_mem = false;
34305 struct
34306 {
34307 rtx op;
34308 enum machine_mode mode;
34309 } args[3];
34310 enum insn_code icode = d->icode;
34311 bool last_arg_constant = false;
34312 const struct insn_data_d *insn_p = &insn_data[icode];
34313 enum machine_mode tmode = insn_p->operand[0].mode;
34314 enum { load, store } klass;
34315
34316 switch ((enum ix86_builtin_func_type) d->flag)
34317 {
34318 case VOID_FTYPE_VOID:
34319 emit_insn (GEN_FCN (icode) (target));
34320 return 0;
34321 case VOID_FTYPE_UINT64:
34322 case VOID_FTYPE_UNSIGNED:
34323 nargs = 0;
34324 klass = store;
34325 memory = 0;
34326 break;
34327
34328 case INT_FTYPE_VOID:
34329 case UINT64_FTYPE_VOID:
34330 case UNSIGNED_FTYPE_VOID:
34331 nargs = 0;
34332 klass = load;
34333 memory = 0;
34334 break;
34335 case UINT64_FTYPE_PUNSIGNED:
34336 case V2DI_FTYPE_PV2DI:
34337 case V4DI_FTYPE_PV4DI:
34338 case V32QI_FTYPE_PCCHAR:
34339 case V16QI_FTYPE_PCCHAR:
34340 case V8SF_FTYPE_PCV4SF:
34341 case V8SF_FTYPE_PCFLOAT:
34342 case V4SF_FTYPE_PCFLOAT:
34343 case V4DF_FTYPE_PCV2DF:
34344 case V4DF_FTYPE_PCDOUBLE:
34345 case V2DF_FTYPE_PCDOUBLE:
34346 case VOID_FTYPE_PVOID:
34347 case V16SI_FTYPE_PV4SI:
34348 case V16SF_FTYPE_PV4SF:
34349 case V8DI_FTYPE_PV4DI:
34350 case V8DI_FTYPE_PV8DI:
34351 case V8DF_FTYPE_PV4DF:
34352 nargs = 1;
34353 klass = load;
34354 memory = 0;
34355 switch (icode)
34356 {
34357 case CODE_FOR_sse4_1_movntdqa:
34358 case CODE_FOR_avx2_movntdqa:
34359 case CODE_FOR_avx512f_movntdqa:
34360 aligned_mem = true;
34361 break;
34362 default:
34363 break;
34364 }
34365 break;
34366 case VOID_FTYPE_PV2SF_V4SF:
34367 case VOID_FTYPE_PV8DI_V8DI:
34368 case VOID_FTYPE_PV4DI_V4DI:
34369 case VOID_FTYPE_PV2DI_V2DI:
34370 case VOID_FTYPE_PCHAR_V32QI:
34371 case VOID_FTYPE_PCHAR_V16QI:
34372 case VOID_FTYPE_PFLOAT_V16SF:
34373 case VOID_FTYPE_PFLOAT_V8SF:
34374 case VOID_FTYPE_PFLOAT_V4SF:
34375 case VOID_FTYPE_PDOUBLE_V8DF:
34376 case VOID_FTYPE_PDOUBLE_V4DF:
34377 case VOID_FTYPE_PDOUBLE_V2DF:
34378 case VOID_FTYPE_PLONGLONG_LONGLONG:
34379 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34380 case VOID_FTYPE_PINT_INT:
34381 nargs = 1;
34382 klass = store;
34383 /* Reserve memory operand for target. */
34384 memory = ARRAY_SIZE (args);
34385 switch (icode)
34386 {
34387 /* These builtins and instructions require the memory
34388 to be properly aligned. */
34389 case CODE_FOR_avx_movntv4di:
34390 case CODE_FOR_sse2_movntv2di:
34391 case CODE_FOR_avx_movntv8sf:
34392 case CODE_FOR_sse_movntv4sf:
34393 case CODE_FOR_sse4a_vmmovntv4sf:
34394 case CODE_FOR_avx_movntv4df:
34395 case CODE_FOR_sse2_movntv2df:
34396 case CODE_FOR_sse4a_vmmovntv2df:
34397 case CODE_FOR_sse2_movntidi:
34398 case CODE_FOR_sse_movntq:
34399 case CODE_FOR_sse2_movntisi:
34400 case CODE_FOR_avx512f_movntv16sf:
34401 case CODE_FOR_avx512f_movntv8df:
34402 case CODE_FOR_avx512f_movntv8di:
34403 aligned_mem = true;
34404 break;
34405 default:
34406 break;
34407 }
34408 break;
34409 case V4SF_FTYPE_V4SF_PCV2SF:
34410 case V2DF_FTYPE_V2DF_PCDOUBLE:
34411 nargs = 2;
34412 klass = load;
34413 memory = 1;
34414 break;
34415 case V8SF_FTYPE_PCV8SF_V8SI:
34416 case V4DF_FTYPE_PCV4DF_V4DI:
34417 case V4SF_FTYPE_PCV4SF_V4SI:
34418 case V2DF_FTYPE_PCV2DF_V2DI:
34419 case V8SI_FTYPE_PCV8SI_V8SI:
34420 case V4DI_FTYPE_PCV4DI_V4DI:
34421 case V4SI_FTYPE_PCV4SI_V4SI:
34422 case V2DI_FTYPE_PCV2DI_V2DI:
34423 nargs = 2;
34424 klass = load;
34425 memory = 0;
34426 break;
34427 case VOID_FTYPE_PV8DF_V8DF_QI:
34428 case VOID_FTYPE_PV16SF_V16SF_HI:
34429 case VOID_FTYPE_PV8DI_V8DI_QI:
34430 case VOID_FTYPE_PV16SI_V16SI_HI:
34431 switch (icode)
34432 {
34433 /* These builtins and instructions require the memory
34434 to be properly aligned. */
34435 case CODE_FOR_avx512f_storev16sf_mask:
34436 case CODE_FOR_avx512f_storev16si_mask:
34437 case CODE_FOR_avx512f_storev8df_mask:
34438 case CODE_FOR_avx512f_storev8di_mask:
34439 aligned_mem = true;
34440 break;
34441 default:
34442 break;
34443 }
34444 /* FALLTHRU */
34445 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34446 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34447 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34448 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34449 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34450 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34451 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34452 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34453 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34454 case VOID_FTYPE_PFLOAT_V4SF_QI:
34455 nargs = 2;
34456 klass = store;
34457 /* Reserve memory operand for target. */
34458 memory = ARRAY_SIZE (args);
34459 break;
34460 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34461 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34462 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34463 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34464 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34465 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34466 nargs = 3;
34467 klass = load;
34468 memory = 0;
34469 switch (icode)
34470 {
34471 /* These builtins and instructions require the memory
34472 to be properly aligned. */
34473 case CODE_FOR_avx512f_loadv16sf_mask:
34474 case CODE_FOR_avx512f_loadv16si_mask:
34475 case CODE_FOR_avx512f_loadv8df_mask:
34476 case CODE_FOR_avx512f_loadv8di_mask:
34477 aligned_mem = true;
34478 break;
34479 default:
34480 break;
34481 }
34482 break;
34483 case VOID_FTYPE_UINT_UINT_UINT:
34484 case VOID_FTYPE_UINT64_UINT_UINT:
34485 case UCHAR_FTYPE_UINT_UINT_UINT:
34486 case UCHAR_FTYPE_UINT64_UINT_UINT:
34487 nargs = 3;
34488 klass = load;
34489 memory = ARRAY_SIZE (args);
34490 last_arg_constant = true;
34491 break;
34492 default:
34493 gcc_unreachable ();
34494 }
34495
34496 gcc_assert (nargs <= ARRAY_SIZE (args));
34497
34498 if (klass == store)
34499 {
34500 arg = CALL_EXPR_ARG (exp, 0);
34501 op = expand_normal (arg);
34502 gcc_assert (target == 0);
34503 if (memory)
34504 {
34505 op = ix86_zero_extend_to_Pmode (op);
34506 target = gen_rtx_MEM (tmode, op);
34507 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34508 on it. Try to improve it using get_pointer_alignment,
34509 and if the special builtin is one that requires strict
34510 mode alignment, also from its GET_MODE_ALIGNMENT.
34511 Failure to do so could lead to ix86_legitimate_combined_insn
34512 rejecting all changes to such insns. */
34513 unsigned int align = get_pointer_alignment (arg);
34514 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34515 align = GET_MODE_ALIGNMENT (tmode);
34516 if (MEM_ALIGN (target) < align)
34517 set_mem_align (target, align);
34518 }
34519 else
34520 target = force_reg (tmode, op);
34521 arg_adjust = 1;
34522 }
34523 else
34524 {
34525 arg_adjust = 0;
34526 if (optimize
34527 || target == 0
34528 || !register_operand (target, tmode)
34529 || GET_MODE (target) != tmode)
34530 target = gen_reg_rtx (tmode);
34531 }
34532
34533 for (i = 0; i < nargs; i++)
34534 {
34535 enum machine_mode mode = insn_p->operand[i + 1].mode;
34536 bool match;
34537
34538 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34539 op = expand_normal (arg);
34540 match = insn_p->operand[i + 1].predicate (op, mode);
34541
34542 if (last_arg_constant && (i + 1) == nargs)
34543 {
34544 if (!match)
34545 {
34546 if (icode == CODE_FOR_lwp_lwpvalsi3
34547 || icode == CODE_FOR_lwp_lwpinssi3
34548 || icode == CODE_FOR_lwp_lwpvaldi3
34549 || icode == CODE_FOR_lwp_lwpinsdi3)
34550 error ("the last argument must be a 32-bit immediate");
34551 else
34552 error ("the last argument must be an 8-bit immediate");
34553 return const0_rtx;
34554 }
34555 }
34556 else
34557 {
34558 if (i == memory)
34559 {
34560 /* This must be the memory operand. */
34561 op = ix86_zero_extend_to_Pmode (op);
34562 op = gen_rtx_MEM (mode, op);
34563 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34564 on it. Try to improve it using get_pointer_alignment,
34565 and if the special builtin is one that requires strict
34566 mode alignment, also from its GET_MODE_ALIGNMENT.
34567 Failure to do so could lead to ix86_legitimate_combined_insn
34568 rejecting all changes to such insns. */
34569 unsigned int align = get_pointer_alignment (arg);
34570 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34571 align = GET_MODE_ALIGNMENT (mode);
34572 if (MEM_ALIGN (op) < align)
34573 set_mem_align (op, align);
34574 }
34575 else
34576 {
34577 /* This must be a register. */
34578 if (VECTOR_MODE_P (mode))
34579 op = safe_vector_operand (op, mode);
34580
34581 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34582 op = copy_to_mode_reg (mode, op);
34583 else
34584 {
34585 op = copy_to_reg (op);
34586 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34587 }
34588 }
34589 }
34590
34591 args[i].op = op;
34592 args[i].mode = mode;
34593 }
34594
34595 switch (nargs)
34596 {
34597 case 0:
34598 pat = GEN_FCN (icode) (target);
34599 break;
34600 case 1:
34601 pat = GEN_FCN (icode) (target, args[0].op);
34602 break;
34603 case 2:
34604 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34605 break;
34606 case 3:
34607 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34608 break;
34609 default:
34610 gcc_unreachable ();
34611 }
34612
34613 if (! pat)
34614 return 0;
34615 emit_insn (pat);
34616 return klass == store ? 0 : target;
34617 }
34618
34619 /* Return the integer constant in ARG. Constrain it to be in the range
34620 of the subparts of VEC_TYPE; issue an error if not. */
34621
34622 static int
34623 get_element_number (tree vec_type, tree arg)
34624 {
34625 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34626
34627 if (!tree_fits_uhwi_p (arg)
34628 || (elt = tree_to_uhwi (arg), elt > max))
34629 {
34630 error ("selector must be an integer constant in the range 0..%wi", max);
34631 return 0;
34632 }
34633
34634 return elt;
34635 }
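/* For example (sketch only): for a V4SF argument TYPE_VECTOR_SUBPARTS is 4,
   so the valid selectors are 0..3, and a call such as

     __v4sf v = { 1.0f, 2.0f, 3.0f, 4.0f };
     float f = __builtin_ia32_vec_ext_v4sf (v, 7);

   is rejected here with the "selector must be an integer constant in the
   range 0..3" diagnostic, and element 0 is used instead.  */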
34636
34637 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34638 ix86_expand_vector_init. We DO have language-level syntax for this, in
34639 the form of (type){ init-list }. Except that since we can't place emms
34640 instructions from inside the compiler, we can't allow the use of MMX
34641 registers unless the user explicitly asks for it. So we do *not* define
34642 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34643 we have builtins invoked by mmintrin.h that give us license to emit
34644 these sorts of instructions. */
34645
34646 static rtx
34647 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34648 {
34649 enum machine_mode tmode = TYPE_MODE (type);
34650 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34651 int i, n_elt = GET_MODE_NUNITS (tmode);
34652 rtvec v = rtvec_alloc (n_elt);
34653
34654 gcc_assert (VECTOR_MODE_P (tmode));
34655 gcc_assert (call_expr_nargs (exp) == n_elt);
34656
34657 for (i = 0; i < n_elt; ++i)
34658 {
34659 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34660 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34661 }
34662
34663 if (!target || !register_operand (target, tmode))
34664 target = gen_reg_rtx (tmode);
34665
34666 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34667 return target;
34668 }
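/* Usage sketch (illustrative only; the wrapper named below lives in
   mmintrin.h): the MMX "set" intrinsics funnel into these vec_init builtins,
   for instance

     __m64 m = _mm_set_pi32 (hi, lo);

   expands to __builtin_ia32_vec_init_v2si and arrives here with two scalar
   arguments, which are collected into the PARALLEL handed to
   ix86_expand_vector_init above.  */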
34669
34670 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34671 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34672 had a language-level syntax for referencing vector elements. */
34673
34674 static rtx
34675 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34676 {
34677 enum machine_mode tmode, mode0;
34678 tree arg0, arg1;
34679 int elt;
34680 rtx op0;
34681
34682 arg0 = CALL_EXPR_ARG (exp, 0);
34683 arg1 = CALL_EXPR_ARG (exp, 1);
34684
34685 op0 = expand_normal (arg0);
34686 elt = get_element_number (TREE_TYPE (arg0), arg1);
34687
34688 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34689 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34690 gcc_assert (VECTOR_MODE_P (mode0));
34691
34692 op0 = force_reg (mode0, op0);
34693
34694 if (optimize || !target || !register_operand (target, tmode))
34695 target = gen_reg_rtx (tmode);
34696
34697 ix86_expand_vector_extract (true, target, op0, elt);
34698
34699 return target;
34700 }
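/* Usage sketch (illustrative only): element-extraction intrinsics such as

     int w = _mm_extract_epi16 (x, 3);

   expand __builtin_ia32_vec_ext_v8hi and land here; the constant selector is
   validated by get_element_number above and ix86_expand_vector_extract emits
   the actual extraction sequence.  */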
34701
34702 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34703 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34704 a language-level syntax for referencing vector elements. */
34705
34706 static rtx
34707 ix86_expand_vec_set_builtin (tree exp)
34708 {
34709 enum machine_mode tmode, mode1;
34710 tree arg0, arg1, arg2;
34711 int elt;
34712 rtx op0, op1, target;
34713
34714 arg0 = CALL_EXPR_ARG (exp, 0);
34715 arg1 = CALL_EXPR_ARG (exp, 1);
34716 arg2 = CALL_EXPR_ARG (exp, 2);
34717
34718 tmode = TYPE_MODE (TREE_TYPE (arg0));
34719 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34720 gcc_assert (VECTOR_MODE_P (tmode));
34721
34722 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34723 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34724 elt = get_element_number (TREE_TYPE (arg0), arg2);
34725
34726 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34727 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34728
34729 op0 = force_reg (tmode, op0);
34730 op1 = force_reg (mode1, op1);
34731
34732 /* OP0 is the source of these builtin functions and shouldn't be
34733 modified. Create a copy, use it and return it as target. */
34734 target = gen_reg_rtx (tmode);
34735 emit_move_insn (target, op0);
34736 ix86_expand_vector_set (true, target, op1, elt);
34737
34738 return target;
34739 }
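/* Usage sketch (illustrative only): element-insertion intrinsics such as

     __m128i y = _mm_insert_epi16 (x, 42, 3);

   expand __builtin_ia32_vec_set_v8hi and come through here; as the comment
   above notes, the input vector is first copied into a fresh register so the
   builtin's source operand is never modified.  */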
34740
34741 /* Expand an expression EXP that calls a built-in function,
34742 with result going to TARGET if that's convenient
34743 (and in mode MODE if that's convenient).
34744 SUBTARGET may be used as the target for computing one of EXP's operands.
34745 IGNORE is nonzero if the value is to be ignored. */
34746
34747 static rtx
34748 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
34749 enum machine_mode mode, int ignore)
34750 {
34751 const struct builtin_description *d;
34752 size_t i;
34753 enum insn_code icode;
34754 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
34755 tree arg0, arg1, arg2, arg3, arg4;
34756 rtx op0, op1, op2, op3, op4, pat, insn;
34757 enum machine_mode mode0, mode1, mode2, mode3, mode4;
34758 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
34759
34760 /* For CPU builtins that can be folded, fold first and expand the fold. */
34761 switch (fcode)
34762 {
34763 case IX86_BUILTIN_CPU_INIT:
34764 {
34765 /* Make it call __cpu_indicator_init in libgcc. */
34766 tree call_expr, fndecl, type;
34767 type = build_function_type_list (integer_type_node, NULL_TREE);
34768 fndecl = build_fn_decl ("__cpu_indicator_init", type);
34769 call_expr = build_call_expr (fndecl, 0);
34770 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
34771 }
34772 case IX86_BUILTIN_CPU_IS:
34773 case IX86_BUILTIN_CPU_SUPPORTS:
34774 {
34775 tree arg0 = CALL_EXPR_ARG (exp, 0);
34776 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
34777 gcc_assert (fold_expr != NULL_TREE);
34778 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
34779 }
34780 }
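/* A minimal user-level sketch of the three CPU builtins handled above
   (illustrative only):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("intel"))
       do_intel_tuned_code ();
     if (__builtin_cpu_supports ("avx2"))
       do_avx2_code ();

   __builtin_cpu_init expands to the libgcc call emitted above, while the
   other two are folded by fold_builtin_cpu and the folded expression is
   expanded in its place.  do_intel_tuned_code and do_avx2_code are just
   placeholder names.  */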
34781
34782 /* Determine whether the builtin function is available under the current ISA.
34783 Originally the builtin was not created if it wasn't applicable to the
34784 current ISA based on the command line switches. With function specific
34785 options, we need to check in the context of the function making the call
34786 whether it is supported. */
34787 if (ix86_builtins_isa[fcode].isa
34788 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
34789 {
34790 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
34791 NULL, (enum fpmath_unit) 0, false);
34792
34793 if (!opts)
34794 error ("%qE needs unknown isa option", fndecl);
34795 else
34796 {
34797 gcc_assert (opts != NULL);
34798 error ("%qE needs isa option %s", fndecl, opts);
34799 free (opts);
34800 }
34801 return const0_rtx;
34802 }
34803
34804 switch (fcode)
34805 {
34806 case IX86_BUILTIN_MASKMOVQ:
34807 case IX86_BUILTIN_MASKMOVDQU:
34808 icode = (fcode == IX86_BUILTIN_MASKMOVQ
34809 ? CODE_FOR_mmx_maskmovq
34810 : CODE_FOR_sse2_maskmovdqu);
34811 /* Note the arg order is different from the operand order. */
34812 arg1 = CALL_EXPR_ARG (exp, 0);
34813 arg2 = CALL_EXPR_ARG (exp, 1);
34814 arg0 = CALL_EXPR_ARG (exp, 2);
34815 op0 = expand_normal (arg0);
34816 op1 = expand_normal (arg1);
34817 op2 = expand_normal (arg2);
34818 mode0 = insn_data[icode].operand[0].mode;
34819 mode1 = insn_data[icode].operand[1].mode;
34820 mode2 = insn_data[icode].operand[2].mode;
34821
34822 op0 = ix86_zero_extend_to_Pmode (op0);
34823 op0 = gen_rtx_MEM (mode1, op0);
34824
34825 if (!insn_data[icode].operand[0].predicate (op0, mode0))
34826 op0 = copy_to_mode_reg (mode0, op0);
34827 if (!insn_data[icode].operand[1].predicate (op1, mode1))
34828 op1 = copy_to_mode_reg (mode1, op1);
34829 if (!insn_data[icode].operand[2].predicate (op2, mode2))
34830 op2 = copy_to_mode_reg (mode2, op2);
34831 pat = GEN_FCN (icode) (op0, op1, op2);
34832 if (! pat)
34833 return 0;
34834 emit_insn (pat);
34835 return 0;
34836
34837 case IX86_BUILTIN_LDMXCSR:
34838 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
34839 target = assign_386_stack_local (SImode, SLOT_TEMP);
34840 emit_move_insn (target, op0);
34841 emit_insn (gen_sse_ldmxcsr (target));
34842 return 0;
34843
34844 case IX86_BUILTIN_STMXCSR:
34845 target = assign_386_stack_local (SImode, SLOT_TEMP);
34846 emit_insn (gen_sse_stmxcsr (target));
34847 return copy_to_mode_reg (SImode, target);
34848
34849 case IX86_BUILTIN_CLFLUSH:
34850 arg0 = CALL_EXPR_ARG (exp, 0);
34851 op0 = expand_normal (arg0);
34852 icode = CODE_FOR_sse2_clflush;
34853 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
34854 op0 = ix86_zero_extend_to_Pmode (op0);
34855
34856 emit_insn (gen_sse2_clflush (op0));
34857 return 0;
34858
34859 case IX86_BUILTIN_MONITOR:
34860 arg0 = CALL_EXPR_ARG (exp, 0);
34861 arg1 = CALL_EXPR_ARG (exp, 1);
34862 arg2 = CALL_EXPR_ARG (exp, 2);
34863 op0 = expand_normal (arg0);
34864 op1 = expand_normal (arg1);
34865 op2 = expand_normal (arg2);
34866 if (!REG_P (op0))
34867 op0 = ix86_zero_extend_to_Pmode (op0);
34868 if (!REG_P (op1))
34869 op1 = copy_to_mode_reg (SImode, op1);
34870 if (!REG_P (op2))
34871 op2 = copy_to_mode_reg (SImode, op2);
34872 emit_insn (ix86_gen_monitor (op0, op1, op2));
34873 return 0;
34874
34875 case IX86_BUILTIN_MWAIT:
34876 arg0 = CALL_EXPR_ARG (exp, 0);
34877 arg1 = CALL_EXPR_ARG (exp, 1);
34878 op0 = expand_normal (arg0);
34879 op1 = expand_normal (arg1);
34880 if (!REG_P (op0))
34881 op0 = copy_to_mode_reg (SImode, op0);
34882 if (!REG_P (op1))
34883 op1 = copy_to_mode_reg (SImode, op1);
34884 emit_insn (gen_sse3_mwait (op0, op1));
34885 return 0;
34886
34887 case IX86_BUILTIN_VEC_INIT_V2SI:
34888 case IX86_BUILTIN_VEC_INIT_V4HI:
34889 case IX86_BUILTIN_VEC_INIT_V8QI:
34890 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
34891
34892 case IX86_BUILTIN_VEC_EXT_V2DF:
34893 case IX86_BUILTIN_VEC_EXT_V2DI:
34894 case IX86_BUILTIN_VEC_EXT_V4SF:
34895 case IX86_BUILTIN_VEC_EXT_V4SI:
34896 case IX86_BUILTIN_VEC_EXT_V8HI:
34897 case IX86_BUILTIN_VEC_EXT_V2SI:
34898 case IX86_BUILTIN_VEC_EXT_V4HI:
34899 case IX86_BUILTIN_VEC_EXT_V16QI:
34900 return ix86_expand_vec_ext_builtin (exp, target);
34901
34902 case IX86_BUILTIN_VEC_SET_V2DI:
34903 case IX86_BUILTIN_VEC_SET_V4SF:
34904 case IX86_BUILTIN_VEC_SET_V4SI:
34905 case IX86_BUILTIN_VEC_SET_V8HI:
34906 case IX86_BUILTIN_VEC_SET_V4HI:
34907 case IX86_BUILTIN_VEC_SET_V16QI:
34908 return ix86_expand_vec_set_builtin (exp);
34909
34910 case IX86_BUILTIN_INFQ:
34911 case IX86_BUILTIN_HUGE_VALQ:
34912 {
34913 REAL_VALUE_TYPE inf;
34914 rtx tmp;
34915
34916 real_inf (&inf);
34917 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
34918
34919 tmp = validize_mem (force_const_mem (mode, tmp));
34920
34921 if (target == 0)
34922 target = gen_reg_rtx (mode);
34923
34924 emit_move_insn (target, tmp);
34925 return target;
34926 }
34927
34928 case IX86_BUILTIN_RDPMC:
34929 case IX86_BUILTIN_RDTSC:
34930 case IX86_BUILTIN_RDTSCP:
34931
34932 op0 = gen_reg_rtx (DImode);
34933 op1 = gen_reg_rtx (DImode);
34934
34935 if (fcode == IX86_BUILTIN_RDPMC)
34936 {
34937 arg0 = CALL_EXPR_ARG (exp, 0);
34938 op2 = expand_normal (arg0);
34939 if (!register_operand (op2, SImode))
34940 op2 = copy_to_mode_reg (SImode, op2);
34941
34942 insn = (TARGET_64BIT
34943 ? gen_rdpmc_rex64 (op0, op1, op2)
34944 : gen_rdpmc (op0, op2));
34945 emit_insn (insn);
34946 }
34947 else if (fcode == IX86_BUILTIN_RDTSC)
34948 {
34949 insn = (TARGET_64BIT
34950 ? gen_rdtsc_rex64 (op0, op1)
34951 : gen_rdtsc (op0));
34952 emit_insn (insn);
34953 }
34954 else
34955 {
34956 op2 = gen_reg_rtx (SImode);
34957
34958 insn = (TARGET_64BIT
34959 ? gen_rdtscp_rex64 (op0, op1, op2)
34960 : gen_rdtscp (op0, op2));
34961 emit_insn (insn);
34962
34963 arg0 = CALL_EXPR_ARG (exp, 0);
34964 op4 = expand_normal (arg0);
34965 if (!address_operand (op4, VOIDmode))
34966 {
34967 op4 = convert_memory_address (Pmode, op4);
34968 op4 = copy_addr_to_reg (op4);
34969 }
34970 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
34971 }
34972
34973 if (target == 0)
34974 {
34975 /* mode is VOIDmode if __builtin_rd* has been called
34976 without lhs. */
34977 if (mode == VOIDmode)
34978 return target;
34979 target = gen_reg_rtx (mode);
34980 }
34981
34982 if (TARGET_64BIT)
34983 {
34984 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
34985 op1, 1, OPTAB_DIRECT);
34986 op0 = expand_simple_binop (DImode, IOR, op0, op1,
34987 op0, 1, OPTAB_DIRECT);
34988 }
34989
34990 emit_move_insn (target, op0);
34991 return target;
34992
34993 case IX86_BUILTIN_FXSAVE:
34994 case IX86_BUILTIN_FXRSTOR:
34995 case IX86_BUILTIN_FXSAVE64:
34996 case IX86_BUILTIN_FXRSTOR64:
34997 case IX86_BUILTIN_FNSTENV:
34998 case IX86_BUILTIN_FLDENV:
34999 case IX86_BUILTIN_FNSTSW:
35000 mode0 = BLKmode;
35001 switch (fcode)
35002 {
35003 case IX86_BUILTIN_FXSAVE:
35004 icode = CODE_FOR_fxsave;
35005 break;
35006 case IX86_BUILTIN_FXRSTOR:
35007 icode = CODE_FOR_fxrstor;
35008 break;
35009 case IX86_BUILTIN_FXSAVE64:
35010 icode = CODE_FOR_fxsave64;
35011 break;
35012 case IX86_BUILTIN_FXRSTOR64:
35013 icode = CODE_FOR_fxrstor64;
35014 break;
35015 case IX86_BUILTIN_FNSTENV:
35016 icode = CODE_FOR_fnstenv;
35017 break;
35018 case IX86_BUILTIN_FLDENV:
35019 icode = CODE_FOR_fldenv;
35020 break;
35021 case IX86_BUILTIN_FNSTSW:
35022 icode = CODE_FOR_fnstsw;
35023 mode0 = HImode;
35024 break;
35025 default:
35026 gcc_unreachable ();
35027 }
35028
35029 arg0 = CALL_EXPR_ARG (exp, 0);
35030 op0 = expand_normal (arg0);
35031
35032 if (!address_operand (op0, VOIDmode))
35033 {
35034 op0 = convert_memory_address (Pmode, op0);
35035 op0 = copy_addr_to_reg (op0);
35036 }
35037 op0 = gen_rtx_MEM (mode0, op0);
35038
35039 pat = GEN_FCN (icode) (op0);
35040 if (pat)
35041 emit_insn (pat);
35042 return 0;
35043
35044 case IX86_BUILTIN_XSAVE:
35045 case IX86_BUILTIN_XRSTOR:
35046 case IX86_BUILTIN_XSAVE64:
35047 case IX86_BUILTIN_XRSTOR64:
35048 case IX86_BUILTIN_XSAVEOPT:
35049 case IX86_BUILTIN_XSAVEOPT64:
35050 arg0 = CALL_EXPR_ARG (exp, 0);
35051 arg1 = CALL_EXPR_ARG (exp, 1);
35052 op0 = expand_normal (arg0);
35053 op1 = expand_normal (arg1);
35054
35055 if (!address_operand (op0, VOIDmode))
35056 {
35057 op0 = convert_memory_address (Pmode, op0);
35058 op0 = copy_addr_to_reg (op0);
35059 }
35060 op0 = gen_rtx_MEM (BLKmode, op0);
35061
35062 op1 = force_reg (DImode, op1);
35063
35064 if (TARGET_64BIT)
35065 {
35066 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35067 NULL, 1, OPTAB_DIRECT);
35068 switch (fcode)
35069 {
35070 case IX86_BUILTIN_XSAVE:
35071 icode = CODE_FOR_xsave_rex64;
35072 break;
35073 case IX86_BUILTIN_XRSTOR:
35074 icode = CODE_FOR_xrstor_rex64;
35075 break;
35076 case IX86_BUILTIN_XSAVE64:
35077 icode = CODE_FOR_xsave64;
35078 break;
35079 case IX86_BUILTIN_XRSTOR64:
35080 icode = CODE_FOR_xrstor64;
35081 break;
35082 case IX86_BUILTIN_XSAVEOPT:
35083 icode = CODE_FOR_xsaveopt_rex64;
35084 break;
35085 case IX86_BUILTIN_XSAVEOPT64:
35086 icode = CODE_FOR_xsaveopt64;
35087 break;
35088 default:
35089 gcc_unreachable ();
35090 }
35091
35092 op2 = gen_lowpart (SImode, op2);
35093 op1 = gen_lowpart (SImode, op1);
35094 pat = GEN_FCN (icode) (op0, op1, op2);
35095 }
35096 else
35097 {
35098 switch (fcode)
35099 {
35100 case IX86_BUILTIN_XSAVE:
35101 icode = CODE_FOR_xsave;
35102 break;
35103 case IX86_BUILTIN_XRSTOR:
35104 icode = CODE_FOR_xrstor;
35105 break;
35106 case IX86_BUILTIN_XSAVEOPT:
35107 icode = CODE_FOR_xsaveopt;
35108 break;
35109 default:
35110 gcc_unreachable ();
35111 }
35112 pat = GEN_FCN (icode) (op0, op1);
35113 }
35114
35115 if (pat)
35116 emit_insn (pat);
35117 return 0;
35118
35119 case IX86_BUILTIN_LLWPCB:
35120 arg0 = CALL_EXPR_ARG (exp, 0);
35121 op0 = expand_normal (arg0);
35122 icode = CODE_FOR_lwp_llwpcb;
35123 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35124 op0 = ix86_zero_extend_to_Pmode (op0);
35125 emit_insn (gen_lwp_llwpcb (op0));
35126 return 0;
35127
35128 case IX86_BUILTIN_SLWPCB:
35129 icode = CODE_FOR_lwp_slwpcb;
35130 if (!target
35131 || !insn_data[icode].operand[0].predicate (target, Pmode))
35132 target = gen_reg_rtx (Pmode);
35133 emit_insn (gen_lwp_slwpcb (target));
35134 return target;
35135
35136 case IX86_BUILTIN_BEXTRI32:
35137 case IX86_BUILTIN_BEXTRI64:
35138 arg0 = CALL_EXPR_ARG (exp, 0);
35139 arg1 = CALL_EXPR_ARG (exp, 1);
35140 op0 = expand_normal (arg0);
35141 op1 = expand_normal (arg1);
35142 icode = (fcode == IX86_BUILTIN_BEXTRI32
35143 ? CODE_FOR_tbm_bextri_si
35144 : CODE_FOR_tbm_bextri_di);
35145 if (!CONST_INT_P (op1))
35146 {
35147 error ("last argument must be an immediate");
35148 return const0_rtx;
35149 }
35150 else
35151 {
35152 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35153 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35154 op1 = GEN_INT (length);
35155 op2 = GEN_INT (lsb_index);
35156 pat = GEN_FCN (icode) (target, op0, op1, op2);
35157 if (pat)
35158 emit_insn (pat);
35159 return target;
35160 }
35161
35162 case IX86_BUILTIN_RDRAND16_STEP:
35163 icode = CODE_FOR_rdrandhi_1;
35164 mode0 = HImode;
35165 goto rdrand_step;
35166
35167 case IX86_BUILTIN_RDRAND32_STEP:
35168 icode = CODE_FOR_rdrandsi_1;
35169 mode0 = SImode;
35170 goto rdrand_step;
35171
35172 case IX86_BUILTIN_RDRAND64_STEP:
35173 icode = CODE_FOR_rdranddi_1;
35174 mode0 = DImode;
35175
35176 rdrand_step:
35177 op0 = gen_reg_rtx (mode0);
35178 emit_insn (GEN_FCN (icode) (op0));
35179
35180 arg0 = CALL_EXPR_ARG (exp, 0);
35181 op1 = expand_normal (arg0);
35182 if (!address_operand (op1, VOIDmode))
35183 {
35184 op1 = convert_memory_address (Pmode, op1);
35185 op1 = copy_addr_to_reg (op1);
35186 }
35187 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35188
35189 op1 = gen_reg_rtx (SImode);
35190 emit_move_insn (op1, CONST1_RTX (SImode));
35191
35192 /* Emit SImode conditional move. */
35193 if (mode0 == HImode)
35194 {
35195 op2 = gen_reg_rtx (SImode);
35196 emit_insn (gen_zero_extendhisi2 (op2, op0));
35197 }
35198 else if (mode0 == SImode)
35199 op2 = op0;
35200 else
35201 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35202
35203 if (target == 0)
35204 target = gen_reg_rtx (SImode);
35205
35206 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35207 const0_rtx);
35208 emit_insn (gen_rtx_SET (VOIDmode, target,
35209 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35210 return target;
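/* Usage sketch (illustrative only): the RDRAND step builtins back the
   <immintrin.h> helpers, e.g.

     unsigned int v;
     int ok = _rdrand32_step (&v);

   The expansion above stores the hardware result through the pointer and
   then uses the carry flag in the conditional move just emitted: on success
   it selects the constant 1, otherwise the destination value (documented to
   be zero when no random data was available), so OK is nonzero exactly when
   V received a random value.  */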
35211
35212 case IX86_BUILTIN_RDSEED16_STEP:
35213 icode = CODE_FOR_rdseedhi_1;
35214 mode0 = HImode;
35215 goto rdseed_step;
35216
35217 case IX86_BUILTIN_RDSEED32_STEP:
35218 icode = CODE_FOR_rdseedsi_1;
35219 mode0 = SImode;
35220 goto rdseed_step;
35221
35222 case IX86_BUILTIN_RDSEED64_STEP:
35223 icode = CODE_FOR_rdseeddi_1;
35224 mode0 = DImode;
35225
35226 rdseed_step:
35227 op0 = gen_reg_rtx (mode0);
35228 emit_insn (GEN_FCN (icode) (op0));
35229
35230 arg0 = CALL_EXPR_ARG (exp, 0);
35231 op1 = expand_normal (arg0);
35232 if (!address_operand (op1, VOIDmode))
35233 {
35234 op1 = convert_memory_address (Pmode, op1);
35235 op1 = copy_addr_to_reg (op1);
35236 }
35237 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35238
35239 op2 = gen_reg_rtx (QImode);
35240
35241 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35242 const0_rtx);
35243 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35244
35245 if (target == 0)
35246 target = gen_reg_rtx (SImode);
35247
35248 emit_insn (gen_zero_extendqisi2 (target, op2));
35249 return target;
35250
35251 case IX86_BUILTIN_ADDCARRYX32:
35252 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35253 mode0 = SImode;
35254 goto addcarryx;
35255
35256 case IX86_BUILTIN_ADDCARRYX64:
35257 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35258 mode0 = DImode;
35259
35260 addcarryx:
35261 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35262 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35263 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35264 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35265
35266 op0 = gen_reg_rtx (QImode);
35267
35268 /* Generate CF from input operand. */
35269 op1 = expand_normal (arg0);
35270 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35271 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35272
35273 /* Generate an ADCX (or plain ADC) instruction to compute X+Y+CF. */
35274 op2 = expand_normal (arg1);
35275 op3 = expand_normal (arg2);
35276
35277 if (!REG_P (op2))
35278 op2 = copy_to_mode_reg (mode0, op2);
35279 if (!REG_P (op3))
35280 op3 = copy_to_mode_reg (mode0, op3);
35281
35282 op0 = gen_reg_rtx (mode0);
35283
35284 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35285 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35286 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35287
35288 /* Store the result. */
35289 op4 = expand_normal (arg3);
35290 if (!address_operand (op4, VOIDmode))
35291 {
35292 op4 = convert_memory_address (Pmode, op4);
35293 op4 = copy_addr_to_reg (op4);
35294 }
35295 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35296
35297 /* Return current CF value. */
35298 if (target == 0)
35299 target = gen_reg_rtx (QImode);
35300
35301 PUT_MODE (pat, QImode);
35302 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35303 return target;
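/* Usage sketch (illustrative only): a single step of a multi-precision add
   using the builtin expanded above:

     unsigned int sum;
     unsigned char c_out = _addcarryx_u32 (c_in, a, b, &sum);

   The incoming carry is first converted into CF by the addqi3_cc add above,
   the ADCX pattern (or the plain add-with-carry pattern when ADX is not
   enabled) performs X+Y+CF, the low word is stored through the pointer, and
   the final carry flag is returned as C_OUT.  */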
35304
35305 case IX86_BUILTIN_READ_FLAGS:
35306 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35307
35308 if (optimize
35309 || target == NULL_RTX
35310 || !nonimmediate_operand (target, word_mode)
35311 || GET_MODE (target) != word_mode)
35312 target = gen_reg_rtx (word_mode);
35313
35314 emit_insn (gen_pop (target));
35315 return target;
35316
35317 case IX86_BUILTIN_WRITE_FLAGS:
35318
35319 arg0 = CALL_EXPR_ARG (exp, 0);
35320 op0 = expand_normal (arg0);
35321 if (!general_no_elim_operand (op0, word_mode))
35322 op0 = copy_to_mode_reg (word_mode, op0);
35323
35324 emit_insn (gen_push (op0));
35325 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35326 return 0;
35327
35328 case IX86_BUILTIN_KORTESTC16:
35329 icode = CODE_FOR_kortestchi;
35330 mode0 = HImode;
35331 mode1 = CCCmode;
35332 goto kortest;
35333
35334 case IX86_BUILTIN_KORTESTZ16:
35335 icode = CODE_FOR_kortestzhi;
35336 mode0 = HImode;
35337 mode1 = CCZmode;
35338
35339 kortest:
35340 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35341 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35342 op0 = expand_normal (arg0);
35343 op1 = expand_normal (arg1);
35344
35345 op0 = copy_to_reg (op0);
35346 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35347 op1 = copy_to_reg (op1);
35348 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35349
35350 target = gen_reg_rtx (QImode);
35351 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35352
35353 /* Emit kortest. */
35354 emit_insn (GEN_FCN (icode) (op0, op1));
35355 /* And use setcc to return result from flags. */
35356 ix86_expand_setcc (target, EQ,
35357 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35358 return target;
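/* Usage sketch (illustrative only): these expansions back the AVX-512 mask
   tests, e.g.

     __mmask16 m1 = ..., m2 = ...;
     int all_ones = _mm512_kortestc (m1, m2);
     int all_zero = _mm512_kortestz (m1, m2);

   kortestw sets the flags from (m1 | m2); the setcc emitted above then turns
   the relevant flag into the 0/1 result, so all_ones is nonzero when the OR
   of the masks is 0xffff and all_zero is nonzero when it is 0.  */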
35359
35360 case IX86_BUILTIN_GATHERSIV2DF:
35361 icode = CODE_FOR_avx2_gathersiv2df;
35362 goto gather_gen;
35363 case IX86_BUILTIN_GATHERSIV4DF:
35364 icode = CODE_FOR_avx2_gathersiv4df;
35365 goto gather_gen;
35366 case IX86_BUILTIN_GATHERDIV2DF:
35367 icode = CODE_FOR_avx2_gatherdiv2df;
35368 goto gather_gen;
35369 case IX86_BUILTIN_GATHERDIV4DF:
35370 icode = CODE_FOR_avx2_gatherdiv4df;
35371 goto gather_gen;
35372 case IX86_BUILTIN_GATHERSIV4SF:
35373 icode = CODE_FOR_avx2_gathersiv4sf;
35374 goto gather_gen;
35375 case IX86_BUILTIN_GATHERSIV8SF:
35376 icode = CODE_FOR_avx2_gathersiv8sf;
35377 goto gather_gen;
35378 case IX86_BUILTIN_GATHERDIV4SF:
35379 icode = CODE_FOR_avx2_gatherdiv4sf;
35380 goto gather_gen;
35381 case IX86_BUILTIN_GATHERDIV8SF:
35382 icode = CODE_FOR_avx2_gatherdiv8sf;
35383 goto gather_gen;
35384 case IX86_BUILTIN_GATHERSIV2DI:
35385 icode = CODE_FOR_avx2_gathersiv2di;
35386 goto gather_gen;
35387 case IX86_BUILTIN_GATHERSIV4DI:
35388 icode = CODE_FOR_avx2_gathersiv4di;
35389 goto gather_gen;
35390 case IX86_BUILTIN_GATHERDIV2DI:
35391 icode = CODE_FOR_avx2_gatherdiv2di;
35392 goto gather_gen;
35393 case IX86_BUILTIN_GATHERDIV4DI:
35394 icode = CODE_FOR_avx2_gatherdiv4di;
35395 goto gather_gen;
35396 case IX86_BUILTIN_GATHERSIV4SI:
35397 icode = CODE_FOR_avx2_gathersiv4si;
35398 goto gather_gen;
35399 case IX86_BUILTIN_GATHERSIV8SI:
35400 icode = CODE_FOR_avx2_gathersiv8si;
35401 goto gather_gen;
35402 case IX86_BUILTIN_GATHERDIV4SI:
35403 icode = CODE_FOR_avx2_gatherdiv4si;
35404 goto gather_gen;
35405 case IX86_BUILTIN_GATHERDIV8SI:
35406 icode = CODE_FOR_avx2_gatherdiv8si;
35407 goto gather_gen;
35408 case IX86_BUILTIN_GATHERALTSIV4DF:
35409 icode = CODE_FOR_avx2_gathersiv4df;
35410 goto gather_gen;
35411 case IX86_BUILTIN_GATHERALTDIV8SF:
35412 icode = CODE_FOR_avx2_gatherdiv8sf;
35413 goto gather_gen;
35414 case IX86_BUILTIN_GATHERALTSIV4DI:
35415 icode = CODE_FOR_avx2_gathersiv4di;
35416 goto gather_gen;
35417 case IX86_BUILTIN_GATHERALTDIV8SI:
35418 icode = CODE_FOR_avx2_gatherdiv8si;
35419 goto gather_gen;
35420 case IX86_BUILTIN_GATHER3SIV16SF:
35421 icode = CODE_FOR_avx512f_gathersiv16sf;
35422 goto gather_gen;
35423 case IX86_BUILTIN_GATHER3SIV8DF:
35424 icode = CODE_FOR_avx512f_gathersiv8df;
35425 goto gather_gen;
35426 case IX86_BUILTIN_GATHER3DIV16SF:
35427 icode = CODE_FOR_avx512f_gatherdiv16sf;
35428 goto gather_gen;
35429 case IX86_BUILTIN_GATHER3DIV8DF:
35430 icode = CODE_FOR_avx512f_gatherdiv8df;
35431 goto gather_gen;
35432 case IX86_BUILTIN_GATHER3SIV16SI:
35433 icode = CODE_FOR_avx512f_gathersiv16si;
35434 goto gather_gen;
35435 case IX86_BUILTIN_GATHER3SIV8DI:
35436 icode = CODE_FOR_avx512f_gathersiv8di;
35437 goto gather_gen;
35438 case IX86_BUILTIN_GATHER3DIV16SI:
35439 icode = CODE_FOR_avx512f_gatherdiv16si;
35440 goto gather_gen;
35441 case IX86_BUILTIN_GATHER3DIV8DI:
35442 icode = CODE_FOR_avx512f_gatherdiv8di;
35443 goto gather_gen;
35444 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35445 icode = CODE_FOR_avx512f_gathersiv8df;
35446 goto gather_gen;
35447 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35448 icode = CODE_FOR_avx512f_gatherdiv16sf;
35449 goto gather_gen;
35450 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35451 icode = CODE_FOR_avx512f_gathersiv8di;
35452 goto gather_gen;
35453 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35454 icode = CODE_FOR_avx512f_gatherdiv16si;
35455 goto gather_gen;
35456 case IX86_BUILTIN_SCATTERSIV16SF:
35457 icode = CODE_FOR_avx512f_scattersiv16sf;
35458 goto scatter_gen;
35459 case IX86_BUILTIN_SCATTERSIV8DF:
35460 icode = CODE_FOR_avx512f_scattersiv8df;
35461 goto scatter_gen;
35462 case IX86_BUILTIN_SCATTERDIV16SF:
35463 icode = CODE_FOR_avx512f_scatterdiv16sf;
35464 goto scatter_gen;
35465 case IX86_BUILTIN_SCATTERDIV8DF:
35466 icode = CODE_FOR_avx512f_scatterdiv8df;
35467 goto scatter_gen;
35468 case IX86_BUILTIN_SCATTERSIV16SI:
35469 icode = CODE_FOR_avx512f_scattersiv16si;
35470 goto scatter_gen;
35471 case IX86_BUILTIN_SCATTERSIV8DI:
35472 icode = CODE_FOR_avx512f_scattersiv8di;
35473 goto scatter_gen;
35474 case IX86_BUILTIN_SCATTERDIV16SI:
35475 icode = CODE_FOR_avx512f_scatterdiv16si;
35476 goto scatter_gen;
35477 case IX86_BUILTIN_SCATTERDIV8DI:
35478 icode = CODE_FOR_avx512f_scatterdiv8di;
35479 goto scatter_gen;
35480 case IX86_BUILTIN_GATHERPFDPS:
35481 icode = CODE_FOR_avx512pf_gatherpfv16si;
35482 goto vec_prefetch_gen;
35483 case IX86_BUILTIN_GATHERPFQPS:
35484 icode = CODE_FOR_avx512pf_gatherpfv8di;
35485 goto vec_prefetch_gen;
35486 case IX86_BUILTIN_SCATTERPFDPS:
35487 icode = CODE_FOR_avx512pf_scatterpfv16si;
35488 goto vec_prefetch_gen;
35489 case IX86_BUILTIN_SCATTERPFQPS:
35490 icode = CODE_FOR_avx512pf_scatterpfv8di;
35491 goto vec_prefetch_gen;
35492
35493 gather_gen:
35494 rtx half;
35495 rtx (*gen) (rtx, rtx);
35496
35497 arg0 = CALL_EXPR_ARG (exp, 0);
35498 arg1 = CALL_EXPR_ARG (exp, 1);
35499 arg2 = CALL_EXPR_ARG (exp, 2);
35500 arg3 = CALL_EXPR_ARG (exp, 3);
35501 arg4 = CALL_EXPR_ARG (exp, 4);
35502 op0 = expand_normal (arg0);
35503 op1 = expand_normal (arg1);
35504 op2 = expand_normal (arg2);
35505 op3 = expand_normal (arg3);
35506 op4 = expand_normal (arg4);
35507 /* Note the arg order is different from the operand order. */
35508 mode0 = insn_data[icode].operand[1].mode;
35509 mode2 = insn_data[icode].operand[3].mode;
35510 mode3 = insn_data[icode].operand[4].mode;
35511 mode4 = insn_data[icode].operand[5].mode;
35512
35513 if (target == NULL_RTX
35514 || GET_MODE (target) != insn_data[icode].operand[0].mode)
35515 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35516 else
35517 subtarget = target;
35518
35519 switch (fcode)
35520 {
35521 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35522 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35523 half = gen_reg_rtx (V8SImode);
35524 if (!nonimmediate_operand (op2, V16SImode))
35525 op2 = copy_to_mode_reg (V16SImode, op2);
35526 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35527 op2 = half;
35528 break;
35529 case IX86_BUILTIN_GATHERALTSIV4DF:
35530 case IX86_BUILTIN_GATHERALTSIV4DI:
35531 half = gen_reg_rtx (V4SImode);
35532 if (!nonimmediate_operand (op2, V8SImode))
35533 op2 = copy_to_mode_reg (V8SImode, op2);
35534 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35535 op2 = half;
35536 break;
35537 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35538 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35539 half = gen_reg_rtx (mode0);
35540 if (mode0 == V8SFmode)
35541 gen = gen_vec_extract_lo_v16sf;
35542 else
35543 gen = gen_vec_extract_lo_v16si;
35544 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35545 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35546 emit_insn (gen (half, op0));
35547 op0 = half;
35548 if (GET_MODE (op3) != VOIDmode)
35549 {
35550 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35551 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35552 emit_insn (gen (half, op3));
35553 op3 = half;
35554 }
35555 break;
35556 case IX86_BUILTIN_GATHERALTDIV8SF:
35557 case IX86_BUILTIN_GATHERALTDIV8SI:
35558 half = gen_reg_rtx (mode0);
35559 if (mode0 == V4SFmode)
35560 gen = gen_vec_extract_lo_v8sf;
35561 else
35562 gen = gen_vec_extract_lo_v8si;
35563 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35564 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35565 emit_insn (gen (half, op0));
35566 op0 = half;
35567 if (GET_MODE (op3) != VOIDmode)
35568 {
35569 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35570 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35571 emit_insn (gen (half, op3));
35572 op3 = half;
35573 }
35574 break;
35575 default:
35576 break;
35577 }
35578
35579 /* Force memory operand only with base register here. But we
35580 don't want to do it on memory operand for other builtin
35581 functions. */
35582 op1 = ix86_zero_extend_to_Pmode (op1);
35583
35584 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35585 op0 = copy_to_mode_reg (mode0, op0);
35586 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35587 op1 = copy_to_mode_reg (Pmode, op1);
35588 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35589 op2 = copy_to_mode_reg (mode2, op2);
35590 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35591 {
35592 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35593 op3 = copy_to_mode_reg (mode3, op3);
35594 }
35595 else
35596 {
35597 op3 = copy_to_reg (op3);
35598 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35599 }
35600 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35601 {
35602 error ("the last argument must be scale 1, 2, 4, 8");
35603 return const0_rtx;
35604 }
35605
35606 /* Optimize. If mask is known to have all high bits set,
35607 replace op0 with pc_rtx to signal that the instruction
35608 overwrites the whole destination and doesn't use its
35609 previous contents. */
35610 if (optimize)
35611 {
35612 if (TREE_CODE (arg3) == INTEGER_CST)
35613 {
35614 if (integer_all_onesp (arg3))
35615 op0 = pc_rtx;
35616 }
35617 else if (TREE_CODE (arg3) == VECTOR_CST)
35618 {
35619 unsigned int negative = 0;
35620 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35621 {
35622 tree cst = VECTOR_CST_ELT (arg3, i);
35623 if (TREE_CODE (cst) == INTEGER_CST
35624 && tree_int_cst_sign_bit (cst))
35625 negative++;
35626 else if (TREE_CODE (cst) == REAL_CST
35627 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35628 negative++;
35629 }
35630 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35631 op0 = pc_rtx;
35632 }
35633 else if (TREE_CODE (arg3) == SSA_NAME
35634 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35635 {
35636 /* Recognize also when mask is like:
35637 __v2df src = _mm_setzero_pd ();
35638 __v2df mask = _mm_cmpeq_pd (src, src);
35639 or
35640 __v8sf src = _mm256_setzero_ps ();
35641 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35642 as that is a cheaper way to load all ones into
35643 a register than having to load a constant from
35644 memory. */
35645 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35646 if (is_gimple_call (def_stmt))
35647 {
35648 tree fndecl = gimple_call_fndecl (def_stmt);
35649 if (fndecl
35650 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35651 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35652 {
35653 case IX86_BUILTIN_CMPPD:
35654 case IX86_BUILTIN_CMPPS:
35655 case IX86_BUILTIN_CMPPD256:
35656 case IX86_BUILTIN_CMPPS256:
35657 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35658 break;
35659 /* FALLTHRU */
35660 case IX86_BUILTIN_CMPEQPD:
35661 case IX86_BUILTIN_CMPEQPS:
35662 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35663 && initializer_zerop (gimple_call_arg (def_stmt,
35664 1)))
35665 op0 = pc_rtx;
35666 break;
35667 default:
35668 break;
35669 }
35670 }
35671 }
35672 }
35673
35674 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
35675 if (! pat)
35676 return const0_rtx;
35677 emit_insn (pat);
35678
35679 switch (fcode)
35680 {
35681 case IX86_BUILTIN_GATHER3DIV16SF:
35682 if (target == NULL_RTX)
35683 target = gen_reg_rtx (V8SFmode);
35684 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
35685 break;
35686 case IX86_BUILTIN_GATHER3DIV16SI:
35687 if (target == NULL_RTX)
35688 target = gen_reg_rtx (V8SImode);
35689 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
35690 break;
35691 case IX86_BUILTIN_GATHERDIV8SF:
35692 if (target == NULL_RTX)
35693 target = gen_reg_rtx (V4SFmode);
35694 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
35695 break;
35696 case IX86_BUILTIN_GATHERDIV8SI:
35697 if (target == NULL_RTX)
35698 target = gen_reg_rtx (V4SImode);
35699 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
35700 break;
35701 default:
35702 target = subtarget;
35703 break;
35704 }
35705 return target;
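/* Usage sketch (illustrative only): an AVX2 gather intrinsic such as

     __m256d g = _mm256_i32gather_pd (base, vindex, 8);

   reaches this code as IX86_BUILTIN_GATHERSIV4DF and is expanded through
   CODE_FOR_avx2_gathersiv4df.  The unmasked intrinsic forms in the GCC
   headers build their all-ones mask with the setzero/_CMP_EQ_OQ idiom
   described in the comment above, which is exactly what the optimization
   recognizes so that the previous destination contents are not treated as
   an input.  */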
35706
35707 scatter_gen:
35708 arg0 = CALL_EXPR_ARG (exp, 0);
35709 arg1 = CALL_EXPR_ARG (exp, 1);
35710 arg2 = CALL_EXPR_ARG (exp, 2);
35711 arg3 = CALL_EXPR_ARG (exp, 3);
35712 arg4 = CALL_EXPR_ARG (exp, 4);
35713 op0 = expand_normal (arg0);
35714 op1 = expand_normal (arg1);
35715 op2 = expand_normal (arg2);
35716 op3 = expand_normal (arg3);
35717 op4 = expand_normal (arg4);
35718 mode1 = insn_data[icode].operand[1].mode;
35719 mode2 = insn_data[icode].operand[2].mode;
35720 mode3 = insn_data[icode].operand[3].mode;
35721 mode4 = insn_data[icode].operand[4].mode;
35722
35723 /* Force memory operand only with base register here. But we
35724 don't want to do it on memory operand for other builtin
35725 functions. */
35726 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
35727
35728 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35729 op0 = copy_to_mode_reg (Pmode, op0);
35730
35731 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
35732 {
35733 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35734 op1 = copy_to_mode_reg (mode1, op1);
35735 }
35736 else
35737 {
35738 op1 = copy_to_reg (op1);
35739 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
35740 }
35741
35742 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35743 op2 = copy_to_mode_reg (mode2, op2);
35744
35745 if (!insn_data[icode].operand[3].predicate (op3, mode3))
35746 op3 = copy_to_mode_reg (mode3, op3);
35747
35748 if (!insn_data[icode].operand[4].predicate (op4, mode4))
35749 {
35750 error ("the last argument must be scale 1, 2, 4, 8");
35751 return const0_rtx;
35752 }
35753
35754 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
35755 if (! pat)
35756 return const0_rtx;
35757
35758 emit_insn (pat);
35759 return 0;
35760
35761 vec_prefetch_gen:
35762 arg0 = CALL_EXPR_ARG (exp, 0);
35763 arg1 = CALL_EXPR_ARG (exp, 1);
35764 arg2 = CALL_EXPR_ARG (exp, 2);
35765 arg3 = CALL_EXPR_ARG (exp, 3);
35766 arg4 = CALL_EXPR_ARG (exp, 4);
35767 op0 = expand_normal (arg0);
35768 op1 = expand_normal (arg1);
35769 op2 = expand_normal (arg2);
35770 op3 = expand_normal (arg3);
35771 op4 = expand_normal (arg4);
35772 mode0 = insn_data[icode].operand[0].mode;
35773 mode1 = insn_data[icode].operand[1].mode;
35774 mode3 = insn_data[icode].operand[3].mode;
35775 mode4 = insn_data[icode].operand[4].mode;
35776
35777 if (GET_MODE (op0) == mode0
35778 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
35779 {
35780 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35781 op0 = copy_to_mode_reg (mode0, op0);
35782 }
35783 else if (op0 != constm1_rtx)
35784 {
35785 op0 = copy_to_reg (op0);
35786 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35787 }
35788
35789 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35790 op1 = copy_to_mode_reg (mode1, op1);
35791
35792 /* Force memory operand only with base register here. But we
35793 don't want to do it on memory operand for other builtin
35794 functions. */
35795 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
35796
35797 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
35798 op2 = copy_to_mode_reg (Pmode, op2);
35799
35800 if (!insn_data[icode].operand[3].predicate (op3, mode3))
35801 {
35802 error ("the forth argument must be scale 1, 2, 4, 8");
35803 return const0_rtx;
35804 }
35805
35806 if (!insn_data[icode].operand[4].predicate (op4, mode4))
35807 {
35808 error ("the last argument must be hint 0 or 1");
35809 return const0_rtx;
35810 }
35811
35812 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
35813 if (! pat)
35814 return const0_rtx;
35815
35816 emit_insn (pat);
35817
35818 return 0;
35819
35820 case IX86_BUILTIN_XABORT:
35821 icode = CODE_FOR_xabort;
35822 arg0 = CALL_EXPR_ARG (exp, 0);
35823 op0 = expand_normal (arg0);
35824 mode0 = insn_data[icode].operand[0].mode;
35825 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35826 {
35827 error ("the xabort's argument must be an 8-bit immediate");
35828 return const0_rtx;
35829 }
35830 emit_insn (gen_xabort (op0));
35831 return 0;
35832
35833 default:
35834 break;
35835 }
35836
35837 for (i = 0, d = bdesc_special_args;
35838 i < ARRAY_SIZE (bdesc_special_args);
35839 i++, d++)
35840 if (d->code == fcode)
35841 return ix86_expand_special_args_builtin (d, exp, target);
35842
35843 for (i = 0, d = bdesc_args;
35844 i < ARRAY_SIZE (bdesc_args);
35845 i++, d++)
35846 if (d->code == fcode)
35847 switch (fcode)
35848 {
35849 case IX86_BUILTIN_FABSQ:
35850 case IX86_BUILTIN_COPYSIGNQ:
35851 if (!TARGET_SSE)
35852 /* Emit a normal call if SSE isn't available. */
35853 return expand_call (exp, target, ignore);
35854 default:
35855 return ix86_expand_args_builtin (d, exp, target);
35856 }
35857
35858 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
35859 if (d->code == fcode)
35860 return ix86_expand_sse_comi (d, exp, target);
35861
35862 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
35863 if (d->code == fcode)
35864 return ix86_expand_round_builtin (d, exp, target);
35865
35866 for (i = 0, d = bdesc_pcmpestr;
35867 i < ARRAY_SIZE (bdesc_pcmpestr);
35868 i++, d++)
35869 if (d->code == fcode)
35870 return ix86_expand_sse_pcmpestr (d, exp, target);
35871
35872 for (i = 0, d = bdesc_pcmpistr;
35873 i < ARRAY_SIZE (bdesc_pcmpistr);
35874 i++, d++)
35875 if (d->code == fcode)
35876 return ix86_expand_sse_pcmpistr (d, exp, target);
35877
35878 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
35879 if (d->code == fcode)
35880 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
35881 (enum ix86_builtin_func_type)
35882 d->flag, d->comparison);
35883
35884 gcc_unreachable ();
35885 }
35886
35887 /* This returns the target-specific builtin with code CODE if
35888 current_function_decl has visibility on this builtin, which is checked
35889 using isa flags. Returns NULL_TREE otherwise. */
35890
35891 static tree ix86_get_builtin (enum ix86_builtins code)
35892 {
35893 struct cl_target_option *opts;
35894 tree target_tree = NULL_TREE;
35895
35896 /* Determine the isa flags of current_function_decl. */
35897
35898 if (current_function_decl)
35899 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
35900
35901 if (target_tree == NULL)
35902 target_tree = target_option_default_node;
35903
35904 opts = TREE_TARGET_OPTION (target_tree);
35905
35906 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
35907 return ix86_builtin_decl (code, true);
35908 else
35909 return NULL_TREE;
35910 }
35911
35912 /* Returns a function decl for a vectorized version of the builtin function
35913 with function decl FNDECL, producing vectors of type TYPE_OUT from input
35914 vectors of type TYPE_IN, or NULL_TREE if it is not available. */
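/* For instance (sketch only), when the vectorizer asks for a 256-bit
   vectorization of sqrt () over doubles it passes BUILT_IN_SQRT with DFmode
   and four lanes on both sides, and the switch below returns the decl for
   IX86_BUILTIN_SQRTPD256 so the loop can call that builtin directly.  */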
35915
35916 static tree
35917 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
35918 tree type_in)
35919 {
35920 enum machine_mode in_mode, out_mode;
35921 int in_n, out_n;
35922 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
35923
35924 if (TREE_CODE (type_out) != VECTOR_TYPE
35925 || TREE_CODE (type_in) != VECTOR_TYPE
35926 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
35927 return NULL_TREE;
35928
35929 out_mode = TYPE_MODE (TREE_TYPE (type_out));
35930 out_n = TYPE_VECTOR_SUBPARTS (type_out);
35931 in_mode = TYPE_MODE (TREE_TYPE (type_in));
35932 in_n = TYPE_VECTOR_SUBPARTS (type_in);
35933
35934 switch (fn)
35935 {
35936 case BUILT_IN_SQRT:
35937 if (out_mode == DFmode && in_mode == DFmode)
35938 {
35939 if (out_n == 2 && in_n == 2)
35940 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
35941 else if (out_n == 4 && in_n == 4)
35942 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
35943 else if (out_n == 8 && in_n == 8)
35944 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
35945 }
35946 break;
35947
35948 case BUILT_IN_EXP2F:
35949 if (out_mode == SFmode && in_mode == SFmode)
35950 {
35951 if (out_n == 16 && in_n == 16)
35952 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
35953 }
35954 break;
35955
35956 case BUILT_IN_SQRTF:
35957 if (out_mode == SFmode && in_mode == SFmode)
35958 {
35959 if (out_n == 4 && in_n == 4)
35960 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
35961 else if (out_n == 8 && in_n == 8)
35962 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
35963 else if (out_n == 16 && in_n == 16)
35964 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
35965 }
35966 break;
35967
35968 case BUILT_IN_IFLOOR:
35969 case BUILT_IN_LFLOOR:
35970 case BUILT_IN_LLFLOOR:
35971 /* The round insn does not trap on denormals. */
35972 if (flag_trapping_math || !TARGET_ROUND)
35973 break;
35974
35975 if (out_mode == SImode && in_mode == DFmode)
35976 {
35977 if (out_n == 4 && in_n == 2)
35978 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
35979 else if (out_n == 8 && in_n == 4)
35980 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
35981 else if (out_n == 16 && in_n == 8)
35982 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
35983 }
35984 break;
35985
35986 case BUILT_IN_IFLOORF:
35987 case BUILT_IN_LFLOORF:
35988 case BUILT_IN_LLFLOORF:
35989 /* The round insn does not trap on denormals. */
35990 if (flag_trapping_math || !TARGET_ROUND)
35991 break;
35992
35993 if (out_mode == SImode && in_mode == SFmode)
35994 {
35995 if (out_n == 4 && in_n == 4)
35996 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
35997 else if (out_n == 8 && in_n == 8)
35998 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
35999 }
36000 break;
36001
36002 case BUILT_IN_ICEIL:
36003 case BUILT_IN_LCEIL:
36004 case BUILT_IN_LLCEIL:
36005 /* The round insn does not trap on denormals. */
36006 if (flag_trapping_math || !TARGET_ROUND)
36007 break;
36008
36009 if (out_mode == SImode && in_mode == DFmode)
36010 {
36011 if (out_n == 4 && in_n == 2)
36012 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36013 else if (out_n == 8 && in_n == 4)
36014 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36015 else if (out_n == 16 && in_n == 8)
36016 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36017 }
36018 break;
36019
36020 case BUILT_IN_ICEILF:
36021 case BUILT_IN_LCEILF:
36022 case BUILT_IN_LLCEILF:
36023 /* The round insn does not trap on denormals. */
36024 if (flag_trapping_math || !TARGET_ROUND)
36025 break;
36026
36027 if (out_mode == SImode && in_mode == SFmode)
36028 {
36029 if (out_n == 4 && in_n == 4)
36030 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36031 else if (out_n == 8 && in_n == 8)
36032 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36033 }
36034 break;
36035
36036 case BUILT_IN_IRINT:
36037 case BUILT_IN_LRINT:
36038 case BUILT_IN_LLRINT:
36039 if (out_mode == SImode && in_mode == DFmode)
36040 {
36041 if (out_n == 4 && in_n == 2)
36042 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36043 else if (out_n == 8 && in_n == 4)
36044 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36045 }
36046 break;
36047
36048 case BUILT_IN_IRINTF:
36049 case BUILT_IN_LRINTF:
36050 case BUILT_IN_LLRINTF:
36051 if (out_mode == SImode && in_mode == SFmode)
36052 {
36053 if (out_n == 4 && in_n == 4)
36054 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36055 else if (out_n == 8 && in_n == 8)
36056 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36057 }
36058 break;
36059
36060 case BUILT_IN_IROUND:
36061 case BUILT_IN_LROUND:
36062 case BUILT_IN_LLROUND:
36063 /* The round insn does not trap on denormals. */
36064 if (flag_trapping_math || !TARGET_ROUND)
36065 break;
36066
36067 if (out_mode == SImode && in_mode == DFmode)
36068 {
36069 if (out_n == 4 && in_n == 2)
36070 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36071 else if (out_n == 8 && in_n == 4)
36072 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36073 else if (out_n == 16 && in_n == 8)
36074 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36075 }
36076 break;
36077
36078 case BUILT_IN_IROUNDF:
36079 case BUILT_IN_LROUNDF:
36080 case BUILT_IN_LLROUNDF:
36081 /* The round insn does not trap on denormals. */
36082 if (flag_trapping_math || !TARGET_ROUND)
36083 break;
36084
36085 if (out_mode == SImode && in_mode == SFmode)
36086 {
36087 if (out_n == 4 && in_n == 4)
36088 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36089 else if (out_n == 8 && in_n == 8)
36090 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36091 }
36092 break;
36093
36094 case BUILT_IN_COPYSIGN:
36095 if (out_mode == DFmode && in_mode == DFmode)
36096 {
36097 if (out_n == 2 && in_n == 2)
36098 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36099 else if (out_n == 4 && in_n == 4)
36100 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36101 else if (out_n == 8 && in_n == 8)
36102 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36103 }
36104 break;
36105
36106 case BUILT_IN_COPYSIGNF:
36107 if (out_mode == SFmode && in_mode == SFmode)
36108 {
36109 if (out_n == 4 && in_n == 4)
36110 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36111 else if (out_n == 8 && in_n == 8)
36112 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36113 else if (out_n == 16 && in_n == 16)
36114 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36115 }
36116 break;
36117
36118 case BUILT_IN_FLOOR:
36119 /* The round insn does not trap on denormals. */
36120 if (flag_trapping_math || !TARGET_ROUND)
36121 break;
36122
36123 if (out_mode == DFmode && in_mode == DFmode)
36124 {
36125 if (out_n == 2 && in_n == 2)
36126 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36127 else if (out_n == 4 && in_n == 4)
36128 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36129 }
36130 break;
36131
36132 case BUILT_IN_FLOORF:
36133 /* The round insn does not trap on denormals. */
36134 if (flag_trapping_math || !TARGET_ROUND)
36135 break;
36136
36137 if (out_mode == SFmode && in_mode == SFmode)
36138 {
36139 if (out_n == 4 && in_n == 4)
36140 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36141 else if (out_n == 8 && in_n == 8)
36142 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36143 }
36144 break;
36145
36146 case BUILT_IN_CEIL:
36147 /* The round insn does not trap on denormals. */
36148 if (flag_trapping_math || !TARGET_ROUND)
36149 break;
36150
36151 if (out_mode == DFmode && in_mode == DFmode)
36152 {
36153 if (out_n == 2 && in_n == 2)
36154 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36155 else if (out_n == 4 && in_n == 4)
36156 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36157 }
36158 break;
36159
36160 case BUILT_IN_CEILF:
36161 /* The round insn does not trap on denormals. */
36162 if (flag_trapping_math || !TARGET_ROUND)
36163 break;
36164
36165 if (out_mode == SFmode && in_mode == SFmode)
36166 {
36167 if (out_n == 4 && in_n == 4)
36168 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36169 else if (out_n == 8 && in_n == 8)
36170 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36171 }
36172 break;
36173
36174 case BUILT_IN_TRUNC:
36175 /* The round insn does not trap on denormals. */
36176 if (flag_trapping_math || !TARGET_ROUND)
36177 break;
36178
36179 if (out_mode == DFmode && in_mode == DFmode)
36180 {
36181 if (out_n == 2 && in_n == 2)
36182 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36183 else if (out_n == 4 && in_n == 4)
36184 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36185 }
36186 break;
36187
36188 case BUILT_IN_TRUNCF:
36189 /* The round insn does not trap on denormals. */
36190 if (flag_trapping_math || !TARGET_ROUND)
36191 break;
36192
36193 if (out_mode == SFmode && in_mode == SFmode)
36194 {
36195 if (out_n == 4 && in_n == 4)
36196 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36197 else if (out_n == 8 && in_n == 8)
36198 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36199 }
36200 break;
36201
36202 case BUILT_IN_RINT:
36203 /* The round insn does not trap on denormals. */
36204 if (flag_trapping_math || !TARGET_ROUND)
36205 break;
36206
36207 if (out_mode == DFmode && in_mode == DFmode)
36208 {
36209 if (out_n == 2 && in_n == 2)
36210 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36211 else if (out_n == 4 && in_n == 4)
36212 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36213 }
36214 break;
36215
36216 case BUILT_IN_RINTF:
36217 /* The round insn does not trap on denormals. */
36218 if (flag_trapping_math || !TARGET_ROUND)
36219 break;
36220
36221 if (out_mode == SFmode && in_mode == SFmode)
36222 {
36223 if (out_n == 4 && in_n == 4)
36224 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36225 else if (out_n == 8 && in_n == 8)
36226 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36227 }
36228 break;
36229
36230 case BUILT_IN_ROUND:
36231 /* The round insn does not trap on denormals. */
36232 if (flag_trapping_math || !TARGET_ROUND)
36233 break;
36234
36235 if (out_mode == DFmode && in_mode == DFmode)
36236 {
36237 if (out_n == 2 && in_n == 2)
36238 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36239 else if (out_n == 4 && in_n == 4)
36240 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36241 }
36242 break;
36243
36244 case BUILT_IN_ROUNDF:
36245 /* The round insn does not trap on denormals. */
36246 if (flag_trapping_math || !TARGET_ROUND)
36247 break;
36248
36249 if (out_mode == SFmode && in_mode == SFmode)
36250 {
36251 if (out_n == 4 && in_n == 4)
36252 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36253 else if (out_n == 8 && in_n == 8)
36254 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36255 }
36256 break;
36257
36258 case BUILT_IN_FMA:
36259 if (out_mode == DFmode && in_mode == DFmode)
36260 {
36261 if (out_n == 2 && in_n == 2)
36262 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36263 if (out_n == 4 && in_n == 4)
36264 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36265 }
36266 break;
36267
36268 case BUILT_IN_FMAF:
36269 if (out_mode == SFmode && in_mode == SFmode)
36270 {
36271 if (out_n == 4 && in_n == 4)
36272 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36273 if (out_n == 8 && in_n == 8)
36274 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36275 }
36276 break;
36277
36278 default:
36279 break;
36280 }
36281
36282 /* Dispatch to a handler for a vectorization library. */
36283 if (ix86_veclib_handler)
36284 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36285 type_in);
36286
36287 return NULL_TREE;
36288 }
36289
36290 /* Handler for an SVML-style interface to
36291 a library with vectorized intrinsics. */
36292
36293 static tree
36294 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36295 {
36296 char name[20];
36297 tree fntype, new_fndecl, args;
36298 unsigned arity;
36299 const char *bname;
36300 enum machine_mode el_mode, in_mode;
36301 int n, in_n;
36302
36303 /* The SVML library is suitable for unsafe math only.  */
36304 if (!flag_unsafe_math_optimizations)
36305 return NULL_TREE;
36306
36307 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36308 n = TYPE_VECTOR_SUBPARTS (type_out);
36309 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36310 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36311 if (el_mode != in_mode
36312 || n != in_n)
36313 return NULL_TREE;
36314
36315 switch (fn)
36316 {
36317 case BUILT_IN_EXP:
36318 case BUILT_IN_LOG:
36319 case BUILT_IN_LOG10:
36320 case BUILT_IN_POW:
36321 case BUILT_IN_TANH:
36322 case BUILT_IN_TAN:
36323 case BUILT_IN_ATAN:
36324 case BUILT_IN_ATAN2:
36325 case BUILT_IN_ATANH:
36326 case BUILT_IN_CBRT:
36327 case BUILT_IN_SINH:
36328 case BUILT_IN_SIN:
36329 case BUILT_IN_ASINH:
36330 case BUILT_IN_ASIN:
36331 case BUILT_IN_COSH:
36332 case BUILT_IN_COS:
36333 case BUILT_IN_ACOSH:
36334 case BUILT_IN_ACOS:
36335 if (el_mode != DFmode || n != 2)
36336 return NULL_TREE;
36337 break;
36338
36339 case BUILT_IN_EXPF:
36340 case BUILT_IN_LOGF:
36341 case BUILT_IN_LOG10F:
36342 case BUILT_IN_POWF:
36343 case BUILT_IN_TANHF:
36344 case BUILT_IN_TANF:
36345 case BUILT_IN_ATANF:
36346 case BUILT_IN_ATAN2F:
36347 case BUILT_IN_ATANHF:
36348 case BUILT_IN_CBRTF:
36349 case BUILT_IN_SINHF:
36350 case BUILT_IN_SINF:
36351 case BUILT_IN_ASINHF:
36352 case BUILT_IN_ASINF:
36353 case BUILT_IN_COSHF:
36354 case BUILT_IN_COSF:
36355 case BUILT_IN_ACOSHF:
36356 case BUILT_IN_ACOSF:
36357 if (el_mode != SFmode || n != 4)
36358 return NULL_TREE;
36359 break;
36360
36361 default:
36362 return NULL_TREE;
36363 }
36364
36365 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36366
36367 if (fn == BUILT_IN_LOGF)
36368 strcpy (name, "vmlsLn4");
36369 else if (fn == BUILT_IN_LOG)
36370 strcpy (name, "vmldLn2");
36371 else if (n == 4)
36372 {
36373 sprintf (name, "vmls%s", bname+10);
36374 name[strlen (name)-1] = '4';
36375 }
36376 else
36377 sprintf (name, "vmld%s2", bname+10);
36378
36379 /* Convert to uppercase. */
36380 name[4] &= ~0x20;
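/* For example, BUILT_IN_SINF ("__builtin_sinf") is mangled to "vmlsSin4"
   and BUILT_IN_SIN ("__builtin_sin") to "vmldSin2".  */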
36381
36382 arity = 0;
36383 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36384 args;
36385 args = TREE_CHAIN (args))
36386 arity++;
36387
36388 if (arity == 1)
36389 fntype = build_function_type_list (type_out, type_in, NULL);
36390 else
36391 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36392
36393 /* Build a function declaration for the vectorized function. */
36394 new_fndecl = build_decl (BUILTINS_LOCATION,
36395 FUNCTION_DECL, get_identifier (name), fntype);
36396 TREE_PUBLIC (new_fndecl) = 1;
36397 DECL_EXTERNAL (new_fndecl) = 1;
36398 DECL_IS_NOVOPS (new_fndecl) = 1;
36399 TREE_READONLY (new_fndecl) = 1;
36400
36401 return new_fndecl;
36402 }
36403
36404 /* Handler for an ACML-style interface to
36405 a library with vectorized intrinsics. */
36406
36407 static tree
36408 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36409 {
36410 char name[20] = "__vr.._";
36411 tree fntype, new_fndecl, args;
36412 unsigned arity;
36413 const char *bname;
36414 enum machine_mode el_mode, in_mode;
36415 int n, in_n;
36416
36417 /* The ACML library is 64-bit only and suitable for unsafe math only, as
36418 it does not correctly support parts of IEEE arithmetic, such as
36419 denormals, with the required precision.  */
36420 if (!TARGET_64BIT
36421 || !flag_unsafe_math_optimizations)
36422 return NULL_TREE;
36423
36424 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36425 n = TYPE_VECTOR_SUBPARTS (type_out);
36426 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36427 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36428 if (el_mode != in_mode
36429 || n != in_n)
36430 return NULL_TREE;
36431
36432 switch (fn)
36433 {
36434 case BUILT_IN_SIN:
36435 case BUILT_IN_COS:
36436 case BUILT_IN_EXP:
36437 case BUILT_IN_LOG:
36438 case BUILT_IN_LOG2:
36439 case BUILT_IN_LOG10:
36440 name[4] = 'd';
36441 name[5] = '2';
36442 if (el_mode != DFmode
36443 || n != 2)
36444 return NULL_TREE;
36445 break;
36446
36447 case BUILT_IN_SINF:
36448 case BUILT_IN_COSF:
36449 case BUILT_IN_EXPF:
36450 case BUILT_IN_POWF:
36451 case BUILT_IN_LOGF:
36452 case BUILT_IN_LOG2F:
36453 case BUILT_IN_LOG10F:
36454 name[4] = 's';
36455 name[5] = '4';
36456 if (el_mode != SFmode
36457 || n != 4)
36458 return NULL_TREE;
36459 break;
36460
36461 default:
36462 return NULL_TREE;
36463 }
36464
36465 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36466 sprintf (name + 7, "%s", bname+10);
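/* For example, BUILT_IN_SIN is mangled to "__vrd2_sin" and BUILT_IN_SINF
   to "__vrs4_sinf".  */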
36467
36468 arity = 0;
36469 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36470 args;
36471 args = TREE_CHAIN (args))
36472 arity++;
36473
36474 if (arity == 1)
36475 fntype = build_function_type_list (type_out, type_in, NULL);
36476 else
36477 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36478
36479 /* Build a function declaration for the vectorized function. */
36480 new_fndecl = build_decl (BUILTINS_LOCATION,
36481 FUNCTION_DECL, get_identifier (name), fntype);
36482 TREE_PUBLIC (new_fndecl) = 1;
36483 DECL_EXTERNAL (new_fndecl) = 1;
36484 DECL_IS_NOVOPS (new_fndecl) = 1;
36485 TREE_READONLY (new_fndecl) = 1;
36486
36487 return new_fndecl;
36488 }
36489
36490 /* Returns a decl of a function that implements gather load with
36491 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
36492 Return NULL_TREE if it is not available. */
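/* For instance, a gather of V2DFmode data indexed by SImode offsets maps
   to IX86_BUILTIN_GATHERSIV2DF in the switch below.  */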
36493
36494 static tree
36495 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36496 const_tree index_type, int scale)
36497 {
36498 bool si;
36499 enum ix86_builtins code;
36500
36501 if (! TARGET_AVX2)
36502 return NULL_TREE;
36503
36504 if ((TREE_CODE (index_type) != INTEGER_TYPE
36505 && !POINTER_TYPE_P (index_type))
36506 || (TYPE_MODE (index_type) != SImode
36507 && TYPE_MODE (index_type) != DImode))
36508 return NULL_TREE;
36509
36510 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36511 return NULL_TREE;
36512
36513 /* The v*gather* insns sign-extend the index to pointer mode.  */
36514 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36515 && TYPE_UNSIGNED (index_type))
36516 return NULL_TREE;
36517
36518 if (scale <= 0
36519 || scale > 8
36520 || (scale & (scale - 1)) != 0)
36521 return NULL_TREE;
36522
36523 si = TYPE_MODE (index_type) == SImode;
36524 switch (TYPE_MODE (mem_vectype))
36525 {
36526 case V2DFmode:
36527 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36528 break;
36529 case V4DFmode:
36530 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36531 break;
36532 case V2DImode:
36533 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36534 break;
36535 case V4DImode:
36536 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36537 break;
36538 case V4SFmode:
36539 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36540 break;
36541 case V8SFmode:
36542 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36543 break;
36544 case V4SImode:
36545 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36546 break;
36547 case V8SImode:
36548 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36549 break;
36550 case V8DFmode:
36551 if (TARGET_AVX512F)
36552 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36553 else
36554 return NULL_TREE;
36555 break;
36556 case V8DImode:
36557 if (TARGET_AVX512F)
36558 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36559 else
36560 return NULL_TREE;
36561 break;
36562 case V16SFmode:
36563 if (TARGET_AVX512F)
36564 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36565 else
36566 return NULL_TREE;
36567 break;
36568 case V16SImode:
36569 if (TARGET_AVX512F)
36570 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36571 else
36572 return NULL_TREE;
36573 break;
36574 default:
36575 return NULL_TREE;
36576 }
36577
36578 return ix86_get_builtin (code);
36579 }
36580
36581 /* Returns a decl of a target-specific builtin that implements the
36582 reciprocal of the function, or NULL_TREE if it is not available.  */
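/* For example, when unsafe math optimizations are enabled, BUILT_IN_SQRTF
   is mapped to the reciprocal square root builtin IX86_BUILTIN_RSQRTF
   selected below.  */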
36583
36584 static tree
36585 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36586 bool sqrt ATTRIBUTE_UNUSED)
36587 {
36588 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36589 && flag_finite_math_only && !flag_trapping_math
36590 && flag_unsafe_math_optimizations))
36591 return NULL_TREE;
36592
36593 if (md_fn)
36594 /* Machine dependent builtins. */
36595 switch (fn)
36596 {
36597 /* Vectorized version of sqrt to rsqrt conversion. */
36598 case IX86_BUILTIN_SQRTPS_NR:
36599 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36600
36601 case IX86_BUILTIN_SQRTPS_NR256:
36602 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36603
36604 default:
36605 return NULL_TREE;
36606 }
36607 else
36608 /* Normal builtins. */
36609 switch (fn)
36610 {
36611 /* Sqrt to rsqrt conversion. */
36612 case BUILT_IN_SQRTF:
36613 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36614
36615 default:
36616 return NULL_TREE;
36617 }
36618 }
36619 \f
36620 /* Helper for avx_vpermilps256_operand et al. This is also used by
36621 the expansion functions to turn the parallel back into a mask.
36622 The return value is 0 for no match and the imm8+1 for a match. */
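/* For example, a V4SFmode parallel selecting elements {1,0,3,2} encodes
   the imm8 0xb1, so this function returns 0xb2.  */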
36623
36624 int
36625 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36626 {
36627 unsigned i, nelt = GET_MODE_NUNITS (mode);
36628 unsigned mask = 0;
36629 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36630
36631 if (XVECLEN (par, 0) != (int) nelt)
36632 return 0;
36633
36634 /* Validate that all of the elements are constants, and not totally
36635 out of range. Copy the data into an integral array to make the
36636 subsequent checks easier. */
36637 for (i = 0; i < nelt; ++i)
36638 {
36639 rtx er = XVECEXP (par, 0, i);
36640 unsigned HOST_WIDE_INT ei;
36641
36642 if (!CONST_INT_P (er))
36643 return 0;
36644 ei = INTVAL (er);
36645 if (ei >= nelt)
36646 return 0;
36647 ipar[i] = ei;
36648 }
36649
36650 switch (mode)
36651 {
36652 case V8DFmode:
36653 /* In the 512-bit DFmode case, we can only move elements within
36654 a 128-bit lane. First fill the second part of the mask,
36655 then fallthru. */
36656 for (i = 4; i < 6; ++i)
36657 {
36658 if (ipar[i] < 4 || ipar[i] >= 6)
36659 return 0;
36660 mask |= (ipar[i] - 4) << i;
36661 }
36662 for (i = 6; i < 8; ++i)
36663 {
36664 if (ipar[i] < 6)
36665 return 0;
36666 mask |= (ipar[i] - 6) << i;
36667 }
36668 /* FALLTHRU */
36669
36670 case V4DFmode:
36671 /* In the 256-bit DFmode case, we can only move elements within
36672 a 128-bit lane. */
36673 for (i = 0; i < 2; ++i)
36674 {
36675 if (ipar[i] >= 2)
36676 return 0;
36677 mask |= ipar[i] << i;
36678 }
36679 for (i = 2; i < 4; ++i)
36680 {
36681 if (ipar[i] < 2)
36682 return 0;
36683 mask |= (ipar[i] - 2) << i;
36684 }
36685 break;
36686
36687 case V16SFmode:
36688 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
36689 must mirror the permutation in the lower 256 bits.  */
36690 for (i = 0; i < 8; ++i)
36691 if (ipar[i] + 8 != ipar[i + 8])
36692 return 0;
36693 /* FALLTHRU */
36694
36695 case V8SFmode:
36696 /* In the 256-bit SFmode case, we have full freedom of
36697 movement within the low 128-bit lane, but the high 128-bit
36698 lane must mirror the exact same pattern. */
36699 for (i = 0; i < 4; ++i)
36700 if (ipar[i] + 4 != ipar[i + 4])
36701 return 0;
36702 nelt = 4;
36703 /* FALLTHRU */
36704
36705 case V2DFmode:
36706 case V4SFmode:
36707 /* In the 128-bit case, we have full freedom in the placement of
36708 the elements from the source operand. */
36709 for (i = 0; i < nelt; ++i)
36710 mask |= ipar[i] << (i * (nelt / 2));
36711 break;
36712
36713 default:
36714 gcc_unreachable ();
36715 }
36716
36717 /* Make sure success has a non-zero value by adding one. */
36718 return mask + 1;
36719 }
36720
36721 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
36722 the expansion functions to turn the parallel back into a mask.
36723 The return value is 0 for no match and the imm8+1 for a match. */
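/* For example, a V8SFmode parallel selecting elements {4..7,12..15}, i.e.
   the high half of each operand, encodes the imm8 0x31, so this function
   returns 0x32.  */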
36724
36725 int
36726 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
36727 {
36728 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
36729 unsigned mask = 0;
36730 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
36731
36732 if (XVECLEN (par, 0) != (int) nelt)
36733 return 0;
36734
36735 /* Validate that all of the elements are constants, and not totally
36736 out of range. Copy the data into an integral array to make the
36737 subsequent checks easier. */
36738 for (i = 0; i < nelt; ++i)
36739 {
36740 rtx er = XVECEXP (par, 0, i);
36741 unsigned HOST_WIDE_INT ei;
36742
36743 if (!CONST_INT_P (er))
36744 return 0;
36745 ei = INTVAL (er);
36746 if (ei >= 2 * nelt)
36747 return 0;
36748 ipar[i] = ei;
36749 }
36750
36751 /* Validate that each half of the permute selects consecutive elements.  */
36752 for (i = 0; i < nelt2 - 1; ++i)
36753 if (ipar[i] + 1 != ipar[i + 1])
36754 return 0;
36755 for (i = nelt2; i < nelt - 1; ++i)
36756 if (ipar[i] + 1 != ipar[i + 1])
36757 return 0;
36758
36759 /* Reconstruct the mask. */
36760 for (i = 0; i < 2; ++i)
36761 {
36762 unsigned e = ipar[i * nelt2];
36763 if (e % nelt2)
36764 return 0;
36765 e /= nelt2;
36766 mask |= e << (i * 4);
36767 }
36768
36769 /* Make sure success has a non-zero value by adding one. */
36770 return mask + 1;
36771 }
36772 \f
36773 /* Store OPERAND to the memory after reload is completed. This means
36774 that we can't easily use assign_stack_local. */
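/* The value is written either into the red zone below the stack pointer,
   when one is available, or pushed onto the stack; the returned MEM
   refers to that slot.  */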
36775 rtx
36776 ix86_force_to_memory (enum machine_mode mode, rtx operand)
36777 {
36778 rtx result;
36779
36780 gcc_assert (reload_completed);
36781 if (ix86_using_red_zone ())
36782 {
36783 result = gen_rtx_MEM (mode,
36784 gen_rtx_PLUS (Pmode,
36785 stack_pointer_rtx,
36786 GEN_INT (-RED_ZONE_SIZE)));
36787 emit_move_insn (result, operand);
36788 }
36789 else if (TARGET_64BIT)
36790 {
36791 switch (mode)
36792 {
36793 case HImode:
36794 case SImode:
36795 operand = gen_lowpart (DImode, operand);
36796 /* FALLTHRU */
36797 case DImode:
36798 emit_insn (
36799 gen_rtx_SET (VOIDmode,
36800 gen_rtx_MEM (DImode,
36801 gen_rtx_PRE_DEC (DImode,
36802 stack_pointer_rtx)),
36803 operand));
36804 break;
36805 default:
36806 gcc_unreachable ();
36807 }
36808 result = gen_rtx_MEM (mode, stack_pointer_rtx);
36809 }
36810 else
36811 {
36812 switch (mode)
36813 {
36814 case DImode:
36815 {
36816 rtx operands[2];
36817 split_double_mode (mode, &operand, 1, operands, operands + 1);
36818 emit_insn (
36819 gen_rtx_SET (VOIDmode,
36820 gen_rtx_MEM (SImode,
36821 gen_rtx_PRE_DEC (Pmode,
36822 stack_pointer_rtx)),
36823 operands[1]));
36824 emit_insn (
36825 gen_rtx_SET (VOIDmode,
36826 gen_rtx_MEM (SImode,
36827 gen_rtx_PRE_DEC (Pmode,
36828 stack_pointer_rtx)),
36829 operands[0]));
36830 }
36831 break;
36832 case HImode:
36833 /* Store HImodes as SImodes. */
36834 operand = gen_lowpart (SImode, operand);
36835 /* FALLTHRU */
36836 case SImode:
36837 emit_insn (
36838 gen_rtx_SET (VOIDmode,
36839 gen_rtx_MEM (GET_MODE (operand),
36840 gen_rtx_PRE_DEC (SImode,
36841 stack_pointer_rtx)),
36842 operand));
36843 break;
36844 default:
36845 gcc_unreachable ();
36846 }
36847 result = gen_rtx_MEM (mode, stack_pointer_rtx);
36848 }
36849 return result;
36850 }
36851
36852 /* Free operand from the memory. */
36853 void
36854 ix86_free_from_memory (enum machine_mode mode)
36855 {
36856 if (!ix86_using_red_zone ())
36857 {
36858 int size;
36859
36860 if (mode == DImode || TARGET_64BIT)
36861 size = 8;
36862 else
36863 size = 4;
36864 /* Use LEA to deallocate stack space. In peephole2 it will be converted
36865 to a pop or add instruction if registers are available.  */
36866 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
36867 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
36868 GEN_INT (size))));
36869 }
36870 }
36871
36872 /* Return a register priority for hard reg REGNO. */
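/* Higher values make the register allocator prefer the register; see the
   TARGET_REGISTER_PRIORITY hook.  */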
36873 static int
36874 ix86_register_priority (int hard_regno)
36875 {
36876 /* Used as a base, ebp and r13 always want a displacement, and r12 as a
36877 base always wants an index.  So discourage their use in an
36878 address.  */
36879 if (hard_regno == R12_REG || hard_regno == R13_REG)
36880 return 0;
36881 if (hard_regno == BP_REG)
36882 return 1;
36883 /* New x86-64 int registers result in bigger code size. Discourage
36884 them. */
36885 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
36886 return 2;
36887 /* New x86-64 SSE registers result in bigger code size. Discourage
36888 them. */
36889 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
36890 return 2;
36891 /* Using the AX register results in smaller code.  Prefer it.  */
36892 if (hard_regno == 0)
36893 return 4;
36894 return 3;
36895 }
36896
36897 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
36898
36899 Put float CONST_DOUBLE in the constant pool instead of fp regs.
36900 QImode must go into class Q_REGS.
36901 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
36902 movdf to do mem-to-mem moves through integer regs. */
36903
36904 static reg_class_t
36905 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
36906 {
36907 enum machine_mode mode = GET_MODE (x);
36908
36909 /* We're only allowed to return a subclass of CLASS. Many of the
36910 following checks fail for NO_REGS, so eliminate that early. */
36911 if (regclass == NO_REGS)
36912 return NO_REGS;
36913
36914 /* All classes can load zeros. */
36915 if (x == CONST0_RTX (mode))
36916 return regclass;
36917
36918 /* Force constants into memory if we are loading a (nonzero) constant into
36919 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
36920 instructions to load from a constant. */
36921 if (CONSTANT_P (x)
36922 && (MAYBE_MMX_CLASS_P (regclass)
36923 || MAYBE_SSE_CLASS_P (regclass)
36924 || MAYBE_MASK_CLASS_P (regclass)))
36925 return NO_REGS;
36926
36927 /* Prefer SSE regs only, if we can use them for math. */
36928 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
36929 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
36930
36931 /* Floating-point constants need more complex checks. */
36932 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
36933 {
36934 /* General regs can load everything. */
36935 if (reg_class_subset_p (regclass, GENERAL_REGS))
36936 return regclass;
36937
36938 /* Floats can load 0 and 1 plus some others. Note that we eliminated
36939 zero above. We only want to wind up preferring 80387 registers if
36940 we plan on doing computation with them. */
36941 if (TARGET_80387
36942 && standard_80387_constant_p (x) > 0)
36943 {
36944 /* Limit class to non-sse. */
36945 if (regclass == FLOAT_SSE_REGS)
36946 return FLOAT_REGS;
36947 if (regclass == FP_TOP_SSE_REGS)
36948 return FP_TOP_REG;
36949 if (regclass == FP_SECOND_SSE_REGS)
36950 return FP_SECOND_REG;
36951 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
36952 return regclass;
36953 }
36954
36955 return NO_REGS;
36956 }
36957
36958 /* Generally when we see PLUS here, it's the function invariant
36959 (plus soft-fp const_int), which can only be computed into general
36960 regs. */
36961 if (GET_CODE (x) == PLUS)
36962 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
36963
36964 /* QImode constants are easy to load, but non-constant QImode data
36965 must go into Q_REGS. */
36966 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
36967 {
36968 if (reg_class_subset_p (regclass, Q_REGS))
36969 return regclass;
36970 if (reg_class_subset_p (Q_REGS, regclass))
36971 return Q_REGS;
36972 return NO_REGS;
36973 }
36974
36975 return regclass;
36976 }
36977
36978 /* Discourage putting floating-point values in SSE registers unless
36979 SSE math is being used, and likewise for the 387 registers. */
36980 static reg_class_t
36981 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
36982 {
36983 enum machine_mode mode = GET_MODE (x);
36984
36985 /* Restrict the output reload class to the register bank that we are doing
36986 math on. If we would like not to return a subset of CLASS, reject this
36987 alternative: if reload cannot do this, it will still use its choice. */
36988 mode = GET_MODE (x);
36989 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
36990 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
36991
36992 if (X87_FLOAT_MODE_P (mode))
36993 {
36994 if (regclass == FP_TOP_SSE_REGS)
36995 return FP_TOP_REG;
36996 else if (regclass == FP_SECOND_SSE_REGS)
36997 return FP_SECOND_REG;
36998 else
36999 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37000 }
37001
37002 return regclass;
37003 }
37004
37005 static reg_class_t
37006 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37007 enum machine_mode mode, secondary_reload_info *sri)
37008 {
37009 /* Double-word spills from general registers to non-offsettable memory
37010 references (zero-extended addresses) require special handling. */
37011 if (TARGET_64BIT
37012 && MEM_P (x)
37013 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37014 && INTEGER_CLASS_P (rclass)
37015 && !offsettable_memref_p (x))
37016 {
37017 sri->icode = (in_p
37018 ? CODE_FOR_reload_noff_load
37019 : CODE_FOR_reload_noff_store);
37020 /* Add the cost of moving address to a temporary. */
37021 sri->extra_cost = 1;
37022
37023 return NO_REGS;
37024 }
37025
37026 /* QImode spills from non-QI registers require an
37027 intermediate register on 32-bit targets.  */
37028 if (mode == QImode
37029 && (MAYBE_MASK_CLASS_P (rclass)
37030 || (!TARGET_64BIT && !in_p
37031 && INTEGER_CLASS_P (rclass)
37032 && MAYBE_NON_Q_CLASS_P (rclass))))
37033 {
37034 int regno;
37035
37036 if (REG_P (x))
37037 regno = REGNO (x);
37038 else
37039 regno = -1;
37040
37041 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37042 regno = true_regnum (x);
37043
37044 /* Return Q_REGS if the operand is in memory. */
37045 if (regno == -1)
37046 return Q_REGS;
37047 }
37048
37049 /* This condition handles the corner case where an expression involving
37050 pointers gets vectorized. We're trying to use the address of a
37051 stack slot as a vector initializer.
37052
37053 (set (reg:V2DI 74 [ vect_cst_.2 ])
37054 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37055
37056 Eventually frame gets turned into sp+offset like this:
37057
37058 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37059 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37060 (const_int 392 [0x188]))))
37061
37062 That later gets turned into:
37063
37064 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37065 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37066 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37067
37068 We'll have the following reload recorded:
37069
37070 Reload 0: reload_in (DI) =
37071 (plus:DI (reg/f:DI 7 sp)
37072 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37073 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37074 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37075 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37076 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37077 reload_reg_rtx: (reg:V2DI 22 xmm1)
37078
37079 That isn't going to work, since SSE instructions can't handle scalar
37080 additions.  Returning GENERAL_REGS forces the addition into an integer
37081 register, and reload can handle subsequent reloads without problems.  */
37082
37083 if (in_p && GET_CODE (x) == PLUS
37084 && SSE_CLASS_P (rclass)
37085 && SCALAR_INT_MODE_P (mode))
37086 return GENERAL_REGS;
37087
37088 return NO_REGS;
37089 }
37090
37091 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37092
37093 static bool
37094 ix86_class_likely_spilled_p (reg_class_t rclass)
37095 {
37096 switch (rclass)
37097 {
37098 case AREG:
37099 case DREG:
37100 case CREG:
37101 case BREG:
37102 case AD_REGS:
37103 case SIREG:
37104 case DIREG:
37105 case SSE_FIRST_REG:
37106 case FP_TOP_REG:
37107 case FP_SECOND_REG:
37108 return true;
37109
37110 default:
37111 break;
37112 }
37113
37114 return false;
37115 }
37116
37117 /* If we are copying between general and FP registers, we need a memory
37118 location. The same is true for SSE and MMX registers.
37119
37120 To optimize register_move_cost performance, provide an inline variant.
37121 
37122 The check can't work reliably when one of the CLASSES is a class containing
37123 registers from multiple units (SSE, MMX, integer).  We avoid this by never
37124 combining those units in a single alternative in the machine description.
37125 Ensure that this constraint holds to avoid unexpected surprises.
37126
37127 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37128 enforce these sanity checks. */
37129
37130 static inline bool
37131 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37132 enum machine_mode mode, int strict)
37133 {
37134 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37135 return false;
37136 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37137 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37138 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37139 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37140 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37141 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37142 {
37143 gcc_assert (!strict || lra_in_progress);
37144 return true;
37145 }
37146
37147 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37148 return true;
37149
37150 /* ??? This is a lie. We do have moves between mmx/general, and for
37151 mmx/sse2. But by saying we need secondary memory we discourage the
37152 register allocator from using the mmx registers unless needed. */
37153 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37154 return true;
37155
37156 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37157 {
37158 /* SSE1 doesn't have any direct moves from other classes. */
37159 if (!TARGET_SSE2)
37160 return true;
37161
37162 /* If the target says that inter-unit moves are more expensive
37163 than moving through memory, then don't generate them. */
37164 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37165 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37166 return true;
37167
37168 /* Between SSE and general, we have moves no larger than word size. */
37169 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37170 return true;
37171 }
37172
37173 return false;
37174 }
37175
37176 bool
37177 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37178 enum machine_mode mode, int strict)
37179 {
37180 return inline_secondary_memory_needed (class1, class2, mode, strict);
37181 }
37182
37183 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37184
37185 On the 80386, this is the size of MODE in words,
37186 except in the FP regs, where a single reg is always enough. */
37187
37188 static unsigned char
37189 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37190 {
37191 if (MAYBE_INTEGER_CLASS_P (rclass))
37192 {
37193 if (mode == XFmode)
37194 return (TARGET_64BIT ? 2 : 3);
37195 else if (mode == XCmode)
37196 return (TARGET_64BIT ? 4 : 6);
37197 else
37198 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37199 }
37200 else
37201 {
37202 if (COMPLEX_MODE_P (mode))
37203 return 2;
37204 else
37205 return 1;
37206 }
37207 }
37208
37209 /* Return true if the registers in CLASS cannot represent the change from
37210 modes FROM to TO. */
37211
37212 bool
37213 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37214 enum reg_class regclass)
37215 {
37216 if (from == to)
37217 return false;
37218
37219 /* x87 registers can't do subreg at all, as all values are reformatted
37220 to extended precision. */
37221 if (MAYBE_FLOAT_CLASS_P (regclass))
37222 return true;
37223
37224 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37225 {
37226 /* Vector registers do not support QI or HImode loads. If we don't
37227 disallow a change to these modes, reload will assume it's ok to
37228 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37229 the vec_dupv4hi pattern. */
37230 if (GET_MODE_SIZE (from) < 4)
37231 return true;
37232
37233 /* Vector registers do not support subreg with nonzero offsets, which
37234 are otherwise valid for integer registers. Since we can't see
37235 whether we have a nonzero offset from here, prohibit all
37236 nonparadoxical subregs changing size. */
37237 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37238 return true;
37239 }
37240
37241 return false;
37242 }
37243
37244 /* Return the cost of moving data of mode M between a
37245 register and memory. A value of 2 is the default; this cost is
37246 relative to those in `REGISTER_MOVE_COST'.
37247
37248 This function is used extensively by register_move_cost, which is used to
37249 build tables at startup, so keep it inline.
37250 When IN is 2, return the maximum of the in and out move costs.
37251
37252 If moving between registers and memory is more expensive than
37253 between two registers, you should define this macro to express the
37254 relative cost.
37255
37256 Also model the increased cost of moving QImode registers in
37257 non-Q_REGS classes.
37258 */
37259 static inline int
37260 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37261 int in)
37262 {
37263 int cost;
37264 if (FLOAT_CLASS_P (regclass))
37265 {
37266 int index;
37267 switch (mode)
37268 {
37269 case SFmode:
37270 index = 0;
37271 break;
37272 case DFmode:
37273 index = 1;
37274 break;
37275 case XFmode:
37276 index = 2;
37277 break;
37278 default:
37279 return 100;
37280 }
37281 if (in == 2)
37282 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37283 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37284 }
37285 if (SSE_CLASS_P (regclass))
37286 {
37287 int index;
37288 switch (GET_MODE_SIZE (mode))
37289 {
37290 case 4:
37291 index = 0;
37292 break;
37293 case 8:
37294 index = 1;
37295 break;
37296 case 16:
37297 index = 2;
37298 break;
37299 default:
37300 return 100;
37301 }
37302 if (in == 2)
37303 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37304 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37305 }
37306 if (MMX_CLASS_P (regclass))
37307 {
37308 int index;
37309 switch (GET_MODE_SIZE (mode))
37310 {
37311 case 4:
37312 index = 0;
37313 break;
37314 case 8:
37315 index = 1;
37316 break;
37317 default:
37318 return 100;
37319 }
37320 if (in == 2)
37321 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37322 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37323 }
37324 switch (GET_MODE_SIZE (mode))
37325 {
37326 case 1:
37327 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37328 {
37329 if (!in)
37330 return ix86_cost->int_store[0];
37331 if (TARGET_PARTIAL_REG_DEPENDENCY
37332 && optimize_function_for_speed_p (cfun))
37333 cost = ix86_cost->movzbl_load;
37334 else
37335 cost = ix86_cost->int_load[0];
37336 if (in == 2)
37337 return MAX (cost, ix86_cost->int_store[0]);
37338 return cost;
37339 }
37340 else
37341 {
37342 if (in == 2)
37343 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37344 if (in)
37345 return ix86_cost->movzbl_load;
37346 else
37347 return ix86_cost->int_store[0] + 4;
37348 }
37349 break;
37350 case 2:
37351 if (in == 2)
37352 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37353 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37354 default:
37355 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37356 if (mode == TFmode)
37357 mode = XFmode;
37358 if (in == 2)
37359 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37360 else if (in)
37361 cost = ix86_cost->int_load[2];
37362 else
37363 cost = ix86_cost->int_store[2];
37364 return (cost * (((int) GET_MODE_SIZE (mode)
37365 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37366 }
37367 }
37368
37369 static int
37370 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37371 bool in)
37372 {
37373 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37374 }
37375
37376
37377 /* Return the cost of moving data from a register in class CLASS1 to
37378 one in class CLASS2.
37379
37380 It is not required that the cost always equal 2 when FROM is the same as TO;
37381 on some machines it is expensive to move between registers if they are not
37382 general registers. */
37383
37384 static int
37385 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37386 reg_class_t class2_i)
37387 {
37388 enum reg_class class1 = (enum reg_class) class1_i;
37389 enum reg_class class2 = (enum reg_class) class2_i;
37390
37391 /* In case we require secondary memory, compute the cost of the store
37392 followed by the load.  In order to avoid bad register allocation choices,
37393 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
37394
37395 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37396 {
37397 int cost = 1;
37398
37399 cost += inline_memory_move_cost (mode, class1, 2);
37400 cost += inline_memory_move_cost (mode, class2, 2);
37401
37402 /* When copying from a general purpose register we may emit multiple
37403 stores followed by a single load, causing a memory size mismatch stall.
37404 Count this as an arbitrarily high cost of 20.  */
37405 if (targetm.class_max_nregs (class1, mode)
37406 > targetm.class_max_nregs (class2, mode))
37407 cost += 20;
37408
37409 /* In the case of FP/MMX moves, the registers actually overlap, and we
37410 have to switch modes in order to treat them differently. */
37411 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37412 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37413 cost += 20;
37414
37415 return cost;
37416 }
37417
37418 /* Moves between SSE/MMX and integer unit are expensive. */
37419 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37420 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37421
37422 /* ??? By keeping the returned value relatively high, we limit the number
37423 of moves between integer and MMX/SSE registers for all targets.
37424 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
37425 where integer modes in MMX/SSE registers are not tieable
37426 because of missing QImode and HImode moves to, from or between
37427 MMX/SSE registers. */
37428 return MAX (8, ix86_cost->mmxsse_to_integer);
37429
37430 if (MAYBE_FLOAT_CLASS_P (class1))
37431 return ix86_cost->fp_move;
37432 if (MAYBE_SSE_CLASS_P (class1))
37433 return ix86_cost->sse_move;
37434 if (MAYBE_MMX_CLASS_P (class1))
37435 return ix86_cost->mmx_move;
37436 return 2;
37437 }
37438
37439 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37440 MODE. */
37441
37442 bool
37443 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37444 {
37445 /* The flags register, and only the flags register, can hold CCmode values.  */
37446 if (CC_REGNO_P (regno))
37447 return GET_MODE_CLASS (mode) == MODE_CC;
37448 if (GET_MODE_CLASS (mode) == MODE_CC
37449 || GET_MODE_CLASS (mode) == MODE_RANDOM
37450 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37451 return false;
37452 if (STACK_REGNO_P (regno))
37453 return VALID_FP_MODE_P (mode);
37454 if (MASK_REGNO_P (regno))
37455 return VALID_MASK_REG_MODE (mode);
37456 if (SSE_REGNO_P (regno))
37457 {
37458 /* We implement the move patterns for all vector modes into and
37459 out of SSE registers, even when no operation instructions
37460 are available. */
37461
37462 /* For AVX-512 we allow, regardless of regno:
37463 - XI mode
37464 - any 512-bit wide vector mode
37465 - any scalar mode. */
37466 if (TARGET_AVX512F
37467 && (mode == XImode
37468 || VALID_AVX512F_REG_MODE (mode)
37469 || VALID_AVX512F_SCALAR_MODE (mode)))
37470 return true;
37471
37472 /* xmm16-xmm31 are only available for AVX-512. */
37473 if (EXT_REX_SSE_REGNO_P (regno))
37474 return false;
37475
37476 /* OImode move is available only when AVX is enabled. */
37477 return ((TARGET_AVX && mode == OImode)
37478 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37479 || VALID_SSE_REG_MODE (mode)
37480 || VALID_SSE2_REG_MODE (mode)
37481 || VALID_MMX_REG_MODE (mode)
37482 || VALID_MMX_REG_MODE_3DNOW (mode));
37483 }
37484 if (MMX_REGNO_P (regno))
37485 {
37486 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37487 so if the register is available at all, then we can move data of
37488 the given mode into or out of it. */
37489 return (VALID_MMX_REG_MODE (mode)
37490 || VALID_MMX_REG_MODE_3DNOW (mode));
37491 }
37492
37493 if (mode == QImode)
37494 {
37495 /* Take care with QImode values - they can be in non-QI regs,
37496 but then they do cause partial register stalls. */
37497 if (ANY_QI_REGNO_P (regno))
37498 return true;
37499 if (!TARGET_PARTIAL_REG_STALL)
37500 return true;
37501 /* LRA checks if the hard register is OK for the given mode.
37502 QImode values can live in non-QI regs, so we allow all
37503 registers here. */
37504 if (lra_in_progress)
37505 return true;
37506 return !can_create_pseudo_p ();
37507 }
37508 /* We handle both integer and floats in the general purpose registers. */
37509 else if (VALID_INT_MODE_P (mode))
37510 return true;
37511 else if (VALID_FP_MODE_P (mode))
37512 return true;
37513 else if (VALID_DFP_MODE_P (mode))
37514 return true;
37515 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37516 on to use that value in smaller contexts, this can easily force a
37517 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37518 supporting DImode, allow it. */
37519 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37520 return true;
37521
37522 return false;
37523 }
37524
37525 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37526 tieable integer mode. */
37527
37528 static bool
37529 ix86_tieable_integer_mode_p (enum machine_mode mode)
37530 {
37531 switch (mode)
37532 {
37533 case HImode:
37534 case SImode:
37535 return true;
37536
37537 case QImode:
37538 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37539
37540 case DImode:
37541 return TARGET_64BIT;
37542
37543 default:
37544 return false;
37545 }
37546 }
37547
37548 /* Return true if MODE1 is accessible in a register that can hold MODE2
37549 without copying. That is, all register classes that can hold MODE2
37550 can also hold MODE1. */
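/* For example, HImode and SImode are tieable with each other, and SFmode
   can be tied to a register holding a DFmode or XFmode value.  */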
37551
37552 bool
37553 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37554 {
37555 if (mode1 == mode2)
37556 return true;
37557
37558 if (ix86_tieable_integer_mode_p (mode1)
37559 && ix86_tieable_integer_mode_p (mode2))
37560 return true;
37561
37562 /* MODE2 being XFmode implies fp stack or general regs, which means we
37563 can tie any smaller floating point modes to it. Note that we do not
37564 tie this with TFmode. */
37565 if (mode2 == XFmode)
37566 return mode1 == SFmode || mode1 == DFmode;
37567
37568 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37569 that we can tie it with SFmode. */
37570 if (mode2 == DFmode)
37571 return mode1 == SFmode;
37572
37573 /* If MODE2 is only appropriate for an SSE register, then tie with
37574 any other mode acceptable to SSE registers. */
37575 if (GET_MODE_SIZE (mode2) == 32
37576 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37577 return (GET_MODE_SIZE (mode1) == 32
37578 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37579 if (GET_MODE_SIZE (mode2) == 16
37580 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37581 return (GET_MODE_SIZE (mode1) == 16
37582 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37583
37584 /* If MODE2 is appropriate for an MMX register, then tie
37585 with any other mode acceptable to MMX registers. */
37586 if (GET_MODE_SIZE (mode2) == 8
37587 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37588 return (GET_MODE_SIZE (mode1) == 8
37589 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37590
37591 return false;
37592 }
37593
37594 /* Return the cost of moving between two registers of mode MODE. */
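/* For example, a DImode register-to-register move on a 32-bit target is
   costed as two word-sized moves, while a V4SFmode move with SSE enabled
   counts as a single move.  */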
37595
37596 static int
37597 ix86_set_reg_reg_cost (enum machine_mode mode)
37598 {
37599 unsigned int units = UNITS_PER_WORD;
37600
37601 switch (GET_MODE_CLASS (mode))
37602 {
37603 default:
37604 break;
37605
37606 case MODE_CC:
37607 units = GET_MODE_SIZE (CCmode);
37608 break;
37609
37610 case MODE_FLOAT:
37611 if ((TARGET_SSE && mode == TFmode)
37612 || (TARGET_80387 && mode == XFmode)
37613 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37614 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37615 units = GET_MODE_SIZE (mode);
37616 break;
37617
37618 case MODE_COMPLEX_FLOAT:
37619 if ((TARGET_SSE && mode == TCmode)
37620 || (TARGET_80387 && mode == XCmode)
37621 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37622 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37623 units = GET_MODE_SIZE (mode);
37624 break;
37625
37626 case MODE_VECTOR_INT:
37627 case MODE_VECTOR_FLOAT:
37628 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37629 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37630 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37631 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37632 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37633 units = GET_MODE_SIZE (mode);
37634 }
37635
37636 /* Return the cost of moving between two registers of mode MODE,
37637 assuming that the move will be in pieces of at most UNITS bytes. */
37638 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37639 }
37640
37641 /* Compute a (partial) cost for rtx X. Return true if the complete
37642 cost has been computed, and false if subexpressions should be
37643 scanned. In either case, *TOTAL contains the cost result. */
37644
37645 static bool
37646 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37647 bool speed)
37648 {
37649 rtx mask;
37650 enum rtx_code code = (enum rtx_code) code_i;
37651 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37652 enum machine_mode mode = GET_MODE (x);
37653 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37654
37655 switch (code)
37656 {
37657 case SET:
37658 if (register_operand (SET_DEST (x), VOIDmode)
37659 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37660 {
37661 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37662 return true;
37663 }
37664 return false;
37665
37666 case CONST_INT:
37667 case CONST:
37668 case LABEL_REF:
37669 case SYMBOL_REF:
37670 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37671 *total = 3;
37672 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37673 *total = 2;
37674 else if (flag_pic && SYMBOLIC_CONST (x)
37675 && (!TARGET_64BIT
36676 || (GET_CODE (x) != LABEL_REF
37677 && (GET_CODE (x) != SYMBOL_REF
37678 || !SYMBOL_REF_LOCAL_P (x)))))
37679 *total = 1;
37680 else
37681 *total = 0;
37682 return true;
37683
37684 case CONST_DOUBLE:
37685 if (mode == VOIDmode)
37686 {
37687 *total = 0;
37688 return true;
37689 }
37690 switch (standard_80387_constant_p (x))
37691 {
37692 case 1: /* 0.0 */
37693 *total = 1;
37694 return true;
37695 default: /* Other constants */
37696 *total = 2;
37697 return true;
37698 case 0:
37699 case -1:
37700 break;
37701 }
37702 if (SSE_FLOAT_MODE_P (mode))
37703 {
37704 case CONST_VECTOR:
37705 switch (standard_sse_constant_p (x))
37706 {
37707 case 0:
37708 break;
37709 case 1: /* 0: xor eliminates false dependency */
37710 *total = 0;
37711 return true;
37712 default: /* -1: cmp contains false dependency */
37713 *total = 1;
37714 return true;
37715 }
37716 }
37717 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37718 it'll probably end up. Add a penalty for size. */
37719 *total = (COSTS_N_INSNS (1)
37720 + (flag_pic != 0 && !TARGET_64BIT)
37721 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37722 return true;
37723
37724 case ZERO_EXTEND:
37725 /* Zero extension is often completely free on x86_64, so make
37726 it as cheap as possible. */
37727 if (TARGET_64BIT && mode == DImode
37728 && GET_MODE (XEXP (x, 0)) == SImode)
37729 *total = 1;
37730 else if (TARGET_ZERO_EXTEND_WITH_AND)
37731 *total = cost->add;
37732 else
37733 *total = cost->movzx;
37734 return false;
37735
37736 case SIGN_EXTEND:
37737 *total = cost->movsx;
37738 return false;
37739
37740 case ASHIFT:
37741 if (SCALAR_INT_MODE_P (mode)
37742 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37743 && CONST_INT_P (XEXP (x, 1)))
37744 {
37745 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37746 if (value == 1)
37747 {
37748 *total = cost->add;
37749 return false;
37750 }
37751 if ((value == 2 || value == 3)
37752 && cost->lea <= cost->shift_const)
37753 {
37754 *total = cost->lea;
37755 return false;
37756 }
37757 }
37758 /* FALLTHRU */
37759
37760 case ROTATE:
37761 case ASHIFTRT:
37762 case LSHIFTRT:
37763 case ROTATERT:
37764 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37765 {
37766 /* ??? Should be SSE vector operation cost. */
37767 /* At least for published AMD latencies, this really is the same
37768 as the latency for a simple fpu operation like fabs. */
37769 /* V*QImode is emulated with 1-11 insns. */
37770 if (mode == V16QImode || mode == V32QImode)
37771 {
37772 int count = 11;
37773 if (TARGET_XOP && mode == V16QImode)
37774 {
37775 /* For XOP we use vpshab, which requires a broadcast of the
37776 value to the variable shift insn. For constants this
37777 means a V16QImode constant in memory; even when we can perform the
37778 shift with one insn, set the cost to prefer paddb.  */
37779 if (CONSTANT_P (XEXP (x, 1)))
37780 {
37781 *total = (cost->fabs
37782 + rtx_cost (XEXP (x, 0), code, 0, speed)
37783 + (speed ? 2 : COSTS_N_BYTES (16)));
37784 return true;
37785 }
37786 count = 3;
37787 }
37788 else if (TARGET_SSSE3)
37789 count = 7;
37790 *total = cost->fabs * count;
37791 }
37792 else
37793 *total = cost->fabs;
37794 }
37795 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37796 {
37797 if (CONST_INT_P (XEXP (x, 1)))
37798 {
37799 if (INTVAL (XEXP (x, 1)) > 32)
37800 *total = cost->shift_const + COSTS_N_INSNS (2);
37801 else
37802 *total = cost->shift_const * 2;
37803 }
37804 else
37805 {
37806 if (GET_CODE (XEXP (x, 1)) == AND)
37807 *total = cost->shift_var * 2;
37808 else
37809 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
37810 }
37811 }
37812 else
37813 {
37814 if (CONST_INT_P (XEXP (x, 1)))
37815 *total = cost->shift_const;
37816 else if (GET_CODE (XEXP (x, 1)) == SUBREG
37817 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
37818 {
37819 /* Return the cost after shift-and truncation. */
37820 *total = cost->shift_var;
37821 return true;
37822 }
37823 else
37824 *total = cost->shift_var;
37825 }
37826 return false;
37827
37828 case FMA:
37829 {
37830 rtx sub;
37831
37832 gcc_assert (FLOAT_MODE_P (mode));
37833 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
37834
37835 /* ??? SSE scalar/vector cost should be used here. */
37836 /* ??? Bald assumption that fma has the same cost as fmul. */
37837 *total = cost->fmul;
37838 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
37839
37840 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
37841 sub = XEXP (x, 0);
37842 if (GET_CODE (sub) == NEG)
37843 sub = XEXP (sub, 0);
37844 *total += rtx_cost (sub, FMA, 0, speed);
37845
37846 sub = XEXP (x, 2);
37847 if (GET_CODE (sub) == NEG)
37848 sub = XEXP (sub, 0);
37849 *total += rtx_cost (sub, FMA, 2, speed);
37850 return true;
37851 }
37852
37853 case MULT:
37854 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
37855 {
37856 /* ??? SSE scalar cost should be used here. */
37857 *total = cost->fmul;
37858 return false;
37859 }
37860 else if (X87_FLOAT_MODE_P (mode))
37861 {
37862 *total = cost->fmul;
37863 return false;
37864 }
37865 else if (FLOAT_MODE_P (mode))
37866 {
37867 /* ??? SSE vector cost should be used here. */
37868 *total = cost->fmul;
37869 return false;
37870 }
37871 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37872 {
37873 /* V*QImode is emulated with 7-13 insns. */
37874 if (mode == V16QImode || mode == V32QImode)
37875 {
37876 int extra = 11;
37877 if (TARGET_XOP && mode == V16QImode)
37878 extra = 5;
37879 else if (TARGET_SSSE3)
37880 extra = 6;
37881 *total = cost->fmul * 2 + cost->fabs * extra;
37882 }
37883 /* V*DImode is emulated with 5-8 insns. */
37884 else if (mode == V2DImode || mode == V4DImode)
37885 {
37886 if (TARGET_XOP && mode == V2DImode)
37887 *total = cost->fmul * 2 + cost->fabs * 3;
37888 else
37889 *total = cost->fmul * 3 + cost->fabs * 5;
37890 }
37891 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
37892 insns, including two PMULUDQ. */
37893 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
37894 *total = cost->fmul * 2 + cost->fabs * 5;
37895 else
37896 *total = cost->fmul;
37897 return false;
37898 }
37899 else
37900 {
37901 rtx op0 = XEXP (x, 0);
37902 rtx op1 = XEXP (x, 1);
37903 int nbits;
37904 if (CONST_INT_P (XEXP (x, 1)))
37905 {
37906 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37907 for (nbits = 0; value != 0; value &= value - 1)
37908 nbits++;
37909 }
37910 else
37911 /* This is arbitrary. */
37912 nbits = 7;
37913
37914 /* Compute costs correctly for widening multiplication. */
37915 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
37916 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
37917 == GET_MODE_SIZE (mode))
37918 {
37919 int is_mulwiden = 0;
37920 enum machine_mode inner_mode = GET_MODE (op0);
37921
37922 if (GET_CODE (op0) == GET_CODE (op1))
37923 is_mulwiden = 1, op1 = XEXP (op1, 0);
37924 else if (CONST_INT_P (op1))
37925 {
37926 if (GET_CODE (op0) == SIGN_EXTEND)
37927 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
37928 == INTVAL (op1);
37929 else
37930 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
37931 }
37932
37933 if (is_mulwiden)
37934 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
37935 }
37936
37937 *total = (cost->mult_init[MODE_INDEX (mode)]
37938 + nbits * cost->mult_bit
37939 + rtx_cost (op0, outer_code, opno, speed)
37940 + rtx_cost (op1, outer_code, opno, speed));
37941
37942 return true;
37943 }
37944
37945 case DIV:
37946 case UDIV:
37947 case MOD:
37948 case UMOD:
37949 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
37950 /* ??? SSE cost should be used here. */
37951 *total = cost->fdiv;
37952 else if (X87_FLOAT_MODE_P (mode))
37953 *total = cost->fdiv;
37954 else if (FLOAT_MODE_P (mode))
37955 /* ??? SSE vector cost should be used here. */
37956 *total = cost->fdiv;
37957 else
37958 *total = cost->divide[MODE_INDEX (mode)];
37959 return false;
37960
37961 case PLUS:
37962 if (GET_MODE_CLASS (mode) == MODE_INT
37963 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
37964 {
37965 if (GET_CODE (XEXP (x, 0)) == PLUS
37966 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
37967 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
37968 && CONSTANT_P (XEXP (x, 1)))
37969 {
37970 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
37971 if (val == 2 || val == 4 || val == 8)
37972 {
37973 *total = cost->lea;
37974 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
37975 outer_code, opno, speed);
37976 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
37977 outer_code, opno, speed);
37978 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
37979 return true;
37980 }
37981 }
37982 else if (GET_CODE (XEXP (x, 0)) == MULT
37983 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
37984 {
37985 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
37986 if (val == 2 || val == 4 || val == 8)
37987 {
37988 *total = cost->lea;
37989 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
37990 outer_code, opno, speed);
37991 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
37992 return true;
37993 }
37994 }
37995 else if (GET_CODE (XEXP (x, 0)) == PLUS)
37996 {
37997 *total = cost->lea;
37998 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
37999 outer_code, opno, speed);
38000 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38001 outer_code, opno, speed);
38002 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38003 return true;
38004 }
38005 }
38006 /* FALLTHRU */
38007
38008 case MINUS:
38009 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38010 {
38011 /* ??? SSE cost should be used here. */
38012 *total = cost->fadd;
38013 return false;
38014 }
38015 else if (X87_FLOAT_MODE_P (mode))
38016 {
38017 *total = cost->fadd;
38018 return false;
38019 }
38020 else if (FLOAT_MODE_P (mode))
38021 {
38022 /* ??? SSE vector cost should be used here. */
38023 *total = cost->fadd;
38024 return false;
38025 }
38026 /* FALLTHRU */
38027
38028 case AND:
38029 case IOR:
38030 case XOR:
38031 if (GET_MODE_CLASS (mode) == MODE_INT
38032 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38033 {
38034 *total = (cost->add * 2
38035 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38036 << (GET_MODE (XEXP (x, 0)) != DImode))
38037 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38038 << (GET_MODE (XEXP (x, 1)) != DImode)));
38039 return true;
38040 }
38041 /* FALLTHRU */
38042
38043 case NEG:
38044 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38045 {
38046 /* ??? SSE cost should be used here. */
38047 *total = cost->fchs;
38048 return false;
38049 }
38050 else if (X87_FLOAT_MODE_P (mode))
38051 {
38052 *total = cost->fchs;
38053 return false;
38054 }
38055 else if (FLOAT_MODE_P (mode))
38056 {
38057 /* ??? SSE vector cost should be used here. */
38058 *total = cost->fchs;
38059 return false;
38060 }
38061 /* FALLTHRU */
38062
38063 case NOT:
38064 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38065 {
38066 /* ??? Should be SSE vector operation cost. */
38067 /* At least for published AMD latencies, this really is the same
38068 as the latency for a simple fpu operation like fabs. */
38069 *total = cost->fabs;
38070 }
38071 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38072 *total = cost->add * 2;
38073 else
38074 *total = cost->add;
38075 return false;
38076
38077 case COMPARE:
38078 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38079 && XEXP (XEXP (x, 0), 1) == const1_rtx
38080 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38081 && XEXP (x, 1) == const0_rtx)
38082 {
38083 /* This kind of construct is implemented using test[bwl].
38084 Treat it as if we had an AND. */
38085 *total = (cost->add
38086 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38087 + rtx_cost (const1_rtx, outer_code, opno, speed));
38088 return true;
38089 }
38090 return false;
38091
38092 case FLOAT_EXTEND:
38093 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38094 *total = 0;
38095 return false;
38096
38097 case ABS:
38098 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38099 /* ??? SSE cost should be used here. */
38100 *total = cost->fabs;
38101 else if (X87_FLOAT_MODE_P (mode))
38102 *total = cost->fabs;
38103 else if (FLOAT_MODE_P (mode))
38104 /* ??? SSE vector cost should be used here. */
38105 *total = cost->fabs;
38106 return false;
38107
38108 case SQRT:
38109 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38110 /* ??? SSE cost should be used here. */
38111 *total = cost->fsqrt;
38112 else if (X87_FLOAT_MODE_P (mode))
38113 *total = cost->fsqrt;
38114 else if (FLOAT_MODE_P (mode))
38115 /* ??? SSE vector cost should be used here. */
38116 *total = cost->fsqrt;
38117 return false;
38118
38119 case UNSPEC:
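	    /* UNSPEC_TP is the thread pointer; it is reached through a
	       segment override on the actual memory access, so treat it
	       as free.  */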
38120 if (XINT (x, 1) == UNSPEC_TP)
38121 *total = 0;
38122 return false;
38123
38124 case VEC_SELECT:
38125 case VEC_CONCAT:
38126 case VEC_DUPLICATE:
38127 /* ??? Assume all of these vector manipulation patterns are
38128 	 recognizable, in which case they all pretty much have the
38129 same cost. */
38130 *total = cost->fabs;
38131 return true;
38132 case VEC_MERGE:
38133 mask = XEXP (x, 2);
38134 	      /* This is a masked instruction; assume the same cost
38135 		 as the non-masked variant.  */
38136 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38137 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38138 else
38139 *total = cost->fabs;
38140 return true;
38141
38142 default:
38143 return false;
38144 }
38145 }
38146
38147 #if TARGET_MACHO
38148
38149 static int current_machopic_label_num;
38150
38151 /* Given a symbol name and its associated stub, write out the
38152 definition of the stub. */
38153
38154 void
38155 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38156 {
38157 unsigned int length;
38158 char *binder_name, *symbol_name, lazy_ptr_name[32];
38159 int label = ++current_machopic_label_num;
38160
38161 /* For 64-bit we shouldn't get here. */
38162 gcc_assert (!TARGET_64BIT);
38163
38164 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38165 symb = targetm.strip_name_encoding (symb);
38166
38167 length = strlen (stub);
38168 binder_name = XALLOCAVEC (char, length + 32);
38169 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38170
38171 length = strlen (symb);
38172 symbol_name = XALLOCAVEC (char, length + 32);
38173 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38174
38175 sprintf (lazy_ptr_name, "L%d$lz", label);
38176
38177 if (MACHOPIC_ATT_STUB)
38178 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38179 else if (MACHOPIC_PURE)
38180 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38181 else
38182 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38183
38184 fprintf (file, "%s:\n", stub);
38185 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38186
38187 if (MACHOPIC_ATT_STUB)
38188 {
38189 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38190 }
38191 else if (MACHOPIC_PURE)
38192 {
38193 /* PIC stub. */
38194 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38195 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38196 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38197 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38198 label, lazy_ptr_name, label);
38199 fprintf (file, "\tjmp\t*%%ecx\n");
38200 }
38201 else
38202 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38203
38204 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38205 it needs no stub-binding-helper. */
38206 if (MACHOPIC_ATT_STUB)
38207 return;
38208
38209 fprintf (file, "%s:\n", binder_name);
38210
38211 if (MACHOPIC_PURE)
38212 {
38213 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38214 fprintf (file, "\tpushl\t%%ecx\n");
38215 }
38216 else
38217 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38218
38219 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38220
38221 /* N.B. Keep the correspondence of these
38222 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38223 old-pic/new-pic/non-pic stubs; altering this will break
38224 compatibility with existing dylibs. */
38225 if (MACHOPIC_PURE)
38226 {
38227 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38228 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38229 }
38230 else
38231 /* 16-byte -mdynamic-no-pic stub. */
38232 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38233
38234 fprintf (file, "%s:\n", lazy_ptr_name);
38235 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38236 fprintf (file, ASM_LONG "%s\n", binder_name);
38237 }
38238 #endif /* TARGET_MACHO */
38239
38240 /* Order the registers for register allocator. */
38241
38242 void
38243 x86_order_regs_for_local_alloc (void)
38244 {
38245 int pos = 0;
38246 int i;
38247
38248 /* First allocate the local general purpose registers. */
38249 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38250 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38251 reg_alloc_order [pos++] = i;
38252
38253 /* Global general purpose registers. */
38254 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38255 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38256 reg_alloc_order [pos++] = i;
38257
38258 /* x87 registers come first in case we are doing FP math
38259 using them. */
38260 if (!TARGET_SSE_MATH)
38261 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38262 reg_alloc_order [pos++] = i;
38263
38264 /* SSE registers. */
38265 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38266 reg_alloc_order [pos++] = i;
38267 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38268 reg_alloc_order [pos++] = i;
38269
38270 /* Extended REX SSE registers. */
38271 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38272 reg_alloc_order [pos++] = i;
38273
38274   /* Mask registers.  */
38275 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38276 reg_alloc_order [pos++] = i;
38277
38278 /* x87 registers. */
38279 if (TARGET_SSE_MATH)
38280 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38281 reg_alloc_order [pos++] = i;
38282
38283 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38284 reg_alloc_order [pos++] = i;
38285
38286   /* Initialize the rest of the array, as we do not allocate some registers
38287 at all. */
38288 while (pos < FIRST_PSEUDO_REGISTER)
38289 reg_alloc_order [pos++] = 0;
38290 }
38291
38292 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38293 in struct attribute_spec handler. */
38294 static tree
38295 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38296 tree args,
38297 int flags ATTRIBUTE_UNUSED,
38298 bool *no_add_attrs)
38299 {
38300 if (TREE_CODE (*node) != FUNCTION_TYPE
38301 && TREE_CODE (*node) != METHOD_TYPE
38302 && TREE_CODE (*node) != FIELD_DECL
38303 && TREE_CODE (*node) != TYPE_DECL)
38304 {
38305 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38306 name);
38307 *no_add_attrs = true;
38308 return NULL_TREE;
38309 }
38310 if (TARGET_64BIT)
38311 {
38312 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38313 name);
38314 *no_add_attrs = true;
38315 return NULL_TREE;
38316 }
38317 if (is_attribute_p ("callee_pop_aggregate_return", name))
38318 {
38319 tree cst;
38320
38321 cst = TREE_VALUE (args);
38322 if (TREE_CODE (cst) != INTEGER_CST)
38323 {
38324 warning (OPT_Wattributes,
38325 "%qE attribute requires an integer constant argument",
38326 name);
38327 *no_add_attrs = true;
38328 }
38329 else if (compare_tree_int (cst, 0) != 0
38330 && compare_tree_int (cst, 1) != 0)
38331 {
38332 warning (OPT_Wattributes,
38333 "argument to %qE attribute is neither zero, nor one",
38334 name);
38335 *no_add_attrs = true;
38336 }
38337
38338 return NULL_TREE;
38339 }
38340
38341 return NULL_TREE;
38342 }
38343
38344 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38345 struct attribute_spec.handler. */
38346 static tree
38347 ix86_handle_abi_attribute (tree *node, tree name,
38348 tree args ATTRIBUTE_UNUSED,
38349 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38350 {
38351 if (TREE_CODE (*node) != FUNCTION_TYPE
38352 && TREE_CODE (*node) != METHOD_TYPE
38353 && TREE_CODE (*node) != FIELD_DECL
38354 && TREE_CODE (*node) != TYPE_DECL)
38355 {
38356 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38357 name);
38358 *no_add_attrs = true;
38359 return NULL_TREE;
38360 }
38361
38362   /* The ms_abi and sysv_abi attributes are mutually exclusive;
	     diagnose any attempt to combine them.  */
38363 if (is_attribute_p ("ms_abi", name))
38364 {
38365 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38366 {
38367 error ("ms_abi and sysv_abi attributes are not compatible");
38368 }
38369
38370 return NULL_TREE;
38371 }
38372 else if (is_attribute_p ("sysv_abi", name))
38373 {
38374 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38375 {
38376 error ("ms_abi and sysv_abi attributes are not compatible");
38377 }
38378
38379 return NULL_TREE;
38380 }
38381
38382 return NULL_TREE;
38383 }
38384
38385 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38386 struct attribute_spec.handler. */
38387 static tree
38388 ix86_handle_struct_attribute (tree *node, tree name,
38389 tree args ATTRIBUTE_UNUSED,
38390 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38391 {
38392 tree *type = NULL;
38393 if (DECL_P (*node))
38394 {
38395 if (TREE_CODE (*node) == TYPE_DECL)
38396 type = &TREE_TYPE (*node);
38397 }
38398 else
38399 type = node;
38400
38401 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38402 {
38403 warning (OPT_Wattributes, "%qE attribute ignored",
38404 name);
38405 *no_add_attrs = true;
38406 }
38407
38408 else if ((is_attribute_p ("ms_struct", name)
38409 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38410 || ((is_attribute_p ("gcc_struct", name)
38411 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38412 {
38413 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38414 name);
38415 *no_add_attrs = true;
38416 }
38417
38418 return NULL_TREE;
38419 }
38420
38421 static tree
38422 ix86_handle_fndecl_attribute (tree *node, tree name,
38423 tree args ATTRIBUTE_UNUSED,
38424 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38425 {
38426 if (TREE_CODE (*node) != FUNCTION_DECL)
38427 {
38428 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38429 name);
38430 *no_add_attrs = true;
38431 }
38432 return NULL_TREE;
38433 }
38434
38435 static bool
38436 ix86_ms_bitfield_layout_p (const_tree record_type)
38437 {
38438 return ((TARGET_MS_BITFIELD_LAYOUT
38439 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38440 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38441 }
38442
38443 /* Returns an expression indicating where the this parameter is
38444 located on entry to the FUNCTION. */
38445
38446 static rtx
38447 x86_this_parameter (tree function)
38448 {
38449 tree type = TREE_TYPE (function);
38450 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38451 int nregs;
38452
38453 if (TARGET_64BIT)
38454 {
38455 const int *parm_regs;
38456
38457 if (ix86_function_type_abi (type) == MS_ABI)
38458 parm_regs = x86_64_ms_abi_int_parameter_registers;
38459 else
38460 parm_regs = x86_64_int_parameter_registers;
38461 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38462 }
38463
38464 nregs = ix86_function_regparm (type, function);
38465
38466 if (nregs > 0 && !stdarg_p (type))
38467 {
38468 int regno;
38469 unsigned int ccvt = ix86_get_callcvt (type);
38470
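      /* With fastcall the first argument lives in %ecx; if a hidden
	 aggregate-return pointer already occupies that slot, THIS is
	 passed in %edx instead.  */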
38471 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38472 regno = aggr ? DX_REG : CX_REG;
38473 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38474 {
38475 regno = CX_REG;
38476 if (aggr)
38477 return gen_rtx_MEM (SImode,
38478 plus_constant (Pmode, stack_pointer_rtx, 4));
38479 }
38480 else
38481 {
38482 regno = AX_REG;
38483 if (aggr)
38484 {
38485 regno = DX_REG;
38486 if (nregs == 1)
38487 return gen_rtx_MEM (SImode,
38488 plus_constant (Pmode,
38489 stack_pointer_rtx, 4));
38490 }
38491 }
38492 return gen_rtx_REG (SImode, regno);
38493 }
38494
38495 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38496 aggr ? 8 : 4));
38497 }
38498
38499 /* Determine whether x86_output_mi_thunk can succeed. */
38500
38501 static bool
38502 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38503 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38504 HOST_WIDE_INT vcall_offset, const_tree function)
38505 {
38506 /* 64-bit can handle anything. */
38507 if (TARGET_64BIT)
38508 return true;
38509
38510 /* For 32-bit, everything's fine if we have one free register. */
38511 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38512 return true;
38513
38514 /* Need a free register for vcall_offset. */
38515 if (vcall_offset)
38516 return false;
38517
38518 /* Need a free register for GOT references. */
38519 if (flag_pic && !targetm.binds_local_p (function))
38520 return false;
38521
38522 /* Otherwise ok. */
38523 return true;
38524 }
38525
38526 /* Output the assembler code for a thunk function. THUNK_DECL is the
38527 declaration for the thunk function itself, FUNCTION is the decl for
38528 the target function. DELTA is an immediate constant offset to be
38529 added to THIS. If VCALL_OFFSET is nonzero, the word at
38530 *(*this + vcall_offset) should be added to THIS. */
38531
38532 static void
38533 x86_output_mi_thunk (FILE *file,
38534 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38535 HOST_WIDE_INT vcall_offset, tree function)
38536 {
38537 rtx this_param = x86_this_parameter (function);
38538 rtx this_reg, tmp, fnaddr;
38539 unsigned int tmp_regno;
38540
38541 if (TARGET_64BIT)
38542 tmp_regno = R10_REG;
38543 else
38544 {
38545 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38546 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38547 tmp_regno = AX_REG;
38548 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38549 tmp_regno = DX_REG;
38550 else
38551 tmp_regno = CX_REG;
38552 }
38553
38554 emit_note (NOTE_INSN_PROLOGUE_END);
38555
38556 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38557 pull it in now and let DELTA benefit. */
38558 if (REG_P (this_param))
38559 this_reg = this_param;
38560 else if (vcall_offset)
38561 {
38562 /* Put the this parameter into %eax. */
38563 this_reg = gen_rtx_REG (Pmode, AX_REG);
38564 emit_move_insn (this_reg, this_param);
38565 }
38566 else
38567 this_reg = NULL_RTX;
38568
38569 /* Adjust the this parameter by a fixed constant. */
38570 if (delta)
38571 {
38572 rtx delta_rtx = GEN_INT (delta);
38573 rtx delta_dst = this_reg ? this_reg : this_param;
38574
38575 if (TARGET_64BIT)
38576 {
38577 if (!x86_64_general_operand (delta_rtx, Pmode))
38578 {
38579 tmp = gen_rtx_REG (Pmode, tmp_regno);
38580 emit_move_insn (tmp, delta_rtx);
38581 delta_rtx = tmp;
38582 }
38583 }
38584
38585 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38586 }
38587
38588 /* Adjust the this parameter by a value stored in the vtable. */
38589 if (vcall_offset)
38590 {
38591 rtx vcall_addr, vcall_mem, this_mem;
38592
38593 tmp = gen_rtx_REG (Pmode, tmp_regno);
38594
38595 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38596 if (Pmode != ptr_mode)
38597 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38598 emit_move_insn (tmp, this_mem);
38599
38600 /* Adjust the this parameter. */
38601 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38602 if (TARGET_64BIT
38603 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38604 {
38605 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38606 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38607 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38608 }
38609
38610 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38611 if (Pmode != ptr_mode)
38612 emit_insn (gen_addsi_1_zext (this_reg,
38613 gen_rtx_REG (ptr_mode,
38614 REGNO (this_reg)),
38615 vcall_mem));
38616 else
38617 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38618 }
38619
38620 /* If necessary, drop THIS back to its stack slot. */
38621 if (this_reg && this_reg != this_param)
38622 emit_move_insn (this_param, this_reg);
38623
38624 fnaddr = XEXP (DECL_RTL (function), 0);
38625 if (TARGET_64BIT)
38626 {
38627 if (!flag_pic || targetm.binds_local_p (function)
38628 || TARGET_PECOFF)
38629 ;
38630 else
38631 {
38632 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38633 tmp = gen_rtx_CONST (Pmode, tmp);
38634 fnaddr = gen_rtx_MEM (Pmode, tmp);
38635 }
38636 }
38637 else
38638 {
38639 if (!flag_pic || targetm.binds_local_p (function))
38640 ;
38641 #if TARGET_MACHO
38642 else if (TARGET_MACHO)
38643 {
38644 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38645 fnaddr = XEXP (fnaddr, 0);
38646 }
38647 #endif /* TARGET_MACHO */
38648 else
38649 {
38650 tmp = gen_rtx_REG (Pmode, CX_REG);
38651 output_set_got (tmp, NULL_RTX);
38652
38653 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38654 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
38655 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
38656 }
38657 }
38658
38659 /* Our sibling call patterns do not allow memories, because we have no
38660 predicate that can distinguish between frame and non-frame memory.
38661 For our purposes here, we can get away with (ab)using a jump pattern,
38662 because we're going to do no optimization. */
38663 if (MEM_P (fnaddr))
38664 emit_jump_insn (gen_indirect_jump (fnaddr));
38665 else
38666 {
38667 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38668 fnaddr = legitimize_pic_address (fnaddr,
38669 gen_rtx_REG (Pmode, tmp_regno));
38670
38671 if (!sibcall_insn_operand (fnaddr, word_mode))
38672 {
38673 tmp = gen_rtx_REG (word_mode, tmp_regno);
38674 if (GET_MODE (fnaddr) != word_mode)
38675 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38676 emit_move_insn (tmp, fnaddr);
38677 fnaddr = tmp;
38678 }
38679
38680 tmp = gen_rtx_MEM (QImode, fnaddr);
38681 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38682 tmp = emit_call_insn (tmp);
38683 SIBLING_CALL_P (tmp) = 1;
38684 }
38685 emit_barrier ();
38686
38687 /* Emit just enough of rest_of_compilation to get the insns emitted.
38688 Note that use_thunk calls assemble_start_function et al. */
38689 tmp = get_insns ();
38690 shorten_branches (tmp);
38691 final_start_function (tmp, file, 1);
38692 final (tmp, file, 1);
38693 final_end_function ();
38694 }
38695
38696 static void
38697 x86_file_start (void)
38698 {
38699 default_file_start ();
38700 #if TARGET_MACHO
38701 darwin_file_start ();
38702 #endif
38703 if (X86_FILE_START_VERSION_DIRECTIVE)
38704 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38705 if (X86_FILE_START_FLTUSED)
38706 fputs ("\t.global\t__fltused\n", asm_out_file);
38707 if (ix86_asm_dialect == ASM_INTEL)
38708 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38709 }
38710
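/* Cap field alignment on 32-bit targets: unless -malign-double is in
   effect, doubles and 64-bit integers are aligned to at most 32 bits,
   matching the traditional i386 ABI.  */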
38711 int
38712 x86_field_alignment (tree field, int computed)
38713 {
38714 enum machine_mode mode;
38715 tree type = TREE_TYPE (field);
38716
38717 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38718 return computed;
38719 mode = TYPE_MODE (strip_array_types (type));
38720 if (mode == DFmode || mode == DCmode
38721 || GET_MODE_CLASS (mode) == MODE_INT
38722 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38723 return MIN (32, computed);
38724 return computed;
38725 }
38726
38727 /* Output assembler code to FILE to increment profiler label # LABELNO
38728 for profiling a function entry. */
38729 void
38730 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38731 {
38732 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38733 : MCOUNT_NAME);
38734
38735 if (TARGET_64BIT)
38736 {
38737 #ifndef NO_PROFILE_COUNTERS
38738 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38739 #endif
38740
38741 if (!TARGET_PECOFF && flag_pic)
38742 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38743 else
38744 fprintf (file, "\tcall\t%s\n", mcount_name);
38745 }
38746 else if (flag_pic)
38747 {
38748 #ifndef NO_PROFILE_COUNTERS
38749 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38750 LPREFIX, labelno);
38751 #endif
38752 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38753 }
38754 else
38755 {
38756 #ifndef NO_PROFILE_COUNTERS
38757 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38758 LPREFIX, labelno);
38759 #endif
38760 fprintf (file, "\tcall\t%s\n", mcount_name);
38761 }
38762 }
38763
38764 /* We don't have exact information about the insn sizes, but we may assume
38765    quite safely that we are informed about all 1-byte insns and memory
38766 address sizes. This is enough to eliminate unnecessary padding in
38767 99% of cases. */
38768
38769 static int
38770 min_insn_size (rtx insn)
38771 {
38772 int l = 0, len;
38773
38774 if (!INSN_P (insn) || !active_insn_p (insn))
38775 return 0;
38776
38777   /* Discard alignments we've emitted and jump instructions.  */
38778 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
38779 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
38780 return 0;
38781
38782   /* Important case: calls are always 5 bytes.
38783      It is common to have many calls in a row.  */
38784 if (CALL_P (insn)
38785 && symbolic_reference_mentioned_p (PATTERN (insn))
38786 && !SIBLING_CALL_P (insn))
38787 return 5;
38788 len = get_attr_length (insn);
38789 if (len <= 1)
38790 return 1;
38791
38792 /* For normal instructions we rely on get_attr_length being exact,
38793 with a few exceptions. */
38794 if (!JUMP_P (insn))
38795 {
38796 enum attr_type type = get_attr_type (insn);
38797
38798 switch (type)
38799 {
38800 case TYPE_MULTI:
38801 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
38802 || asm_noperands (PATTERN (insn)) >= 0)
38803 return 0;
38804 break;
38805 case TYPE_OTHER:
38806 case TYPE_FCMP:
38807 break;
38808 default:
38809 /* Otherwise trust get_attr_length. */
38810 return len;
38811 }
38812
38813 l = get_attr_length_address (insn);
38814 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
38815 l = 4;
38816 }
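  /* Assume at least one opcode byte plus the address bytes found above;
     otherwise (e.g. for jumps) fall back to a minimum estimate of 2 bytes.  */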
38817 if (l)
38818 return 1+l;
38819 else
38820 return 2;
38821 }
38822
38823 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
38824
38825 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
38826 window. */
38827
38828 static void
38829 ix86_avoid_jump_mispredicts (void)
38830 {
38831 rtx insn, start = get_insns ();
38832 int nbytes = 0, njumps = 0;
38833 int isjump = 0;
38834
38835 /* Look for all minimal intervals of instructions containing 4 jumps.
38836 The intervals are bounded by START and INSN. NBYTES is the total
38837 size of instructions in the interval including INSN and not including
38838      START.  When NBYTES is smaller than 16 bytes, it is possible
38839      that the ends of START and INSN fall in the same 16-byte page.
38840 
38841      The smallest page offset at which INSN can start is the case where
38842      START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
38843      We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
38844 
38845      Don't consider an asm goto to be a jump: while it can contain a jump, it
38846      doesn't have to, control transfer to its label(s) can be performed through
38847      other means, and we also estimate the minimum length of all asm stmts as 0.  */
38848 for (insn = start; insn; insn = NEXT_INSN (insn))
38849 {
38850 int min_size;
38851
38852 if (LABEL_P (insn))
38853 {
38854 int align = label_to_alignment (insn);
38855 int max_skip = label_to_max_skip (insn);
38856
38857 if (max_skip > 15)
38858 max_skip = 15;
38859 	  /* If align > 3, only up to 16 - max_skip - 1 bytes can already be
38860 	     in the current 16-byte page, because otherwise
38861 	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
38862 	     bytes to reach a 16-byte boundary.  */
38863 if (align <= 0
38864 || (align <= 3 && max_skip != (1 << align) - 1))
38865 max_skip = 0;
38866 if (dump_file)
38867 fprintf (dump_file, "Label %i with max_skip %i\n",
38868 INSN_UID (insn), max_skip);
38869 if (max_skip)
38870 {
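	      /* The label may be padded by up to MAX_SKIP bytes; shrink the
		 window from the left so the padded interval still fits in
		 the 16-byte page.  */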
38871 while (nbytes + max_skip >= 16)
38872 {
38873 start = NEXT_INSN (start);
38874 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
38875 || CALL_P (start))
38876 njumps--, isjump = 1;
38877 else
38878 isjump = 0;
38879 nbytes -= min_insn_size (start);
38880 }
38881 }
38882 continue;
38883 }
38884
38885 min_size = min_insn_size (insn);
38886 nbytes += min_size;
38887 if (dump_file)
38888 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
38889 INSN_UID (insn), min_size);
38890 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
38891 || CALL_P (insn))
38892 njumps++;
38893 else
38894 continue;
38895
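      /* Slide the left end of the window forward until at most three jumps
	 remain between START and INSN.  */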
38896 while (njumps > 3)
38897 {
38898 start = NEXT_INSN (start);
38899 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
38900 || CALL_P (start))
38901 njumps--, isjump = 1;
38902 else
38903 isjump = 0;
38904 nbytes -= min_insn_size (start);
38905 }
38906 gcc_assert (njumps >= 0);
38907 if (dump_file)
38908 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
38909 INSN_UID (start), INSN_UID (insn), nbytes);
38910
38911 if (njumps == 3 && isjump && nbytes < 16)
38912 {
38913 int padsize = 15 - nbytes + min_insn_size (insn);
38914
38915 if (dump_file)
38916 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
38917 INSN_UID (insn), padsize);
38918 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
38919 }
38920 }
38921 }
38922 #endif
38923
38924 /* AMD Athlon works faster
38925    when RET is not the destination of a conditional jump or directly preceded
38926    by another jump instruction.  We avoid the penalty by inserting a NOP just
38927    before the RET instruction in such cases.  */
38928 static void
38929 ix86_pad_returns (void)
38930 {
38931 edge e;
38932 edge_iterator ei;
38933
38934 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
38935 {
38936 basic_block bb = e->src;
38937 rtx ret = BB_END (bb);
38938 rtx prev;
38939 bool replace = false;
38940
38941 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
38942 || optimize_bb_for_size_p (bb))
38943 continue;
38944 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
38945 if (active_insn_p (prev) || LABEL_P (prev))
38946 break;
38947 if (prev && LABEL_P (prev))
38948 {
38949 edge e;
38950 edge_iterator ei;
38951
38952 FOR_EACH_EDGE (e, ei, bb->preds)
38953 if (EDGE_FREQUENCY (e) && e->src->index >= 0
38954 && !(e->flags & EDGE_FALLTHRU))
38955 {
38956 replace = true;
38957 break;
38958 }
38959 }
38960 if (!replace)
38961 {
38962 prev = prev_active_insn (ret);
38963 if (prev
38964 && ((JUMP_P (prev) && any_condjump_p (prev))
38965 || CALL_P (prev)))
38966 replace = true;
38967 	      /* Empty functions get a branch mispredict even when
38968 the jump destination is not visible to us. */
38969 if (!prev && !optimize_function_for_size_p (cfun))
38970 replace = true;
38971 }
38972 if (replace)
38973 {
38974 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
38975 delete_insn (ret);
38976 }
38977 }
38978 }
38979
38980 /* Count the minimum number of instructions in BB. Return 4 if the
38981 number of instructions >= 4. */
38982
38983 static int
38984 ix86_count_insn_bb (basic_block bb)
38985 {
38986 rtx insn;
38987 int insn_count = 0;
38988
38989 /* Count number of instructions in this block. Return 4 if the number
38990 of instructions >= 4. */
38991 FOR_BB_INSNS (bb, insn)
38992 {
38993       /* Only happens in exit blocks.  */
38994 if (JUMP_P (insn)
38995 && ANY_RETURN_P (PATTERN (insn)))
38996 break;
38997
38998 if (NONDEBUG_INSN_P (insn)
38999 && GET_CODE (PATTERN (insn)) != USE
39000 && GET_CODE (PATTERN (insn)) != CLOBBER)
39001 {
39002 insn_count++;
39003 if (insn_count >= 4)
39004 return insn_count;
39005 }
39006 }
39007
39008 return insn_count;
39009 }
39010
39011
39012 /* Count the minimum number of instructions in code path in BB.
39013 Return 4 if the number of instructions >= 4. */
39014
39015 static int
39016 ix86_count_insn (basic_block bb)
39017 {
39018 edge e;
39019 edge_iterator ei;
39020 int min_prev_count;
39021
39022 /* Only bother counting instructions along paths with no
39023 more than 2 basic blocks between entry and exit. Given
39024 that BB has an edge to exit, determine if a predecessor
39025 of BB has an edge from entry. If so, compute the number
39026 of instructions in the predecessor block. If there
39027 happen to be multiple such blocks, compute the minimum. */
39028 min_prev_count = 4;
39029 FOR_EACH_EDGE (e, ei, bb->preds)
39030 {
39031 edge prev_e;
39032 edge_iterator prev_ei;
39033
39034 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39035 {
39036 min_prev_count = 0;
39037 break;
39038 }
39039 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39040 {
39041 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39042 {
39043 int count = ix86_count_insn_bb (e->src);
39044 if (count < min_prev_count)
39045 min_prev_count = count;
39046 break;
39047 }
39048 }
39049 }
39050
39051 if (min_prev_count < 4)
39052 min_prev_count += ix86_count_insn_bb (bb);
39053
39054 return min_prev_count;
39055 }
39056
39057 /* Pad short function to 4 instructions. */
39058
39059 static void
39060 ix86_pad_short_function (void)
39061 {
39062 edge e;
39063 edge_iterator ei;
39064
39065 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39066 {
39067 rtx ret = BB_END (e->src);
39068 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39069 {
39070 int insn_count = ix86_count_insn (e->src);
39071
39072 /* Pad short function. */
39073 if (insn_count < 4)
39074 {
39075 rtx insn = ret;
39076
39077 /* Find epilogue. */
39078 while (insn
39079 && (!NOTE_P (insn)
39080 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39081 insn = PREV_INSN (insn);
39082
39083 if (!insn)
39084 insn = ret;
39085
39086 /* Two NOPs count as one instruction. */
39087 insn_count = 2 * (4 - insn_count);
39088 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39089 }
39090 }
39091 }
39092 }
39093
39094 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39095 the epilogue, the Windows system unwinder will apply epilogue logic and
39096 produce incorrect offsets. This can be avoided by adding a nop between
39097 the last insn that can throw and the first insn of the epilogue. */
39098
39099 static void
39100 ix86_seh_fixup_eh_fallthru (void)
39101 {
39102 edge e;
39103 edge_iterator ei;
39104
39105 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39106 {
39107 rtx insn, next;
39108
39109 /* Find the beginning of the epilogue. */
39110 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39111 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39112 break;
39113 if (insn == NULL)
39114 continue;
39115
39116 /* We only care about preceding insns that can throw. */
39117 insn = prev_active_insn (insn);
39118 if (insn == NULL || !can_throw_internal (insn))
39119 continue;
39120
39121 /* Do not separate calls from their debug information. */
39122 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39123 if (NOTE_P (next)
39124 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39125 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39126 insn = next;
39127 else
39128 break;
39129
39130 emit_insn_after (gen_nops (const1_rtx), insn);
39131 }
39132 }
39133
39134 /* Implement machine specific optimizations. We implement padding of returns
39135 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39136 static void
39137 ix86_reorg (void)
39138 {
39139 /* We are freeing block_for_insn in the toplev to keep compatibility
39140 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39141 compute_bb_for_insn ();
39142
39143 if (TARGET_SEH && current_function_has_exception_handlers ())
39144 ix86_seh_fixup_eh_fallthru ();
39145
39146 if (optimize && optimize_function_for_speed_p (cfun))
39147 {
39148 if (TARGET_PAD_SHORT_FUNCTION)
39149 ix86_pad_short_function ();
39150 else if (TARGET_PAD_RETURNS)
39151 ix86_pad_returns ();
39152 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39153 if (TARGET_FOUR_JUMP_LIMIT)
39154 ix86_avoid_jump_mispredicts ();
39155 #endif
39156 }
39157 }
39158
39159 /* Return nonzero when a QImode register that must be represented via a REX
39160    prefix is used.  */
39161 bool
39162 x86_extended_QIreg_mentioned_p (rtx insn)
39163 {
39164 int i;
39165 extract_insn_cached (insn);
39166 for (i = 0; i < recog_data.n_operands; i++)
39167 if (GENERAL_REG_P (recog_data.operand[i])
39168 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39169 return true;
39170 return false;
39171 }
39172
39173 /* Return nonzero when P points to a register encoded via a REX prefix.
39174 Called via for_each_rtx. */
39175 static int
39176 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39177 {
39178 unsigned int regno;
39179 if (!REG_P (*p))
39180 return 0;
39181 regno = REGNO (*p);
39182 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39183 }
39184
39185 /* Return true when INSN mentions register that must be encoded using REX
39186 prefix. */
39187 bool
39188 x86_extended_reg_mentioned_p (rtx insn)
39189 {
39190 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39191 extended_reg_mentioned_1, NULL);
39192 }
39193
39194 /* If profitable, negate (without causing overflow) integer constant
39195 of mode MODE at location LOC. Return true in this case. */
39196 bool
39197 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39198 {
39199 HOST_WIDE_INT val;
39200
39201 if (!CONST_INT_P (*loc))
39202 return false;
39203
39204 switch (mode)
39205 {
39206 case DImode:
39207 /* DImode x86_64 constants must fit in 32 bits. */
39208 gcc_assert (x86_64_immediate_operand (*loc, mode));
39209
39210 mode = SImode;
39211 break;
39212
39213 case SImode:
39214 case HImode:
39215 case QImode:
39216 break;
39217
39218 default:
39219 gcc_unreachable ();
39220 }
39221
39222 /* Avoid overflows. */
39223 if (mode_signbit_p (mode, *loc))
39224 return false;
39225
39226 val = INTVAL (*loc);
39227
39228 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39229 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39230 if ((val < 0 && val != -128)
39231 || val == 128)
39232 {
39233 *loc = GEN_INT (-val);
39234 return true;
39235 }
39236
39237 return false;
39238 }
39239
39240 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39241 optabs would emit if we didn't have TFmode patterns. */
39242
39243 void
39244 x86_emit_floatuns (rtx operands[2])
39245 {
39246 rtx neglab, donelab, i0, i1, f0, in, out;
39247 enum machine_mode mode, inmode;
39248
39249 inmode = GET_MODE (operands[1]);
39250 gcc_assert (inmode == SImode || inmode == DImode);
39251
39252 out = operands[0];
39253 in = force_reg (inmode, operands[1]);
39254 mode = GET_MODE (out);
39255 neglab = gen_label_rtx ();
39256 donelab = gen_label_rtx ();
39257 f0 = gen_reg_rtx (mode);
39258
39259 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39260
39261 expand_float (out, in, 0);
39262
39263 emit_jump_insn (gen_jump (donelab));
39264 emit_barrier ();
39265
39266 emit_label (neglab);
39267
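  /* The input has its sign bit set.  Compute (in >> 1) | (in & 1) so that
     the halved value still rounds correctly, convert that to floating
     point, and then double the result.  */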
39268 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39269 1, OPTAB_DIRECT);
39270 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39271 1, OPTAB_DIRECT);
39272 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39273
39274 expand_float (f0, i0, 0);
39275
39276 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39277
39278 emit_label (donelab);
39279 }
39280 \f
39281 /* AVX512F does support 64-byte integer vector operations,
39282 thus the longest vector we are faced with is V64QImode. */
39283 #define MAX_VECT_LEN 64
39284
39285 struct expand_vec_perm_d
39286 {
39287 rtx target, op0, op1;
39288 unsigned char perm[MAX_VECT_LEN];
39289 enum machine_mode vmode;
39290 unsigned char nelt;
39291 bool one_operand_p;
39292 bool testing_p;
39293 };
39294
39295 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39296 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39297 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39298
39299 /* Get a vector mode of the same size as the original but with elements
39300 twice as wide. This is only guaranteed to apply to integral vectors. */
39301
39302 static inline enum machine_mode
39303 get_mode_wider_vector (enum machine_mode o)
39304 {
39305 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39306 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39307 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39308 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39309 return n;
39310 }
39311
39312 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39313 fill target with val via vec_duplicate. */
39314
39315 static bool
39316 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39317 {
39318 bool ok;
39319 rtx insn, dup;
39320
39321 /* First attempt to recognize VAL as-is. */
39322 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39323 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39324 if (recog_memoized (insn) < 0)
39325 {
39326 rtx seq;
39327 /* If that fails, force VAL into a register. */
39328
39329 start_sequence ();
39330 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39331 seq = get_insns ();
39332 end_sequence ();
39333 if (seq)
39334 emit_insn_before (seq, insn);
39335
39336 ok = recog_memoized (insn) >= 0;
39337 gcc_assert (ok);
39338 }
39339 return true;
39340 }
39341
39342 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39343 with all elements equal to VAR. Return true if successful. */
39344
39345 static bool
39346 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39347 rtx target, rtx val)
39348 {
39349 bool ok;
39350
39351 switch (mode)
39352 {
39353 case V2SImode:
39354 case V2SFmode:
39355 if (!mmx_ok)
39356 return false;
39357 /* FALLTHRU */
39358
39359 case V4DFmode:
39360 case V4DImode:
39361 case V8SFmode:
39362 case V8SImode:
39363 case V2DFmode:
39364 case V2DImode:
39365 case V4SFmode:
39366 case V4SImode:
39367 case V16SImode:
39368 case V8DImode:
39369 case V16SFmode:
39370 case V8DFmode:
39371 return ix86_vector_duplicate_value (mode, target, val);
39372
39373 case V4HImode:
39374 if (!mmx_ok)
39375 return false;
39376 if (TARGET_SSE || TARGET_3DNOW_A)
39377 {
39378 rtx x;
39379
39380 val = gen_lowpart (SImode, val);
39381 x = gen_rtx_TRUNCATE (HImode, val);
39382 x = gen_rtx_VEC_DUPLICATE (mode, x);
39383 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39384 return true;
39385 }
39386 goto widen;
39387
39388 case V8QImode:
39389 if (!mmx_ok)
39390 return false;
39391 goto widen;
39392
39393 case V8HImode:
39394 if (TARGET_SSE2)
39395 {
39396 struct expand_vec_perm_d dperm;
39397 rtx tmp1, tmp2;
39398
39399 permute:
39400 memset (&dperm, 0, sizeof (dperm));
39401 dperm.target = target;
39402 dperm.vmode = mode;
39403 dperm.nelt = GET_MODE_NUNITS (mode);
39404 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39405 dperm.one_operand_p = true;
39406
39407 /* Extend to SImode using a paradoxical SUBREG. */
39408 tmp1 = gen_reg_rtx (SImode);
39409 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39410
39411 /* Insert the SImode value as low element of a V4SImode vector. */
39412 tmp2 = gen_reg_rtx (V4SImode);
39413 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39414 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39415
39416 ok = (expand_vec_perm_1 (&dperm)
39417 || expand_vec_perm_broadcast_1 (&dperm));
39418 gcc_assert (ok);
39419 return ok;
39420 }
39421 goto widen;
39422
39423 case V16QImode:
39424 if (TARGET_SSE2)
39425 goto permute;
39426 goto widen;
39427
39428 widen:
39429 /* Replicate the value once into the next wider mode and recurse. */
39430 {
39431 enum machine_mode smode, wsmode, wvmode;
39432 rtx x;
39433
39434 smode = GET_MODE_INNER (mode);
39435 wvmode = get_mode_wider_vector (mode);
39436 wsmode = GET_MODE_INNER (wvmode);
39437
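	/* Pack two copies of the scalar into the wider scalar element:
	   val |= val << bitsize (smode).  */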
39438 val = convert_modes (wsmode, smode, val, true);
39439 x = expand_simple_binop (wsmode, ASHIFT, val,
39440 GEN_INT (GET_MODE_BITSIZE (smode)),
39441 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39442 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39443
39444 x = gen_reg_rtx (wvmode);
39445 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39446 gcc_assert (ok);
39447 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39448 return ok;
39449 }
39450
39451 case V16HImode:
39452 case V32QImode:
39453 {
39454 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39455 rtx x = gen_reg_rtx (hvmode);
39456
39457 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39458 gcc_assert (ok);
39459
39460 x = gen_rtx_VEC_CONCAT (mode, x, x);
39461 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39462 }
39463 return true;
39464
39465 default:
39466 return false;
39467 }
39468 }
39469
39470 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39471 whose ONE_VAR element is VAR, and other elements are zero. Return true
39472 if successful. */
39473
39474 static bool
39475 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39476 rtx target, rtx var, int one_var)
39477 {
39478 enum machine_mode vsimode;
39479 rtx new_target;
39480 rtx x, tmp;
39481 bool use_vector_set = false;
39482
39483 switch (mode)
39484 {
39485 case V2DImode:
39486 /* For SSE4.1, we normally use vector set. But if the second
39487 element is zero and inter-unit moves are OK, we use movq
39488 instead. */
39489 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39490 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39491 && one_var == 0));
39492 break;
39493 case V16QImode:
39494 case V4SImode:
39495 case V4SFmode:
39496 use_vector_set = TARGET_SSE4_1;
39497 break;
39498 case V8HImode:
39499 use_vector_set = TARGET_SSE2;
39500 break;
39501 case V4HImode:
39502 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39503 break;
39504 case V32QImode:
39505 case V16HImode:
39506 case V8SImode:
39507 case V8SFmode:
39508 case V4DFmode:
39509 use_vector_set = TARGET_AVX;
39510 break;
39511 case V4DImode:
39512 /* Use ix86_expand_vector_set in 64bit mode only. */
39513 use_vector_set = TARGET_AVX && TARGET_64BIT;
39514 break;
39515 default:
39516 break;
39517 }
39518
39519 if (use_vector_set)
39520 {
39521 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39522 var = force_reg (GET_MODE_INNER (mode), var);
39523 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39524 return true;
39525 }
39526
39527 switch (mode)
39528 {
39529 case V2SFmode:
39530 case V2SImode:
39531 if (!mmx_ok)
39532 return false;
39533 /* FALLTHRU */
39534
39535 case V2DFmode:
39536 case V2DImode:
39537 if (one_var != 0)
39538 return false;
39539 var = force_reg (GET_MODE_INNER (mode), var);
39540 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39541 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39542 return true;
39543
39544 case V4SFmode:
39545 case V4SImode:
39546 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39547 new_target = gen_reg_rtx (mode);
39548 else
39549 new_target = target;
39550 var = force_reg (GET_MODE_INNER (mode), var);
39551 x = gen_rtx_VEC_DUPLICATE (mode, var);
39552 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39553 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39554 if (one_var != 0)
39555 {
39556 /* We need to shuffle the value to the correct position, so
39557 create a new pseudo to store the intermediate result. */
39558
39559 /* With SSE2, we can use the integer shuffle insns. */
39560 if (mode != V4SFmode && TARGET_SSE2)
39561 {
39562 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39563 const1_rtx,
39564 GEN_INT (one_var == 1 ? 0 : 1),
39565 GEN_INT (one_var == 2 ? 0 : 1),
39566 GEN_INT (one_var == 3 ? 0 : 1)));
39567 if (target != new_target)
39568 emit_move_insn (target, new_target);
39569 return true;
39570 }
39571
39572 /* Otherwise convert the intermediate result to V4SFmode and
39573 use the SSE1 shuffle instructions. */
39574 if (mode != V4SFmode)
39575 {
39576 tmp = gen_reg_rtx (V4SFmode);
39577 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39578 }
39579 else
39580 tmp = new_target;
39581
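	  /* In this shuffle, selectors 0-3 index the first input and 4-7 the
	     second; both inputs are TMP here, so the net effect is to move
	     VAR from element 0 to position ONE_VAR and leave zeros
	     elsewhere.  */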
39582 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39583 const1_rtx,
39584 GEN_INT (one_var == 1 ? 0 : 1),
39585 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39586 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39587
39588 if (mode != V4SFmode)
39589 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39590 else if (tmp != target)
39591 emit_move_insn (target, tmp);
39592 }
39593 else if (target != new_target)
39594 emit_move_insn (target, new_target);
39595 return true;
39596
39597 case V8HImode:
39598 case V16QImode:
39599 vsimode = V4SImode;
39600 goto widen;
39601 case V4HImode:
39602 case V8QImode:
39603 if (!mmx_ok)
39604 return false;
39605 vsimode = V2SImode;
39606 goto widen;
39607 widen:
39608 if (one_var != 0)
39609 return false;
39610
39611 /* Zero extend the variable element to SImode and recurse. */
39612 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39613
39614 x = gen_reg_rtx (vsimode);
39615 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39616 var, one_var))
39617 gcc_unreachable ();
39618
39619 emit_move_insn (target, gen_lowpart (mode, x));
39620 return true;
39621
39622 default:
39623 return false;
39624 }
39625 }
39626
39627 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39628 consisting of the values in VALS. It is known that all elements
39629 except ONE_VAR are constants. Return true if successful. */
39630
39631 static bool
39632 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39633 rtx target, rtx vals, int one_var)
39634 {
39635 rtx var = XVECEXP (vals, 0, one_var);
39636 enum machine_mode wmode;
39637 rtx const_vec, x;
39638
39639 const_vec = copy_rtx (vals);
39640 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39641 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39642
39643 switch (mode)
39644 {
39645 case V2DFmode:
39646 case V2DImode:
39647 case V2SFmode:
39648 case V2SImode:
39649 /* For the two element vectors, it's just as easy to use
39650 the general case. */
39651 return false;
39652
39653 case V4DImode:
39654 /* Use ix86_expand_vector_set in 64bit mode only. */
39655 if (!TARGET_64BIT)
39656 return false;
39657 case V4DFmode:
39658 case V8SFmode:
39659 case V8SImode:
39660 case V16HImode:
39661 case V32QImode:
39662 case V4SFmode:
39663 case V4SImode:
39664 case V8HImode:
39665 case V4HImode:
39666 break;
39667
39668 case V16QImode:
39669 if (TARGET_SSE4_1)
39670 break;
39671 wmode = V8HImode;
39672 goto widen;
39673 case V8QImode:
39674 wmode = V4HImode;
39675 goto widen;
39676 widen:
39677 /* There's no way to set one QImode entry easily. Combine
39678 the variable value with its adjacent constant value, and
39679 promote to an HImode set. */
39680 x = XVECEXP (vals, 0, one_var ^ 1);
39681 if (one_var & 1)
39682 {
39683 var = convert_modes (HImode, QImode, var, true);
39684 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39685 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39686 x = GEN_INT (INTVAL (x) & 0xff);
39687 }
39688 else
39689 {
39690 var = convert_modes (HImode, QImode, var, true);
39691 x = gen_int_mode (INTVAL (x) << 8, HImode);
39692 }
39693 if (x != const0_rtx)
39694 var = expand_simple_binop (HImode, IOR, var, x, var,
39695 1, OPTAB_LIB_WIDEN);
39696
39697 x = gen_reg_rtx (wmode);
39698 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39699 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39700
39701 emit_move_insn (target, gen_lowpart (mode, x));
39702 return true;
39703
39704 default:
39705 return false;
39706 }
39707
39708 emit_move_insn (target, const_vec);
39709 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39710 return true;
39711 }
39712
39713 /* A subroutine of ix86_expand_vector_init_general. Use vector
39714 concatenate to handle the most general case: all values variable,
39715 and none identical. */
39716
39717 static void
39718 ix86_expand_vector_init_concat (enum machine_mode mode,
39719 rtx target, rtx *ops, int n)
39720 {
39721 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39722 rtx first[16], second[8], third[4];
39723 rtvec v;
39724 int i, j;
39725
39726 switch (n)
39727 {
39728 case 2:
39729 switch (mode)
39730 {
39731 case V16SImode:
39732 cmode = V8SImode;
39733 break;
39734 case V16SFmode:
39735 cmode = V8SFmode;
39736 break;
39737 case V8DImode:
39738 cmode = V4DImode;
39739 break;
39740 case V8DFmode:
39741 cmode = V4DFmode;
39742 break;
39743 case V8SImode:
39744 cmode = V4SImode;
39745 break;
39746 case V8SFmode:
39747 cmode = V4SFmode;
39748 break;
39749 case V4DImode:
39750 cmode = V2DImode;
39751 break;
39752 case V4DFmode:
39753 cmode = V2DFmode;
39754 break;
39755 case V4SImode:
39756 cmode = V2SImode;
39757 break;
39758 case V4SFmode:
39759 cmode = V2SFmode;
39760 break;
39761 case V2DImode:
39762 cmode = DImode;
39763 break;
39764 case V2SImode:
39765 cmode = SImode;
39766 break;
39767 case V2DFmode:
39768 cmode = DFmode;
39769 break;
39770 case V2SFmode:
39771 cmode = SFmode;
39772 break;
39773 default:
39774 gcc_unreachable ();
39775 }
39776
39777 if (!register_operand (ops[1], cmode))
39778 ops[1] = force_reg (cmode, ops[1]);
39779 if (!register_operand (ops[0], cmode))
39780 ops[0] = force_reg (cmode, ops[0]);
39781 emit_insn (gen_rtx_SET (VOIDmode, target,
39782 gen_rtx_VEC_CONCAT (mode, ops[0],
39783 ops[1])));
39784 break;
39785
39786 case 4:
39787 switch (mode)
39788 {
39789 case V4DImode:
39790 cmode = V2DImode;
39791 break;
39792 case V4DFmode:
39793 cmode = V2DFmode;
39794 break;
39795 case V4SImode:
39796 cmode = V2SImode;
39797 break;
39798 case V4SFmode:
39799 cmode = V2SFmode;
39800 break;
39801 default:
39802 gcc_unreachable ();
39803 }
39804 goto half;
39805
39806 case 8:
39807 switch (mode)
39808 {
39809 case V8DImode:
39810 cmode = V2DImode;
39811 hmode = V4DImode;
39812 break;
39813 case V8DFmode:
39814 cmode = V2DFmode;
39815 hmode = V4DFmode;
39816 break;
39817 case V8SImode:
39818 cmode = V2SImode;
39819 hmode = V4SImode;
39820 break;
39821 case V8SFmode:
39822 cmode = V2SFmode;
39823 hmode = V4SFmode;
39824 break;
39825 default:
39826 gcc_unreachable ();
39827 }
39828 goto half;
39829
39830 case 16:
39831 switch (mode)
39832 {
39833 case V16SImode:
39834 cmode = V2SImode;
39835 hmode = V4SImode;
39836 gmode = V8SImode;
39837 break;
39838 case V16SFmode:
39839 cmode = V2SFmode;
39840 hmode = V4SFmode;
39841 gmode = V8SFmode;
39842 break;
39843 default:
39844 gcc_unreachable ();
39845 }
39846 goto half;
39847
39848 half:
39849 /* FIXME: We process inputs backward to help RA. PR 36222. */
39850 i = n - 1;
39851 j = (n >> 1) - 1;
39852 for (; i > 0; i -= 2, j--)
39853 {
39854 first[j] = gen_reg_rtx (cmode);
39855 v = gen_rtvec (2, ops[i - 1], ops[i]);
39856 ix86_expand_vector_init (false, first[j],
39857 gen_rtx_PARALLEL (cmode, v));
39858 }
39859
39860 n >>= 1;
39861 if (n > 4)
39862 {
39863 gcc_assert (hmode != VOIDmode);
39864 gcc_assert (gmode != VOIDmode);
39865 for (i = j = 0; i < n; i += 2, j++)
39866 {
39867 second[j] = gen_reg_rtx (hmode);
39868 ix86_expand_vector_init_concat (hmode, second [j],
39869 &first [i], 2);
39870 }
39871 n >>= 1;
39872 for (i = j = 0; i < n; i += 2, j++)
39873 {
39874 third[j] = gen_reg_rtx (gmode);
39875 ix86_expand_vector_init_concat (gmode, third[j],
39876 &second[i], 2);
39877 }
39878 n >>= 1;
39879 ix86_expand_vector_init_concat (mode, target, third, n);
39880 }
39881 else if (n > 2)
39882 {
39883 gcc_assert (hmode != VOIDmode);
39884 for (i = j = 0; i < n; i += 2, j++)
39885 {
39886 second[j] = gen_reg_rtx (hmode);
39887 ix86_expand_vector_init_concat (hmode, second [j],
39888 &first [i], 2);
39889 }
39890 n >>= 1;
39891 ix86_expand_vector_init_concat (mode, target, second, n);
39892 }
39893 else
39894 ix86_expand_vector_init_concat (mode, target, first, n);
39895 break;
39896
39897 default:
39898 gcc_unreachable ();
39899 }
39900 }
39901
39902 /* A subroutine of ix86_expand_vector_init_general. Use vector
39903 interleave to handle the most general case: all values variable,
39904 and none identical. */
39905
39906 static void
39907 ix86_expand_vector_init_interleave (enum machine_mode mode,
39908 rtx target, rtx *ops, int n)
39909 {
39910 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
39911 int i, j;
39912 rtx op0, op1;
39913 rtx (*gen_load_even) (rtx, rtx, rtx);
39914 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
39915 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
39916
39917 switch (mode)
39918 {
39919 case V8HImode:
39920 gen_load_even = gen_vec_setv8hi;
39921 gen_interleave_first_low = gen_vec_interleave_lowv4si;
39922 gen_interleave_second_low = gen_vec_interleave_lowv2di;
39923 inner_mode = HImode;
39924 first_imode = V4SImode;
39925 second_imode = V2DImode;
39926 third_imode = VOIDmode;
39927 break;
39928 case V16QImode:
39929 gen_load_even = gen_vec_setv16qi;
39930 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
39931 gen_interleave_second_low = gen_vec_interleave_lowv4si;
39932 inner_mode = QImode;
39933 first_imode = V8HImode;
39934 second_imode = V4SImode;
39935 third_imode = V2DImode;
39936 break;
39937 default:
39938 gcc_unreachable ();
39939 }
39940
39941 for (i = 0; i < n; i++)
39942 {
39943       /* Extend the odd element to SImode using a paradoxical SUBREG.  */
39944 op0 = gen_reg_rtx (SImode);
39945 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
39946
39947 /* Insert the SImode value as low element of V4SImode vector. */
39948 op1 = gen_reg_rtx (V4SImode);
39949 op0 = gen_rtx_VEC_MERGE (V4SImode,
39950 gen_rtx_VEC_DUPLICATE (V4SImode,
39951 op0),
39952 CONST0_RTX (V4SImode),
39953 const1_rtx);
39954 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
39955
39956       /* Cast the V4SImode vector back to a vector in original mode.  */
39957 op0 = gen_reg_rtx (mode);
39958 emit_move_insn (op0, gen_lowpart (mode, op1));
39959
39960 /* Load even elements into the second position. */
39961 emit_insn (gen_load_even (op0,
39962 force_reg (inner_mode,
39963 ops [i + i + 1]),
39964 const1_rtx));
39965
39966 /* Cast vector to FIRST_IMODE vector. */
39967 ops[i] = gen_reg_rtx (first_imode);
39968 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
39969 }
39970
39971 /* Interleave low FIRST_IMODE vectors. */
39972 for (i = j = 0; i < n; i += 2, j++)
39973 {
39974 op0 = gen_reg_rtx (first_imode);
39975 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
39976
39977 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
39978 ops[j] = gen_reg_rtx (second_imode);
39979 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
39980 }
39981
39982 /* Interleave low SECOND_IMODE vectors. */
39983 switch (second_imode)
39984 {
39985 case V4SImode:
39986 for (i = j = 0; i < n / 2; i += 2, j++)
39987 {
39988 op0 = gen_reg_rtx (second_imode);
39989 emit_insn (gen_interleave_second_low (op0, ops[i],
39990 ops[i + 1]));
39991
39992 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
39993 vector. */
39994 ops[j] = gen_reg_rtx (third_imode);
39995 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
39996 }
39997 second_imode = V2DImode;
39998 gen_interleave_second_low = gen_vec_interleave_lowv2di;
39999 /* FALLTHRU */
40000
40001 case V2DImode:
40002 op0 = gen_reg_rtx (second_imode);
40003 emit_insn (gen_interleave_second_low (op0, ops[0],
40004 ops[1]));
40005
40006       /* Cast the SECOND_IMODE vector back to a vector in the original
40007 	 mode.  */
40008 emit_insn (gen_rtx_SET (VOIDmode, target,
40009 gen_lowpart (mode, op0)));
40010 break;
40011
40012 default:
40013 gcc_unreachable ();
40014 }
40015 }
40016
40017 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40018 all values variable, and none identical. */
40019
40020 static void
40021 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40022 rtx target, rtx vals)
40023 {
40024 rtx ops[64], op0, op1;
40025 enum machine_mode half_mode = VOIDmode;
40026 int n, i;
40027
40028 switch (mode)
40029 {
40030 case V2SFmode:
40031 case V2SImode:
40032 if (!mmx_ok && !TARGET_SSE)
40033 break;
40034 /* FALLTHRU */
40035
40036 case V16SImode:
40037 case V16SFmode:
40038 case V8DFmode:
40039 case V8DImode:
40040 case V8SFmode:
40041 case V8SImode:
40042 case V4DFmode:
40043 case V4DImode:
40044 case V4SFmode:
40045 case V4SImode:
40046 case V2DFmode:
40047 case V2DImode:
40048 n = GET_MODE_NUNITS (mode);
40049 for (i = 0; i < n; i++)
40050 ops[i] = XVECEXP (vals, 0, i);
40051 ix86_expand_vector_init_concat (mode, target, ops, n);
40052 return;
40053
40054 case V32QImode:
40055 half_mode = V16QImode;
40056 goto half;
40057
40058 case V16HImode:
40059 half_mode = V8HImode;
40060 goto half;
40061
40062 half:
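      /* Build each 128-bit half with the interleave expander, then
	 concatenate the two halves into the 256-bit TARGET.  */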
40063 n = GET_MODE_NUNITS (mode);
40064 for (i = 0; i < n; i++)
40065 ops[i] = XVECEXP (vals, 0, i);
40066 op0 = gen_reg_rtx (half_mode);
40067 op1 = gen_reg_rtx (half_mode);
40068 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40069 n >> 2);
40070 ix86_expand_vector_init_interleave (half_mode, op1,
40071 &ops [n >> 1], n >> 2);
40072 emit_insn (gen_rtx_SET (VOIDmode, target,
40073 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40074 return;
40075
40076 case V16QImode:
40077 if (!TARGET_SSE4_1)
40078 break;
40079 /* FALLTHRU */
40080
40081 case V8HImode:
40082 if (!TARGET_SSE2)
40083 break;
40084
40085 /* Don't use ix86_expand_vector_init_interleave if we can't
40086 move from GPR to SSE register directly. */
40087 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40088 break;
40089
40090 n = GET_MODE_NUNITS (mode);
40091 for (i = 0; i < n; i++)
40092 ops[i] = XVECEXP (vals, 0, i);
40093 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40094 return;
40095
40096 case V4HImode:
40097 case V8QImode:
40098 break;
40099
40100 default:
40101 gcc_unreachable ();
40102 }
40103
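  /* Otherwise assemble the vector in integer registers: pack the elements
     of each word-sized chunk together with shifts and IORs, then combine
     the words and move the result into TARGET.  */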
40104 {
40105 int i, j, n_elts, n_words, n_elt_per_word;
40106 enum machine_mode inner_mode;
40107 rtx words[4], shift;
40108
40109 inner_mode = GET_MODE_INNER (mode);
40110 n_elts = GET_MODE_NUNITS (mode);
40111 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40112 n_elt_per_word = n_elts / n_words;
40113 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40114
40115 for (i = 0; i < n_words; ++i)
40116 {
40117 rtx word = NULL_RTX;
40118
40119 for (j = 0; j < n_elt_per_word; ++j)
40120 {
40121 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40122 elt = convert_modes (word_mode, inner_mode, elt, true);
40123
40124 if (j == 0)
40125 word = elt;
40126 else
40127 {
40128 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40129 word, 1, OPTAB_LIB_WIDEN);
40130 word = expand_simple_binop (word_mode, IOR, word, elt,
40131 word, 1, OPTAB_LIB_WIDEN);
40132 }
40133 }
40134
40135 words[i] = word;
40136 }
40137
40138 if (n_words == 1)
40139 emit_move_insn (target, gen_lowpart (mode, words[0]));
40140 else if (n_words == 2)
40141 {
40142 rtx tmp = gen_reg_rtx (mode);
40143 emit_clobber (tmp);
40144 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40145 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40146 emit_move_insn (target, tmp);
40147 }
40148 else if (n_words == 4)
40149 {
40150 rtx tmp = gen_reg_rtx (V4SImode);
40151 gcc_assert (word_mode == SImode);
40152 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40153 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40154 emit_move_insn (target, gen_lowpart (mode, tmp));
40155 }
40156 else
40157 gcc_unreachable ();
40158 }
40159 }
40160
40161 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40162 instructions unless MMX_OK is true. */
40163
40164 void
40165 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40166 {
40167 enum machine_mode mode = GET_MODE (target);
40168 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40169 int n_elts = GET_MODE_NUNITS (mode);
40170 int n_var = 0, one_var = -1;
40171 bool all_same = true, all_const_zero = true;
40172 int i;
40173 rtx x;
40174
40175 for (i = 0; i < n_elts; ++i)
40176 {
40177 x = XVECEXP (vals, 0, i);
40178 if (!(CONST_INT_P (x)
40179 || GET_CODE (x) == CONST_DOUBLE
40180 || GET_CODE (x) == CONST_FIXED))
40181 n_var++, one_var = i;
40182 else if (x != CONST0_RTX (inner_mode))
40183 all_const_zero = false;
40184 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40185 all_same = false;
40186 }
40187
40188 /* Constants are best loaded from the constant pool. */
40189 if (n_var == 0)
40190 {
40191 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40192 return;
40193 }
40194
40195 /* If all values are identical, broadcast the value. */
40196 if (all_same
40197 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40198 XVECEXP (vals, 0, 0)))
40199 return;
40200
40201 /* Values where only one field is non-constant are best loaded from
40202 the pool and overwritten via move later. */
40203 if (n_var == 1)
40204 {
40205 if (all_const_zero
40206 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40207 XVECEXP (vals, 0, one_var),
40208 one_var))
40209 return;
40210
40211 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40212 return;
40213 }
40214
40215 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40216 }
40217
40218 void
40219 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40220 {
40221 enum machine_mode mode = GET_MODE (target);
40222 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40223 enum machine_mode half_mode;
40224 bool use_vec_merge = false;
40225 rtx tmp;
40226 static rtx (*gen_extract[6][2]) (rtx, rtx)
40227 = {
40228 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40229 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40230 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40231 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40232 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40233 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40234 };
40235 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40236 = {
40237 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40238 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40239 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40240 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40241 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40242 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40243 };
40244 int i, j, n;
40245
40246 switch (mode)
40247 {
40248 case V2SFmode:
40249 case V2SImode:
40250 if (mmx_ok)
40251 {
40252 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40253 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40254 if (elt == 0)
40255 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40256 else
40257 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40258 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40259 return;
40260 }
40261 break;
40262
40263 case V2DImode:
40264 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40265 if (use_vec_merge)
40266 break;
40267
40268 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40269 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40270 if (elt == 0)
40271 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40272 else
40273 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40274 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40275 return;
40276
40277 case V2DFmode:
40278 {
40279 rtx op0, op1;
40280
40281 /* For the two element vectors, we implement a VEC_CONCAT with
40282 the extraction of the other element. */
40283
40284 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40285 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40286
40287 if (elt == 0)
40288 op0 = val, op1 = tmp;
40289 else
40290 op0 = tmp, op1 = val;
40291
40292 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40293 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40294 }
40295 return;
40296
40297 case V4SFmode:
40298 use_vec_merge = TARGET_SSE4_1;
40299 if (use_vec_merge)
40300 break;
40301
40302 switch (elt)
40303 {
40304 case 0:
40305 use_vec_merge = true;
40306 break;
40307
40308 case 1:
40309 /* tmp = target = A B C D */
40310 tmp = copy_to_reg (target);
40311 /* target = A A B B */
40312 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40313 /* target = X A B B */
40314 ix86_expand_vector_set (false, target, val, 0);
40315 /* target = A X C D */
40316 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40317 const1_rtx, const0_rtx,
40318 GEN_INT (2+4), GEN_INT (3+4)));
40319 return;
40320
40321 case 2:
40322 /* tmp = target = A B C D */
40323 tmp = copy_to_reg (target);
40324 /* tmp = X B C D */
40325 ix86_expand_vector_set (false, tmp, val, 0);
40326 /* target = A B X D */
40327 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40328 const0_rtx, const1_rtx,
40329 GEN_INT (0+4), GEN_INT (3+4)));
40330 return;
40331
40332 case 3:
40333 /* tmp = target = A B C D */
40334 tmp = copy_to_reg (target);
40335 /* tmp = X B C D */
40336 ix86_expand_vector_set (false, tmp, val, 0);
40337 	  /* target = A B C X */
40338 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40339 const0_rtx, const1_rtx,
40340 GEN_INT (2+4), GEN_INT (0+4)));
40341 return;
40342
40343 default:
40344 gcc_unreachable ();
40345 }
40346 break;
40347
40348 case V4SImode:
40349 use_vec_merge = TARGET_SSE4_1;
40350 if (use_vec_merge)
40351 break;
40352
40353 /* Element 0 handled by vec_merge below. */
40354 if (elt == 0)
40355 {
40356 use_vec_merge = true;
40357 break;
40358 }
40359
40360 if (TARGET_SSE2)
40361 {
40362 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40363 store into element 0, then shuffle them back. */
40364
40365 rtx order[4];
40366
40367 order[0] = GEN_INT (elt);
40368 order[1] = const1_rtx;
40369 order[2] = const2_rtx;
40370 order[3] = GEN_INT (3);
40371 order[elt] = const0_rtx;
40372
40373 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40374 order[1], order[2], order[3]));
40375
40376 ix86_expand_vector_set (false, target, val, 0);
40377
40378 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40379 order[1], order[2], order[3]));
40380 }
40381 else
40382 {
40383 /* For SSE1, we have to reuse the V4SF code. */
40384 rtx t = gen_reg_rtx (V4SFmode);
40385 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40386 emit_move_insn (target, gen_lowpart (mode, t));
40387 }
40388 return;
40389
40390 case V8HImode:
40391 use_vec_merge = TARGET_SSE2;
40392 break;
40393 case V4HImode:
40394 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40395 break;
40396
40397 case V16QImode:
40398 use_vec_merge = TARGET_SSE4_1;
40399 break;
40400
40401 case V8QImode:
40402 break;
40403
40404 case V32QImode:
40405 half_mode = V16QImode;
40406 j = 0;
40407 n = 16;
40408 goto half;
40409
40410 case V16HImode:
40411 half_mode = V8HImode;
40412 j = 1;
40413 n = 8;
40414 goto half;
40415
40416 case V8SImode:
40417 half_mode = V4SImode;
40418 j = 2;
40419 n = 4;
40420 goto half;
40421
40422 case V4DImode:
40423 half_mode = V2DImode;
40424 j = 3;
40425 n = 2;
40426 goto half;
40427
40428 case V8SFmode:
40429 half_mode = V4SFmode;
40430 j = 4;
40431 n = 4;
40432 goto half;
40433
40434 case V4DFmode:
40435 half_mode = V2DFmode;
40436 j = 5;
40437 n = 2;
40438 goto half;
40439
40440 half:
40441 /* Compute offset. */
40442 i = elt / n;
40443 elt %= n;
40444
40445 gcc_assert (i <= 1);
40446
40447 /* Extract the half. */
40448 tmp = gen_reg_rtx (half_mode);
40449 emit_insn (gen_extract[j][i] (tmp, target));
40450
40451 /* Put val in tmp at elt. */
40452 ix86_expand_vector_set (false, tmp, val, elt);
40453
40454 /* Put it back. */
40455 emit_insn (gen_insert[j][i] (target, target, tmp));
40456 return;
40457
40458 default:
40459 break;
40460 }
40461
40462 if (use_vec_merge)
40463 {
40464 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40465 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40466 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40467 }
40468 else
40469 {
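      /* No direct way to insert the element: go through memory.  Spill
	 TARGET to a stack slot, store VAL into the selected element, and
	 reload the vector.  */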
40470 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40471
40472 emit_move_insn (mem, target);
40473
40474 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40475 emit_move_insn (tmp, val);
40476
40477 emit_move_insn (target, mem);
40478 }
40479 }
40480
40481 void
40482 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40483 {
40484 enum machine_mode mode = GET_MODE (vec);
40485 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40486 bool use_vec_extr = false;
40487 rtx tmp;
40488
40489 switch (mode)
40490 {
40491 case V2SImode:
40492 case V2SFmode:
40493 if (!mmx_ok)
40494 break;
40495 /* FALLTHRU */
40496
40497 case V2DFmode:
40498 case V2DImode:
40499 use_vec_extr = true;
40500 break;
40501
40502 case V4SFmode:
40503 use_vec_extr = TARGET_SSE4_1;
40504 if (use_vec_extr)
40505 break;
40506
40507 switch (elt)
40508 {
40509 case 0:
40510 tmp = vec;
40511 break;
40512
40513 case 1:
40514 case 3:
40515 tmp = gen_reg_rtx (mode);
40516 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40517 GEN_INT (elt), GEN_INT (elt),
40518 GEN_INT (elt+4), GEN_INT (elt+4)));
40519 break;
40520
40521 case 2:
40522 tmp = gen_reg_rtx (mode);
40523 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40524 break;
40525
40526 default:
40527 gcc_unreachable ();
40528 }
40529 vec = tmp;
40530 use_vec_extr = true;
40531 elt = 0;
40532 break;
40533
40534 case V4SImode:
40535 use_vec_extr = TARGET_SSE4_1;
40536 if (use_vec_extr)
40537 break;
40538
40539 if (TARGET_SSE2)
40540 {
40541 switch (elt)
40542 {
40543 case 0:
40544 tmp = vec;
40545 break;
40546
40547 case 1:
40548 case 3:
40549 tmp = gen_reg_rtx (mode);
40550 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40551 GEN_INT (elt), GEN_INT (elt),
40552 GEN_INT (elt), GEN_INT (elt)));
40553 break;
40554
40555 case 2:
40556 tmp = gen_reg_rtx (mode);
40557 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40558 break;
40559
40560 default:
40561 gcc_unreachable ();
40562 }
40563 vec = tmp;
40564 use_vec_extr = true;
40565 elt = 0;
40566 }
40567 else
40568 {
40569 /* For SSE1, we have to reuse the V4SF code. */
40570 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40571 gen_lowpart (V4SFmode, vec), elt);
40572 return;
40573 }
40574 break;
40575
40576 case V8HImode:
40577 use_vec_extr = TARGET_SSE2;
40578 break;
40579 case V4HImode:
40580 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40581 break;
40582
40583 case V16QImode:
40584 use_vec_extr = TARGET_SSE4_1;
40585 break;
40586
40587 case V8SFmode:
40588 if (TARGET_AVX)
40589 {
40590 tmp = gen_reg_rtx (V4SFmode);
40591 if (elt < 4)
40592 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40593 else
40594 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40595 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40596 return;
40597 }
40598 break;
40599
40600 case V4DFmode:
40601 if (TARGET_AVX)
40602 {
40603 tmp = gen_reg_rtx (V2DFmode);
40604 if (elt < 2)
40605 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40606 else
40607 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40608 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40609 return;
40610 }
40611 break;
40612
40613 case V32QImode:
40614 if (TARGET_AVX)
40615 {
40616 tmp = gen_reg_rtx (V16QImode);
40617 if (elt < 16)
40618 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40619 else
40620 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40621 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40622 return;
40623 }
40624 break;
40625
40626 case V16HImode:
40627 if (TARGET_AVX)
40628 {
40629 tmp = gen_reg_rtx (V8HImode);
40630 if (elt < 8)
40631 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40632 else
40633 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40634 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40635 return;
40636 }
40637 break;
40638
40639 case V8SImode:
40640 if (TARGET_AVX)
40641 {
40642 tmp = gen_reg_rtx (V4SImode);
40643 if (elt < 4)
40644 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40645 else
40646 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40647 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40648 return;
40649 }
40650 break;
40651
40652 case V4DImode:
40653 if (TARGET_AVX)
40654 {
40655 tmp = gen_reg_rtx (V2DImode);
40656 if (elt < 2)
40657 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40658 else
40659 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40660 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40661 return;
40662 }
40663 break;
40664
40665 case V16SFmode:
40666 tmp = gen_reg_rtx (V8SFmode);
40667 if (elt < 8)
40668 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40669 else
40670 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40671 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40672 return;
40673
40674 case V8DFmode:
40675 tmp = gen_reg_rtx (V4DFmode);
40676 if (elt < 4)
40677 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40678 else
40679 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40680 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40681 return;
40682
40683 case V16SImode:
40684 tmp = gen_reg_rtx (V8SImode);
40685 if (elt < 8)
40686 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40687 else
40688 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40689 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40690 return;
40691
40692 case V8DImode:
40693 tmp = gen_reg_rtx (V4DImode);
40694 if (elt < 4)
40695 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40696 else
40697 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40698 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40699 return;
40700
40701 case V8QImode:
40702 /* ??? Could extract the appropriate HImode element and shift. */
40703 default:
40704 break;
40705 }
40706
40707 if (use_vec_extr)
40708 {
40709 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40710 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40711
40712 /* Let the rtl optimizers know about the zero extension performed. */
40713 if (inner_mode == QImode || inner_mode == HImode)
40714 {
40715 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40716 target = gen_lowpart (SImode, target);
40717 }
40718
40719 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40720 }
40721 else
40722 {
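      /* No direct extraction pattern: spill the vector to a stack slot
	 and load the selected element back.  */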
40723 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40724
40725 emit_move_insn (mem, vec);
40726
40727 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40728 emit_move_insn (target, tmp);
40729 }
40730 }
40731
40732 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40733 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40734 The upper bits of DEST are undefined, though they shouldn't cause
40735 exceptions (some bits from src or all zeros are ok). */
40736
40737 static void
40738 emit_reduc_half (rtx dest, rtx src, int i)
40739 {
40740 rtx tem, d = dest;
40741 switch (GET_MODE (src))
40742 {
40743 case V4SFmode:
40744 if (i == 128)
40745 tem = gen_sse_movhlps (dest, src, src);
40746 else
40747 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40748 GEN_INT (1 + 4), GEN_INT (1 + 4));
40749 break;
40750 case V2DFmode:
40751 tem = gen_vec_interleave_highv2df (dest, src, src);
40752 break;
40753 case V16QImode:
40754 case V8HImode:
40755 case V4SImode:
40756 case V2DImode:
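      /* Shift the whole 128-bit register right by I/2 bits so that
	 bits I/2 ... I-1 of SRC land in the low half.  */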
40757 d = gen_reg_rtx (V1TImode);
40758 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
40759 GEN_INT (i / 2));
40760 break;
40761 case V8SFmode:
40762 if (i == 256)
40763 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
40764 else
40765 tem = gen_avx_shufps256 (dest, src, src,
40766 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
40767 break;
40768 case V4DFmode:
40769 if (i == 256)
40770 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
40771 else
40772 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
40773 break;
40774 case V32QImode:
40775 case V16HImode:
40776 case V8SImode:
40777 case V4DImode:
40778 if (i == 256)
40779 {
40780 if (GET_MODE (dest) != V4DImode)
40781 d = gen_reg_rtx (V4DImode);
40782 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
40783 gen_lowpart (V4DImode, src),
40784 const1_rtx);
40785 }
40786 else
40787 {
40788 d = gen_reg_rtx (V2TImode);
40789 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
40790 GEN_INT (i / 2));
40791 }
40792 break;
40793 case V16SImode:
40794 case V16SFmode:
40795 case V8DImode:
40796 case V8DFmode:
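      /* For the 512-bit modes, move the upper I/2 bits down with a
	 128-bit lane shuffle when I > 128; for I == 128 or 64 a dword
	 shuffle within each 128-bit lane is enough.  */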
40797 if (i > 128)
40798 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
40799 gen_lowpart (V16SImode, src),
40800 gen_lowpart (V16SImode, src),
40801 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
40802 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
40803 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
40804 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
40805 GEN_INT (0xC), GEN_INT (0xD),
40806 GEN_INT (0xE), GEN_INT (0xF),
40807 GEN_INT (0x10), GEN_INT (0x11),
40808 GEN_INT (0x12), GEN_INT (0x13),
40809 GEN_INT (0x14), GEN_INT (0x15),
40810 GEN_INT (0x16), GEN_INT (0x17));
40811 else
40812 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
40813 gen_lowpart (V16SImode, src),
40814 GEN_INT (i == 128 ? 0x2 : 0x1),
40815 GEN_INT (0x3),
40816 GEN_INT (0x3),
40817 GEN_INT (0x3),
40818 GEN_INT (i == 128 ? 0x6 : 0x5),
40819 GEN_INT (0x7),
40820 GEN_INT (0x7),
40821 GEN_INT (0x7),
40822 GEN_INT (i == 128 ? 0xA : 0x9),
40823 GEN_INT (0xB),
40824 GEN_INT (0xB),
40825 GEN_INT (0xB),
40826 GEN_INT (i == 128 ? 0xE : 0xD),
40827 GEN_INT (0xF),
40828 GEN_INT (0xF),
40829 GEN_INT (0xF));
40830 break;
40831 default:
40832 gcc_unreachable ();
40833 }
40834 emit_insn (tem);
40835 if (d != dest)
40836 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
40837 }
40838
40839 /* Expand a vector reduction. FN is the binary pattern to reduce;
40840 DEST is the destination; IN is the input vector. */
40841
40842 void
40843 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
40844 {
40845 rtx half, dst, vec = in;
40846 enum machine_mode mode = GET_MODE (in);
40847 int i;
40848
40849 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
40850 if (TARGET_SSE4_1
40851 && mode == V8HImode
40852 && fn == gen_uminv8hi3)
40853 {
40854 emit_insn (gen_sse4_1_phminposuw (dest, in));
40855 return;
40856 }
40857
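  /* Reduce VEC by repeatedly moving the upper active half into the low
     half (emit_reduc_half) and combining it with the current value via
     FN, halving the active width each iteration; the final combination
     leaves the scalar result in the low element of DEST.  */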
40858 for (i = GET_MODE_BITSIZE (mode);
40859 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
40860 i >>= 1)
40861 {
40862 half = gen_reg_rtx (mode);
40863 emit_reduc_half (half, vec, i);
40864 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
40865 dst = dest;
40866 else
40867 dst = gen_reg_rtx (mode);
40868 emit_insn (fn (dst, half, vec));
40869 vec = dst;
40870 }
40871 }
40872 \f
40873 /* Target hook for scalar_mode_supported_p. */
40874 static bool
40875 ix86_scalar_mode_supported_p (enum machine_mode mode)
40876 {
40877 if (DECIMAL_FLOAT_MODE_P (mode))
40878 return default_decimal_float_supported_p ();
40879 else if (mode == TFmode)
40880 return true;
40881 else
40882 return default_scalar_mode_supported_p (mode);
40883 }
40884
40885 /* Implements target hook vector_mode_supported_p. */
40886 static bool
40887 ix86_vector_mode_supported_p (enum machine_mode mode)
40888 {
40889 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
40890 return true;
40891 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
40892 return true;
40893 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
40894 return true;
40895 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
40896 return true;
40897 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
40898 return true;
40899 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
40900 return true;
40901 return false;
40902 }
40903
40904 /* Target hook for c_mode_for_suffix. */
40905 static enum machine_mode
40906 ix86_c_mode_for_suffix (char suffix)
40907 {
40908 if (suffix == 'q')
40909 return TFmode;
40910 if (suffix == 'w')
40911 return XFmode;
40912
40913 return VOIDmode;
40914 }
40915
40916 /* Worker function for TARGET_MD_ASM_CLOBBERS.
40917
40918 We do this in the new i386 backend to maintain source compatibility
40919 with the old cc0-based compiler. */
40920
40921 static tree
40922 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
40923 tree inputs ATTRIBUTE_UNUSED,
40924 tree clobbers)
40925 {
40926 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
40927 clobbers);
40928 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
40929 clobbers);
40930 return clobbers;
40931 }
40932
40933 /* Implements target vector targetm.asm.encode_section_info. */
40934
40935 static void ATTRIBUTE_UNUSED
40936 ix86_encode_section_info (tree decl, rtx rtl, int first)
40937 {
40938 default_encode_section_info (decl, rtl, first);
40939
40940 if (TREE_CODE (decl) == VAR_DECL
40941 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
40942 && ix86_in_large_data_p (decl))
40943 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
40944 }
40945
40946 /* Worker function for REVERSE_CONDITION. */
40947
40948 enum rtx_code
40949 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
40950 {
40951 return (mode != CCFPmode && mode != CCFPUmode
40952 ? reverse_condition (code)
40953 : reverse_condition_maybe_unordered (code));
40954 }
40955
40956 /* Output code to perform an x87 FP register move, from OPERANDS[1]
40957 to OPERANDS[0]. */
40958
40959 const char *
40960 output_387_reg_move (rtx insn, rtx *operands)
40961 {
40962 if (REG_P (operands[0]))
40963 {
40964 if (REG_P (operands[1])
40965 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
40966 {
40967 if (REGNO (operands[0]) == FIRST_STACK_REG)
40968 return output_387_ffreep (operands, 0);
40969 return "fstp\t%y0";
40970 }
40971 if (STACK_TOP_P (operands[0]))
40972 return "fld%Z1\t%y1";
40973 return "fst\t%y0";
40974 }
40975 else if (MEM_P (operands[0]))
40976 {
40977 gcc_assert (REG_P (operands[1]));
40978 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
40979 return "fstp%Z0\t%y0";
40980 else
40981 {
40982 /* There is no non-popping store to memory for XFmode.
40983 So if we need one, follow the store with a load. */
40984 if (GET_MODE (operands[0]) == XFmode)
40985 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
40986 else
40987 return "fst%Z0\t%y0";
40988 }
40989 }
40990 else
40991     gcc_unreachable ();
40992 }
40993
40994 /* Output code to perform a conditional jump to LABEL, if C2 flag in
40995 FP status register is set. */
40996
40997 void
40998 ix86_emit_fp_unordered_jump (rtx label)
40999 {
41000 rtx reg = gen_reg_rtx (HImode);
41001 rtx temp;
41002
41003 emit_insn (gen_x86_fnstsw_1 (reg));
41004
41005 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41006 {
41007 emit_insn (gen_x86_sahf_1 (reg));
41008
41009 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41010 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41011 }
41012 else
41013 {
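      /* Without SAHF, test the C2 flag directly: C2 is bit 10 of the FP
	 status word, i.e. bit 2 of the high byte after fnstsw, hence the
	 0x04 mask.  */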
41014 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41015
41016 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41017 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41018 }
41019
41020 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41021 gen_rtx_LABEL_REF (VOIDmode, label),
41022 pc_rtx);
41023 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41024
41025 emit_jump_insn (temp);
41026 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41027 }
41028
41029 /* Output code to perform a log1p XFmode calculation. */
41030
41031 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41032 {
41033 rtx label1 = gen_label_rtx ();
41034 rtx label2 = gen_label_rtx ();
41035
41036 rtx tmp = gen_reg_rtx (XFmode);
41037 rtx tmp2 = gen_reg_rtx (XFmode);
41038 rtx test;
41039
41040 emit_insn (gen_absxf2 (tmp, op1));
41041 test = gen_rtx_GE (VOIDmode, tmp,
41042 CONST_DOUBLE_FROM_REAL_VALUE (
41043 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41044 XFmode));
41045 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41046
41047 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41048 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41049 emit_jump (label2);
41050
41051 emit_label (label1);
41052 emit_move_insn (tmp, CONST1_RTX (XFmode));
41053 emit_insn (gen_addxf3 (tmp, op1, tmp));
41054 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41055 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41056
41057 emit_label (label2);
41058 }
41059
41060 /* Emit code for round calculation. */
41061 void ix86_emit_i387_round (rtx op0, rtx op1)
41062 {
41063 enum machine_mode inmode = GET_MODE (op1);
41064 enum machine_mode outmode = GET_MODE (op0);
41065 rtx e1, e2, res, tmp, tmp1, half;
41066 rtx scratch = gen_reg_rtx (HImode);
41067 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41068 rtx jump_label = gen_label_rtx ();
41069 rtx insn;
41070 rtx (*gen_abs) (rtx, rtx);
41071 rtx (*gen_neg) (rtx, rtx);
41072
41073 switch (inmode)
41074 {
41075 case SFmode:
41076 gen_abs = gen_abssf2;
41077 break;
41078 case DFmode:
41079 gen_abs = gen_absdf2;
41080 break;
41081 case XFmode:
41082 gen_abs = gen_absxf2;
41083 break;
41084 default:
41085 gcc_unreachable ();
41086 }
41087
41088 switch (outmode)
41089 {
41090 case SFmode:
41091 gen_neg = gen_negsf2;
41092 break;
41093 case DFmode:
41094 gen_neg = gen_negdf2;
41095 break;
41096 case XFmode:
41097 gen_neg = gen_negxf2;
41098 break;
41099 case HImode:
41100 gen_neg = gen_neghi2;
41101 break;
41102 case SImode:
41103 gen_neg = gen_negsi2;
41104 break;
41105 case DImode:
41106 gen_neg = gen_negdi2;
41107 break;
41108 default:
41109 gcc_unreachable ();
41110 }
41111
41112 e1 = gen_reg_rtx (inmode);
41113 e2 = gen_reg_rtx (inmode);
41114 res = gen_reg_rtx (outmode);
41115
41116 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41117
41118 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41119
41120 /* scratch = fxam(op1) */
41121 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41122 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41123 UNSPEC_FXAM)));
41124 /* e1 = fabs(op1) */
41125 emit_insn (gen_abs (e1, op1));
41126
41127 /* e2 = e1 + 0.5 */
41128 half = force_reg (inmode, half);
41129 emit_insn (gen_rtx_SET (VOIDmode, e2,
41130 gen_rtx_PLUS (inmode, e1, half)));
41131
41132 /* res = floor(e2) */
41133 if (inmode != XFmode)
41134 {
41135 tmp1 = gen_reg_rtx (XFmode);
41136
41137 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41138 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41139 }
41140 else
41141 tmp1 = e2;
41142
41143 switch (outmode)
41144 {
41145 case SFmode:
41146 case DFmode:
41147 {
41148 rtx tmp0 = gen_reg_rtx (XFmode);
41149
41150 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41151
41152 emit_insn (gen_rtx_SET (VOIDmode, res,
41153 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41154 UNSPEC_TRUNC_NOOP)));
41155 }
41156 break;
41157 case XFmode:
41158 emit_insn (gen_frndintxf2_floor (res, tmp1));
41159 break;
41160 case HImode:
41161 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41162 break;
41163 case SImode:
41164 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41165 break;
41166 case DImode:
41167 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41168 break;
41169 default:
41170 gcc_unreachable ();
41171 }
41172
41173 /* flags = signbit(a) */
41174 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41175
41176 /* if (flags) then res = -res */
41177 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41178 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41179 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41180 pc_rtx);
41181 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41182 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41183 JUMP_LABEL (insn) = jump_label;
41184
41185 emit_insn (gen_neg (res, res));
41186
41187 emit_label (jump_label);
41188 LABEL_NUSES (jump_label) = 1;
41189
41190 emit_move_insn (op0, res);
41191 }
41192
41193 /* Output code to perform a Newton-Raphson approximation of a single precision
41194 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41195
41196 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41197 {
41198 rtx x0, x1, e0, e1;
41199
41200 x0 = gen_reg_rtx (mode);
41201 e0 = gen_reg_rtx (mode);
41202 e1 = gen_reg_rtx (mode);
41203 x1 = gen_reg_rtx (mode);
41204
41205 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
41206
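  /* This is one Newton-Raphson step for the reciprocal: with x0 = rcp (b),
     x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0), computed below
     as e1 - e0.  */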
41207 b = force_reg (mode, b);
41208
41209 /* x0 = rcp(b) estimate */
41210 if (mode == V16SFmode || mode == V8DFmode)
41211 emit_insn (gen_rtx_SET (VOIDmode, x0,
41212 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41213 UNSPEC_RCP14)));
41214 else
41215 emit_insn (gen_rtx_SET (VOIDmode, x0,
41216 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41217 UNSPEC_RCP)));
41218
41219 /* e0 = x0 * b */
41220 emit_insn (gen_rtx_SET (VOIDmode, e0,
41221 gen_rtx_MULT (mode, x0, b)));
41222
41223 /* e0 = x0 * e0 */
41224 emit_insn (gen_rtx_SET (VOIDmode, e0,
41225 gen_rtx_MULT (mode, x0, e0)));
41226
41227 /* e1 = x0 + x0 */
41228 emit_insn (gen_rtx_SET (VOIDmode, e1,
41229 gen_rtx_PLUS (mode, x0, x0)));
41230
41231 /* x1 = e1 - e0 */
41232 emit_insn (gen_rtx_SET (VOIDmode, x1,
41233 gen_rtx_MINUS (mode, e1, e0)));
41234
41235 /* res = a * x1 */
41236 emit_insn (gen_rtx_SET (VOIDmode, res,
41237 gen_rtx_MULT (mode, a, x1)));
41238 }
41239
41240 /* Output code to perform a Newton-Raphson approximation of a
41241 single precision floating point [reciprocal] square root. */
41242
41243 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41244 bool recip)
41245 {
41246 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41247 REAL_VALUE_TYPE r;
41248 int unspec;
41249
41250 x0 = gen_reg_rtx (mode);
41251 e0 = gen_reg_rtx (mode);
41252 e1 = gen_reg_rtx (mode);
41253 e2 = gen_reg_rtx (mode);
41254 e3 = gen_reg_rtx (mode);
41255
41256 real_from_integer (&r, VOIDmode, -3, -1, 0);
41257 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41258
41259 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41260 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41261 unspec = UNSPEC_RSQRT;
41262
41263 if (VECTOR_MODE_P (mode))
41264 {
41265 mthree = ix86_build_const_vector (mode, true, mthree);
41266 mhalf = ix86_build_const_vector (mode, true, mhalf);
41267 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41268 if (GET_MODE_SIZE (mode) == 64)
41269 unspec = UNSPEC_RSQRT14;
41270 }
41271
41272 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41273 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
41274
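  /* One Newton-Raphson step for rsqrt: with x0 the rsqrt estimate of a,
     x1 = x0 * (1.5 - 0.5 * a * x0 * x0) = -0.5 * x0 * (a * x0 * x0 - 3.0).
     For sqrt, multiply by a first, since sqrt (a) = a * rsqrt (a).  */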
41275 a = force_reg (mode, a);
41276
41277 /* x0 = rsqrt(a) estimate */
41278 emit_insn (gen_rtx_SET (VOIDmode, x0,
41279 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41280 unspec)));
41281
41282   /* If a == 0.0, filter out the infinite rsqrt estimate to avoid a NaN
41282      result for sqrt (0.0).  */
41283 if (!recip)
41284 {
41285 rtx zero, mask;
41286
41287 zero = gen_reg_rtx (mode);
41288 mask = gen_reg_rtx (mode);
41289
41290       zero = force_reg (mode, CONST0_RTX (mode));
41291
41292 /* Handle masked compare. */
41293 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41294 {
41295 mask = gen_reg_rtx (HImode);
41296 /* Imm value 0x4 corresponds to not-equal comparison. */
41297 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41298 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41299 }
41300 else
41301 {
41302 emit_insn (gen_rtx_SET (VOIDmode, mask,
41303 gen_rtx_NE (mode, zero, a)));
41304
41305 emit_insn (gen_rtx_SET (VOIDmode, x0,
41306 gen_rtx_AND (mode, x0, mask)));
41307 }
41308 }
41309
41310 /* e0 = x0 * a */
41311 emit_insn (gen_rtx_SET (VOIDmode, e0,
41312 gen_rtx_MULT (mode, x0, a)));
41313 /* e1 = e0 * x0 */
41314 emit_insn (gen_rtx_SET (VOIDmode, e1,
41315 gen_rtx_MULT (mode, e0, x0)));
41316
41317 /* e2 = e1 - 3. */
41318 mthree = force_reg (mode, mthree);
41319 emit_insn (gen_rtx_SET (VOIDmode, e2,
41320 gen_rtx_PLUS (mode, e1, mthree)));
41321
41322 mhalf = force_reg (mode, mhalf);
41323 if (recip)
41324 /* e3 = -.5 * x0 */
41325 emit_insn (gen_rtx_SET (VOIDmode, e3,
41326 gen_rtx_MULT (mode, x0, mhalf)));
41327 else
41328 /* e3 = -.5 * e0 */
41329 emit_insn (gen_rtx_SET (VOIDmode, e3,
41330 gen_rtx_MULT (mode, e0, mhalf)));
41331 /* ret = e2 * e3 */
41332 emit_insn (gen_rtx_SET (VOIDmode, res,
41333 gen_rtx_MULT (mode, e2, e3)));
41334 }
41335
41336 #ifdef TARGET_SOLARIS
41337 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41338
41339 static void
41340 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41341 tree decl)
41342 {
41343 /* With Binutils 2.15, the "@unwind" marker must be specified on
41344 every occurrence of the ".eh_frame" section, not just the first
41345 one. */
41346 if (TARGET_64BIT
41347 && strcmp (name, ".eh_frame") == 0)
41348 {
41349 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41350 flags & SECTION_WRITE ? "aw" : "a");
41351 return;
41352 }
41353
41354 #ifndef USE_GAS
41355 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41356 {
41357 solaris_elf_asm_comdat_section (name, flags, decl);
41358 return;
41359 }
41360 #endif
41361
41362 default_elf_asm_named_section (name, flags, decl);
41363 }
41364 #endif /* TARGET_SOLARIS */
41365
41366 /* Return the mangling of TYPE if it is an extended fundamental type. */
41367
41368 static const char *
41369 ix86_mangle_type (const_tree type)
41370 {
41371 type = TYPE_MAIN_VARIANT (type);
41372
41373 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41374 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41375 return NULL;
41376
41377 switch (TYPE_MODE (type))
41378 {
41379 case TFmode:
41380 /* __float128 is "g". */
41381 return "g";
41382 case XFmode:
41383 /* "long double" or __float80 is "e". */
41384 return "e";
41385 default:
41386 return NULL;
41387 }
41388 }
41389
41390 /* For 32-bit code we can save PIC register setup by using
41391 __stack_chk_fail_local hidden function instead of calling
41392    __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
41393 register, so it is better to call __stack_chk_fail directly. */
41394
41395 static tree ATTRIBUTE_UNUSED
41396 ix86_stack_protect_fail (void)
41397 {
41398 return TARGET_64BIT
41399 ? default_external_stack_protect_fail ()
41400 : default_hidden_stack_protect_fail ();
41401 }
41402
41403 /* Select a format to encode pointers in exception handling data. CODE
41404 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41405 true if the symbol may be affected by dynamic relocations.
41406
41407 ??? All x86 object file formats are capable of representing this.
41408 After all, the relocation needed is the same as for the call insn.
41409 Whether or not a particular assembler allows us to enter such, I
41410 guess we'll have to see. */
41411 int
41412 asm_preferred_eh_data_format (int code, int global)
41413 {
41414 if (flag_pic)
41415 {
41416 int type = DW_EH_PE_sdata8;
41417 if (!TARGET_64BIT
41418 || ix86_cmodel == CM_SMALL_PIC
41419 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41420 type = DW_EH_PE_sdata4;
41421 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41422 }
41423 if (ix86_cmodel == CM_SMALL
41424 || (ix86_cmodel == CM_MEDIUM && code))
41425 return DW_EH_PE_udata4;
41426 return DW_EH_PE_absptr;
41427 }
41428 \f
41429 /* Expand copysign from SIGN to the positive value ABS_VALUE
41430 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41431 the sign-bit. */
41432 static void
41433 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41434 {
41435 enum machine_mode mode = GET_MODE (sign);
41436 rtx sgn = gen_reg_rtx (mode);
41437 if (mask == NULL_RTX)
41438 {
41439 enum machine_mode vmode;
41440
41441 if (mode == SFmode)
41442 vmode = V4SFmode;
41443 else if (mode == DFmode)
41444 vmode = V2DFmode;
41445 else
41446 vmode = mode;
41447
41448 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41449 if (!VECTOR_MODE_P (mode))
41450 {
41451 /* We need to generate a scalar mode mask in this case. */
41452 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41453 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41454 mask = gen_reg_rtx (mode);
41455 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41456 }
41457 }
41458 else
41459 mask = gen_rtx_NOT (mode, mask);
41460 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41461 gen_rtx_AND (mode, mask, sign)));
41462 emit_insn (gen_rtx_SET (VOIDmode, result,
41463 gen_rtx_IOR (mode, abs_value, sgn)));
41464 }
41465
41466 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41467 mask for masking out the sign-bit is stored in *SMASK, if that is
41468 non-null. */
41469 static rtx
41470 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41471 {
41472 enum machine_mode vmode, mode = GET_MODE (op0);
41473 rtx xa, mask;
41474
41475 xa = gen_reg_rtx (mode);
41476 if (mode == SFmode)
41477 vmode = V4SFmode;
41478 else if (mode == DFmode)
41479 vmode = V2DFmode;
41480 else
41481 vmode = mode;
41482 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41483 if (!VECTOR_MODE_P (mode))
41484 {
41485 /* We need to generate a scalar mode mask in this case. */
41486 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41487 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41488 mask = gen_reg_rtx (mode);
41489 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41490 }
41491 emit_insn (gen_rtx_SET (VOIDmode, xa,
41492 gen_rtx_AND (mode, op0, mask)));
41493
41494 if (smask)
41495 *smask = mask;
41496
41497 return xa;
41498 }
41499
41500 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41501 swapping the operands if SWAP_OPERANDS is true. The expanded
41502 code is a forward jump to a newly created label in case the
41503 comparison is true. The generated label rtx is returned. */
41504 static rtx
41505 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41506 bool swap_operands)
41507 {
41508 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41509 rtx label, tmp;
41510
41511 if (swap_operands)
41512 {
41513 tmp = op0;
41514 op0 = op1;
41515 op1 = tmp;
41516 }
41517
41518 label = gen_label_rtx ();
41519 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41520 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41521 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41522 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41523 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41524 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41525 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41526 JUMP_LABEL (tmp) = label;
41527
41528 return label;
41529 }
41530
41531 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41532 using comparison code CODE. Operands are swapped for the comparison if
41533 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41534 static rtx
41535 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41536 bool swap_operands)
41537 {
41538 rtx (*insn)(rtx, rtx, rtx, rtx);
41539 enum machine_mode mode = GET_MODE (op0);
41540 rtx mask = gen_reg_rtx (mode);
41541
41542 if (swap_operands)
41543 {
41544 rtx tmp = op0;
41545 op0 = op1;
41546 op1 = tmp;
41547 }
41548
41549 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41550
41551 emit_insn (insn (mask, op0, op1,
41552 gen_rtx_fmt_ee (code, mode, op0, op1)));
41553 return mask;
41554 }
41555
41556 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41557 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41558 static rtx
41559 ix86_gen_TWO52 (enum machine_mode mode)
41560 {
41561 REAL_VALUE_TYPE TWO52r;
41562 rtx TWO52;
41563
41564 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41565 TWO52 = const_double_from_real_value (TWO52r, mode);
41566 TWO52 = force_reg (mode, TWO52);
41567
41568 return TWO52;
41569 }
41570
41571 /* Expand SSE sequence for computing lround from OP1 storing
41572 into OP0. */
41573 void
41574 ix86_expand_lround (rtx op0, rtx op1)
41575 {
41576 /* C code for the stuff we're doing below:
41577 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41578 return (long)tmp;
41579 */
41580 enum machine_mode mode = GET_MODE (op1);
41581 const struct real_format *fmt;
41582 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41583 rtx adj;
41584
41585 /* load nextafter (0.5, 0.0) */
41586 fmt = REAL_MODE_FORMAT (mode);
41587 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41588 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41589
41590 /* adj = copysign (0.5, op1) */
41591 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41592 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41593
41594 /* adj = op1 + adj */
41595 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41596
41597 /* op0 = (imode)adj */
41598 expand_fix (op0, adj, 0);
41599 }
41600
41601 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
41602    storing into OPERAND0.  */
41603 void
41604 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41605 {
41606 /* C code for the stuff we're doing below (for do_floor):
41607 xi = (long)op1;
41608 xi -= (double)xi > op1 ? 1 : 0;
41609 return xi;
41610 */
41611 enum machine_mode fmode = GET_MODE (op1);
41612 enum machine_mode imode = GET_MODE (op0);
41613 rtx ireg, freg, label, tmp;
41614
41615 /* reg = (long)op1 */
41616 ireg = gen_reg_rtx (imode);
41617 expand_fix (ireg, op1, 0);
41618
41619 /* freg = (double)reg */
41620 freg = gen_reg_rtx (fmode);
41621 expand_float (freg, ireg, 0);
41622
41623 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41624 label = ix86_expand_sse_compare_and_jump (UNLE,
41625 freg, op1, !do_floor);
41626 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41627 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41628 emit_move_insn (ireg, tmp);
41629
41630 emit_label (label);
41631 LABEL_NUSES (label) = 1;
41632
41633 emit_move_insn (op0, ireg);
41634 }
41635
41636 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41637 result in OPERAND0. */
41638 void
41639 ix86_expand_rint (rtx operand0, rtx operand1)
41640 {
41641 /* C code for the stuff we're doing below:
41642 xa = fabs (operand1);
41643 if (!isless (xa, 2**52))
41644 return operand1;
41645 xa = xa + 2**52 - 2**52;
41646 return copysign (xa, operand1);
41647 */
41648 enum machine_mode mode = GET_MODE (operand0);
41649 rtx res, xa, label, TWO52, mask;
41650
41651 res = gen_reg_rtx (mode);
41652 emit_move_insn (res, operand1);
41653
41654 /* xa = abs (operand1) */
41655 xa = ix86_expand_sse_fabs (res, &mask);
41656
41657 /* if (!isless (xa, TWO52)) goto label; */
41658 TWO52 = ix86_gen_TWO52 (mode);
41659 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41660
41661 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41662 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41663
41664 ix86_sse_copysign_to_positive (res, xa, res, mask);
41665
41666 emit_label (label);
41667 LABEL_NUSES (label) = 1;
41668
41669 emit_move_insn (operand0, res);
41670 }
41671
41672 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41673 into OPERAND0. */
41674 void
41675 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41676 {
41677 /* C code for the stuff we expand below.
41678 double xa = fabs (x), x2;
41679 if (!isless (xa, TWO52))
41680 return x;
41681 xa = xa + TWO52 - TWO52;
41682 x2 = copysign (xa, x);
41683 Compensate. Floor:
41684 if (x2 > x)
41685 x2 -= 1;
41686 Compensate. Ceil:
41687 if (x2 < x)
41688 x2 -= -1;
41689 return x2;
41690 */
41691 enum machine_mode mode = GET_MODE (operand0);
41692 rtx xa, TWO52, tmp, label, one, res, mask;
41693
41694 TWO52 = ix86_gen_TWO52 (mode);
41695
41696 /* Temporary for holding the result, initialized to the input
41697 operand to ease control flow. */
41698 res = gen_reg_rtx (mode);
41699 emit_move_insn (res, operand1);
41700
41701 /* xa = abs (operand1) */
41702 xa = ix86_expand_sse_fabs (res, &mask);
41703
41704 /* if (!isless (xa, TWO52)) goto label; */
41705 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41706
41707 /* xa = xa + TWO52 - TWO52; */
41708 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41709 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41710
41711 /* xa = copysign (xa, operand1) */
41712 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41713
41714 /* generate 1.0 or -1.0 */
41715 one = force_reg (mode,
41716 const_double_from_real_value (do_floor
41717 ? dconst1 : dconstm1, mode));
41718
41719 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41720 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41721 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41722 gen_rtx_AND (mode, one, tmp)));
41723 /* We always need to subtract here to preserve signed zero. */
41724 tmp = expand_simple_binop (mode, MINUS,
41725 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41726 emit_move_insn (res, tmp);
41727
41728 emit_label (label);
41729 LABEL_NUSES (label) = 1;
41730
41731 emit_move_insn (operand0, res);
41732 }
41733
41734 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41735 into OPERAND0. */
41736 void
41737 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41738 {
41739 /* C code for the stuff we expand below.
41740 double xa = fabs (x), x2;
41741 if (!isless (xa, TWO52))
41742 return x;
41743 x2 = (double)(long)x;
41744 Compensate. Floor:
41745 if (x2 > x)
41746 x2 -= 1;
41747 Compensate. Ceil:
41748 if (x2 < x)
41749 x2 += 1;
41750 if (HONOR_SIGNED_ZEROS (mode))
41751 return copysign (x2, x);
41752 return x2;
41753 */
41754 enum machine_mode mode = GET_MODE (operand0);
41755 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41756
41757 TWO52 = ix86_gen_TWO52 (mode);
41758
41759 /* Temporary for holding the result, initialized to the input
41760 operand to ease control flow. */
41761 res = gen_reg_rtx (mode);
41762 emit_move_insn (res, operand1);
41763
41764 /* xa = abs (operand1) */
41765 xa = ix86_expand_sse_fabs (res, &mask);
41766
41767 /* if (!isless (xa, TWO52)) goto label; */
41768 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41769
41770 /* xa = (double)(long)x */
41771 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
41772 expand_fix (xi, res, 0);
41773 expand_float (xa, xi, 0);
41774
41775 /* generate 1.0 */
41776 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
41777
41778 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41779 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41780 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41781 gen_rtx_AND (mode, one, tmp)));
41782 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
41783 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41784 emit_move_insn (res, tmp);
41785
41786 if (HONOR_SIGNED_ZEROS (mode))
41787 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
41788
41789 emit_label (label);
41790 LABEL_NUSES (label) = 1;
41791
41792 emit_move_insn (operand0, res);
41793 }
41794
41795 /* Expand SSE sequence for computing round from OPERAND1 storing
41796 into OPERAND0. Sequence that works without relying on DImode truncation
41797    via cvttsd2siq, which is only available on 64-bit targets.  */
41798 void
41799 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
41800 {
41801 /* C code for the stuff we expand below.
41802 double xa = fabs (x), xa2, x2;
41803 if (!isless (xa, TWO52))
41804 return x;
41805 Using the absolute value and copying back sign makes
41806 -0.0 -> -0.0 correct.
41807 xa2 = xa + TWO52 - TWO52;
41808 Compensate.
41809 dxa = xa2 - xa;
41810 if (dxa <= -0.5)
41811 xa2 += 1;
41812 else if (dxa > 0.5)
41813 xa2 -= 1;
41814 x2 = copysign (xa2, x);
41815 return x2;
41816 */
41817 enum machine_mode mode = GET_MODE (operand0);
41818 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
41819
41820 TWO52 = ix86_gen_TWO52 (mode);
41821
41822 /* Temporary for holding the result, initialized to the input
41823 operand to ease control flow. */
41824 res = gen_reg_rtx (mode);
41825 emit_move_insn (res, operand1);
41826
41827 /* xa = abs (operand1) */
41828 xa = ix86_expand_sse_fabs (res, &mask);
41829
41830 /* if (!isless (xa, TWO52)) goto label; */
41831 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41832
41833 /* xa2 = xa + TWO52 - TWO52; */
41834 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41835 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
41836
41837 /* dxa = xa2 - xa; */
41838 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
41839
41840 /* generate 0.5, 1.0 and -0.5 */
41841 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
41842 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
41843 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
41844 0, OPTAB_DIRECT);
41845
41846 /* Compensate. */
41847 tmp = gen_reg_rtx (mode);
41848 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
41849 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
41850 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41851 gen_rtx_AND (mode, one, tmp)));
41852 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41853 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
41854 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
41855 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41856 gen_rtx_AND (mode, one, tmp)));
41857 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41858
41859 /* res = copysign (xa2, operand1) */
41860 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
41861
41862 emit_label (label);
41863 LABEL_NUSES (label) = 1;
41864
41865 emit_move_insn (operand0, res);
41866 }
41867
41868 /* Expand SSE sequence for computing trunc from OPERAND1 storing
41869 into OPERAND0. */
41870 void
41871 ix86_expand_trunc (rtx operand0, rtx operand1)
41872 {
41873 /* C code for SSE variant we expand below.
41874 double xa = fabs (x), x2;
41875 if (!isless (xa, TWO52))
41876 return x;
41877 x2 = (double)(long)x;
41878 if (HONOR_SIGNED_ZEROS (mode))
41879 return copysign (x2, x);
41880 return x2;
41881 */
41882 enum machine_mode mode = GET_MODE (operand0);
41883 rtx xa, xi, TWO52, label, res, mask;
41884
41885 TWO52 = ix86_gen_TWO52 (mode);
41886
41887 /* Temporary for holding the result, initialized to the input
41888 operand to ease control flow. */
41889 res = gen_reg_rtx (mode);
41890 emit_move_insn (res, operand1);
41891
41892 /* xa = abs (operand1) */
41893 xa = ix86_expand_sse_fabs (res, &mask);
41894
41895 /* if (!isless (xa, TWO52)) goto label; */
41896 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41897
41898 /* x = (double)(long)x */
41899 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
41900 expand_fix (xi, res, 0);
41901 expand_float (res, xi, 0);
41902
41903 if (HONOR_SIGNED_ZEROS (mode))
41904 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
41905
41906 emit_label (label);
41907 LABEL_NUSES (label) = 1;
41908
41909 emit_move_insn (operand0, res);
41910 }
41911
41912 /* Expand SSE sequence for computing trunc from OPERAND1 storing
41913 into OPERAND0 without relying on the 64-bit-only cvttsd2siq conversion.  */
41914 void
41915 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
41916 {
41917 enum machine_mode mode = GET_MODE (operand0);
41918 rtx xa, mask, TWO52, label, one, res, smask, tmp;
41919
41920 /* C code for SSE variant we expand below.
41921 double xa = fabs (x), xa2, x2;
41922 if (!isless (xa, TWO52))
41923 return x;
41924 xa2 = xa + TWO52 - TWO52;
41925 Compensate:
41926 if (xa2 > xa)
41927 xa2 -= 1.0;
41928 x2 = copysign (xa2, x);
41929 return x2;
41930 */
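  /* An illustrative trace (hypothetical value): for x = -2.7, xa = 2.7 and
     xa + 2^52 - 2^52 = 3.0; since 3.0 > 2.7 the compensation subtracts 1.0,
     and copysign (2.0, -2.7) = -2.0 = trunc (-2.7).  */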
41931
41932 TWO52 = ix86_gen_TWO52 (mode);
41933
41934 /* Temporary for holding the result, initialized to the input
41935 operand to ease control flow. */
41936 res = gen_reg_rtx (mode);
41937 emit_move_insn (res, operand1);
41938
41939 /* xa = abs (operand1) */
41940 xa = ix86_expand_sse_fabs (res, &smask);
41941
41942 /* if (!isless (xa, TWO52)) goto label; */
41943 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41944
41945 /* res = xa + TWO52 - TWO52; */
41946 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41947 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
41948 emit_move_insn (res, tmp);
41949
41950 /* generate 1.0 */
41951 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
41952
41953 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
41954 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
41955 emit_insn (gen_rtx_SET (VOIDmode, mask,
41956 gen_rtx_AND (mode, mask, one)));
41957 tmp = expand_simple_binop (mode, MINUS,
41958 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
41959 emit_move_insn (res, tmp);
41960
41961 /* res = copysign (res, operand1) */
41962 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
41963
41964 emit_label (label);
41965 LABEL_NUSES (label) = 1;
41966
41967 emit_move_insn (operand0, res);
41968 }
41969
41970 /* Expand SSE sequence for computing round from OPERAND1 storing
41971 into OPERAND0. */
41972 void
41973 ix86_expand_round (rtx operand0, rtx operand1)
41974 {
41975 /* C code for the stuff we're doing below:
41976 double xa = fabs (x);
41977 if (!isless (xa, TWO52))
41978 return x;
41979 xa = (double)(long)(xa + nextafter (0.5, 0.0));
41980 return copysign (xa, x);
41981 */
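  /* Why nextafter (0.5, 0.0) rather than 0.5 (illustrative, hypothetical
     value): for x = 0.49999999999999994, the largest double below 0.5,
     x + 0.5 rounds up to 1.0, so truncating would yield 1 instead of the
     correct round (x) = 0; adding the predecessor of 0.5 keeps the sum
     below 1.0 and the result comes out as 0.  */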
41982 enum machine_mode mode = GET_MODE (operand0);
41983 rtx res, TWO52, xa, label, xi, half, mask;
41984 const struct real_format *fmt;
41985 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41986
41987 /* Temporary for holding the result, initialized to the input
41988 operand to ease control flow. */
41989 res = gen_reg_rtx (mode);
41990 emit_move_insn (res, operand1);
41991
41992 TWO52 = ix86_gen_TWO52 (mode);
41993 xa = ix86_expand_sse_fabs (res, &mask);
41994 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41995
41996 /* load nextafter (0.5, 0.0) */
41997 fmt = REAL_MODE_FORMAT (mode);
41998 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41999 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42000
42001 /* xa = xa + 0.5 */
42002 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42003 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42004
42005 /* xa = (double)(int64_t)xa */
42006 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42007 expand_fix (xi, xa, 0);
42008 expand_float (xa, xi, 0);
42009
42010 /* res = copysign (xa, operand1) */
42011 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42012
42013 emit_label (label);
42014 LABEL_NUSES (label) = 1;
42015
42016 emit_move_insn (operand0, res);
42017 }
42018
42019 /* Expand SSE sequence for computing round
42020 from OP1 storing into OP0 using sse4 round insn. */
42021 void
42022 ix86_expand_round_sse4 (rtx op0, rtx op1)
42023 {
42024 enum machine_mode mode = GET_MODE (op0);
42025 rtx e1, e2, res, half;
42026 const struct real_format *fmt;
42027 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42028 rtx (*gen_copysign) (rtx, rtx, rtx);
42029 rtx (*gen_round) (rtx, rtx, rtx);
42030
42031 switch (mode)
42032 {
42033 case SFmode:
42034 gen_copysign = gen_copysignsf3;
42035 gen_round = gen_sse4_1_roundsf2;
42036 break;
42037 case DFmode:
42038 gen_copysign = gen_copysigndf3;
42039 gen_round = gen_sse4_1_rounddf2;
42040 break;
42041 default:
42042 gcc_unreachable ();
42043 }
42044
42045 /* round (a) = trunc (a + copysign (0.5, a)) */
42046
42047 /* load nextafter (0.5, 0.0) */
42048 fmt = REAL_MODE_FORMAT (mode);
42049 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42050 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42051 half = const_double_from_real_value (pred_half, mode);
42052
42053 /* e1 = copysign (0.5, op1) */
42054 e1 = gen_reg_rtx (mode);
42055 emit_insn (gen_copysign (e1, half, op1));
42056
42057 /* e2 = op1 + e1 */
42058 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42059
42060 /* res = trunc (e2) */
42061 res = gen_reg_rtx (mode);
42062 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42063
42064 emit_move_insn (op0, res);
42065 }
42066 \f
42067
42068 /* Table of valid machine attributes. */
42069 static const struct attribute_spec ix86_attribute_table[] =
42070 {
42071 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42072 affects_type_identity } */
42073 /* Stdcall attribute says callee is responsible for popping arguments
42074 if they are not variable. */
42075 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42076 true },
42077 /* Fastcall attribute says callee is responsible for popping arguments
42078 if they are not variable. */
42079 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42080 true },
42081 /* Thiscall attribute says callee is responsible for popping arguments
42082 if they are not variable. */
42083 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42084 true },
42085 /* Cdecl attribute says the callee is a normal C declaration */
42086 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42087 true },
42088 /* Regparm attribute specifies how many integer arguments are to be
42089 passed in registers. */
42090 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42091 true },
42092 /* Sseregparm attribute says we are using x86_64 calling conventions
42093 for FP arguments. */
42094 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42095 true },
42096 /* The transactional memory builtins are implicitly regparm or fastcall
42097 depending on the ABI. Override the generic do-nothing attribute that
42098 these builtins were declared with. */
42099 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42100 true },
42101 /* force_align_arg_pointer says this function realigns the stack at entry. */
42102 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42103 false, true, true, ix86_handle_cconv_attribute, false },
42104 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42105 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42106 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42107 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42108 false },
42109 #endif
42110 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42111 false },
42112 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42113 false },
42114 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42115 SUBTARGET_ATTRIBUTE_TABLE,
42116 #endif
42117 /* ms_abi and sysv_abi calling convention function attributes. */
42118 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42119 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42120 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42121 false },
42122 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42123 ix86_handle_callee_pop_aggregate_return, true },
42124 /* End element. */
42125 { NULL, 0, 0, false, false, false, NULL, false }
42126 };
42127
42128 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42129 static int
42130 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42131 tree vectype,
42132 int misalign ATTRIBUTE_UNUSED)
42133 {
42134 unsigned elements;
42135
42136 switch (type_of_cost)
42137 {
42138 case scalar_stmt:
42139 return ix86_cost->scalar_stmt_cost;
42140
42141 case scalar_load:
42142 return ix86_cost->scalar_load_cost;
42143
42144 case scalar_store:
42145 return ix86_cost->scalar_store_cost;
42146
42147 case vector_stmt:
42148 return ix86_cost->vec_stmt_cost;
42149
42150 case vector_load:
42151 return ix86_cost->vec_align_load_cost;
42152
42153 case vector_store:
42154 return ix86_cost->vec_store_cost;
42155
42156 case vec_to_scalar:
42157 return ix86_cost->vec_to_scalar_cost;
42158
42159 case scalar_to_vec:
42160 return ix86_cost->scalar_to_vec_cost;
42161
42162 case unaligned_load:
42163 case unaligned_store:
42164 return ix86_cost->vec_unalign_load_cost;
42165
42166 case cond_branch_taken:
42167 return ix86_cost->cond_taken_branch_cost;
42168
42169 case cond_branch_not_taken:
42170 return ix86_cost->cond_not_taken_branch_cost;
42171
42172 case vec_perm:
42173 case vec_promote_demote:
42174 return ix86_cost->vec_stmt_cost;
42175
42176 case vec_construct:
42177 elements = TYPE_VECTOR_SUBPARTS (vectype);
42178 return elements / 2 + 1;
42179
42180 default:
42181 gcc_unreachable ();
42182 }
42183 }
42184
42185 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42186 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42187 insn every time. */
42188
42189 static GTY(()) rtx vselect_insn;
42190
42191 /* Initialize vselect_insn. */
42192
42193 static void
42194 init_vselect_insn (void)
42195 {
42196 unsigned i;
42197 rtx x;
42198
42199 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42200 for (i = 0; i < MAX_VECT_LEN; ++i)
42201 XVECEXP (x, 0, i) = const0_rtx;
42202 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42203 const0_rtx), x);
42204 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42205 start_sequence ();
42206 vselect_insn = emit_insn (x);
42207 end_sequence ();
42208 }
42209
42210 /* Construct (set target (vec_select op0 (parallel perm))) and
42211 return true if that's a valid instruction in the active ISA. */
42212
42213 static bool
42214 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42215 unsigned nelt, bool testing_p)
42216 {
42217 unsigned int i;
42218 rtx x, save_vconcat;
42219 int icode;
42220
42221 if (vselect_insn == NULL_RTX)
42222 init_vselect_insn ();
42223
42224 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42225 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42226 for (i = 0; i < nelt; ++i)
42227 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42228 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42229 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42230 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42231 SET_DEST (PATTERN (vselect_insn)) = target;
42232 icode = recog_memoized (vselect_insn);
42233
42234 if (icode >= 0 && !testing_p)
42235 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42236
42237 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42238 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42239 INSN_CODE (vselect_insn) = -1;
42240
42241 return icode >= 0;
42242 }
42243
42244 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42245
42246 static bool
42247 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42248 const unsigned char *perm, unsigned nelt,
42249 bool testing_p)
42250 {
42251 enum machine_mode v2mode;
42252 rtx x;
42253 bool ok;
42254
42255 if (vselect_insn == NULL_RTX)
42256 init_vselect_insn ();
42257
42258 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42259 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42260 PUT_MODE (x, v2mode);
42261 XEXP (x, 0) = op0;
42262 XEXP (x, 1) = op1;
42263 ok = expand_vselect (target, x, perm, nelt, testing_p);
42264 XEXP (x, 0) = const0_rtx;
42265 XEXP (x, 1) = const0_rtx;
42266 return ok;
42267 }
42268
42269 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42270 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42271
42272 static bool
42273 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42274 {
42275 enum machine_mode vmode = d->vmode;
42276 unsigned i, mask, nelt = d->nelt;
42277 rtx target, op0, op1, x;
42278 rtx rperm[32], vperm;
42279
42280 if (d->one_operand_p)
42281 return false;
42282 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42283 ;
42284 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42285 ;
42286 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42287 ;
42288 else
42289 return false;
42290
42291 /* This is a blend, not a permute. Elements must stay in their
42292 respective lanes. */
42293 for (i = 0; i < nelt; ++i)
42294 {
42295 unsigned e = d->perm[i];
42296 if (!(e == i || e == i + nelt))
42297 return false;
42298 }
42299
42300 if (d->testing_p)
42301 return true;
42302
42303 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42304 decision should be extracted elsewhere, so that we only try that
42305 sequence once all budget==3 options have been tried. */
42306 target = d->target;
42307 op0 = d->op0;
42308 op1 = d->op1;
42309 mask = 0;
42310
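  /* A worked example (hypothetical permutation): for V4SImode with
     perm = { 0, 5, 2, 7 }, elements 1 and 3 come from op1, so the
     V4SImode case below builds mask = 0xCC; the vectors are then viewed
     as V8HImode and a pblendw with that immediate selects words 2-3 and
     6-7 (i.e. dwords 1 and 3) from op1.  */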
42311 switch (vmode)
42312 {
42313 case V4DFmode:
42314 case V8SFmode:
42315 case V2DFmode:
42316 case V4SFmode:
42317 case V8HImode:
42318 case V8SImode:
42319 for (i = 0; i < nelt; ++i)
42320 mask |= (d->perm[i] >= nelt) << i;
42321 break;
42322
42323 case V2DImode:
42324 for (i = 0; i < 2; ++i)
42325 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42326 vmode = V8HImode;
42327 goto do_subreg;
42328
42329 case V4SImode:
42330 for (i = 0; i < 4; ++i)
42331 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42332 vmode = V8HImode;
42333 goto do_subreg;
42334
42335 case V16QImode:
42336 /* See if bytes move in pairs so we can use pblendw with
42337 an immediate argument, rather than pblendvb with a vector
42338 argument. */
42339 for (i = 0; i < 16; i += 2)
42340 if (d->perm[i] + 1 != d->perm[i + 1])
42341 {
42342 use_pblendvb:
42343 for (i = 0; i < nelt; ++i)
42344 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42345
42346 finish_pblendvb:
42347 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42348 vperm = force_reg (vmode, vperm);
42349
42350 if (GET_MODE_SIZE (vmode) == 16)
42351 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42352 else
42353 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42354 if (target != d->target)
42355 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42356 return true;
42357 }
42358
42359 for (i = 0; i < 8; ++i)
42360 mask |= (d->perm[i * 2] >= 16) << i;
42361 vmode = V8HImode;
42362 /* FALLTHRU */
42363
42364 do_subreg:
42365 target = gen_reg_rtx (vmode);
42366 op0 = gen_lowpart (vmode, op0);
42367 op1 = gen_lowpart (vmode, op1);
42368 break;
42369
42370 case V32QImode:
42371 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42372 for (i = 0; i < 32; i += 2)
42373 if (d->perm[i] + 1 != d->perm[i + 1])
42374 goto use_pblendvb;
42375 /* See if bytes move in quadruplets. If yes, vpblendd
42376 with immediate can be used. */
42377 for (i = 0; i < 32; i += 4)
42378 if (d->perm[i] + 2 != d->perm[i + 2])
42379 break;
42380 if (i < 32)
42381 {
42382 /* See if bytes move the same in both lanes. If yes,
42383 vpblendw with immediate can be used. */
42384 for (i = 0; i < 16; i += 2)
42385 if (d->perm[i] + 16 != d->perm[i + 16])
42386 goto use_pblendvb;
42387
42388 /* Use vpblendw. */
42389 for (i = 0; i < 16; ++i)
42390 mask |= (d->perm[i * 2] >= 32) << i;
42391 vmode = V16HImode;
42392 goto do_subreg;
42393 }
42394
42395 /* Use vpblendd. */
42396 for (i = 0; i < 8; ++i)
42397 mask |= (d->perm[i * 4] >= 32) << i;
42398 vmode = V8SImode;
42399 goto do_subreg;
42400
42401 case V16HImode:
42402 /* See if words move in pairs. If yes, vpblendd can be used. */
42403 for (i = 0; i < 16; i += 2)
42404 if (d->perm[i] + 1 != d->perm[i + 1])
42405 break;
42406 if (i < 16)
42407 {
42408 /* See if words move the same in both lanes. If not,
42409 vpblendvb must be used. */
42410 for (i = 0; i < 8; i++)
42411 if (d->perm[i] + 8 != d->perm[i + 8])
42412 {
42413 /* Use vpblendvb. */
42414 for (i = 0; i < 32; ++i)
42415 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42416
42417 vmode = V32QImode;
42418 nelt = 32;
42419 target = gen_reg_rtx (vmode);
42420 op0 = gen_lowpart (vmode, op0);
42421 op1 = gen_lowpart (vmode, op1);
42422 goto finish_pblendvb;
42423 }
42424
42425 /* Use vpblendw. */
42426 for (i = 0; i < 16; ++i)
42427 mask |= (d->perm[i] >= 16) << i;
42428 break;
42429 }
42430
42431 /* Use vpblendd. */
42432 for (i = 0; i < 8; ++i)
42433 mask |= (d->perm[i * 2] >= 16) << i;
42434 vmode = V8SImode;
42435 goto do_subreg;
42436
42437 case V4DImode:
42438 /* Use vpblendd. */
42439 for (i = 0; i < 4; ++i)
42440 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42441 vmode = V8SImode;
42442 goto do_subreg;
42443
42444 default:
42445 gcc_unreachable ();
42446 }
42447
42448 /* This matches five different patterns with the different modes. */
42449 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42450 x = gen_rtx_SET (VOIDmode, target, x);
42451 emit_insn (x);
42452 if (target != d->target)
42453 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42454
42455 return true;
42456 }
42457
42458 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42459 in terms of the variable form of vpermilps.
42460
42461 Note that we will have already failed the immediate input vpermilps,
42462 which requires that the high and low part shuffle be identical; the
42463 variable form doesn't require that. */
42464
42465 static bool
42466 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42467 {
42468 rtx rperm[8], vperm;
42469 unsigned i;
42470
42471 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42472 return false;
42473
42474 /* We can only permute within the 128-bit lane. */
42475 for (i = 0; i < 8; ++i)
42476 {
42477 unsigned e = d->perm[i];
42478 if (i < 4 ? e >= 4 : e < 4)
42479 return false;
42480 }
42481
42482 if (d->testing_p)
42483 return true;
42484
42485 for (i = 0; i < 8; ++i)
42486 {
42487 unsigned e = d->perm[i];
42488
42489 /* Within each 128-bit lane, the elements of op0 are numbered
42490 from 0 and the elements of op1 are numbered from 4. */
42491 if (e >= 8 + 4)
42492 e -= 8;
42493 else if (e >= 4)
42494 e -= 4;
42495
42496 rperm[i] = GEN_INT (e);
42497 }
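  /* For example (hypothetical permutation), perm = { 3, 2, 1, 0, 7, 6, 5, 4 }
     (reverse within each 128-bit lane) becomes the selector vector
     { 3, 2, 1, 0, 3, 2, 1, 0 }, since vpermilps indexes each lane
     independently from 0 to 3.  */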
42498
42499 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42500 vperm = force_reg (V8SImode, vperm);
42501 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42502
42503 return true;
42504 }
42505
42506 /* Return true if permutation D can be performed as a VMODE permutation
42507 instead.  */
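/* For instance (hypothetical case), the V32QImode permutation
   { 8..15, 0..7, 24..31, 16..23 } moves whole 8-byte chunks, so it is also
   valid as the V4DImode permutation { 1, 0, 3, 2 }; a permutation that
   splits any such chunk is rejected by the check below.  */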
42508
42509 static bool
42510 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42511 {
42512 unsigned int i, j, chunk;
42513
42514 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42515 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42516 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42517 return false;
42518
42519 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42520 return true;
42521
42522 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42523 for (i = 0; i < d->nelt; i += chunk)
42524 if (d->perm[i] & (chunk - 1))
42525 return false;
42526 else
42527 for (j = 1; j < chunk; ++j)
42528 if (d->perm[i] + j != d->perm[i + j])
42529 return false;
42530
42531 return true;
42532 }
42533
42534 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42535 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42536
42537 static bool
42538 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42539 {
42540 unsigned i, nelt, eltsz, mask;
42541 unsigned char perm[32];
42542 enum machine_mode vmode = V16QImode;
42543 rtx rperm[32], vperm, target, op0, op1;
42544
42545 nelt = d->nelt;
42546
42547 if (!d->one_operand_p)
42548 {
42549 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42550 {
42551 if (TARGET_AVX2
42552 && valid_perm_using_mode_p (V2TImode, d))
42553 {
42554 if (d->testing_p)
42555 return true;
42556
42557 /* Use vperm2i128 insn. The pattern uses
42558 V4DImode instead of V2TImode. */
42559 target = d->target;
42560 if (d->vmode != V4DImode)
42561 target = gen_reg_rtx (V4DImode);
42562 op0 = gen_lowpart (V4DImode, d->op0);
42563 op1 = gen_lowpart (V4DImode, d->op1);
42564 rperm[0]
42565 = GEN_INT ((d->perm[0] / (nelt / 2))
42566 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
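	      /* E.g. (hypothetical case) a V32QImode permutation taking
		 bytes 32..47 then bytes 16..31 gives d->perm[0] = 32 and
		 d->perm[16] = 16, so the immediate is 2 | (1 << 4) = 0x12:
		 low lane from op1's low half, high lane from op0's high
		 half.  */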
42567 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42568 if (target != d->target)
42569 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42570 return true;
42571 }
42572 return false;
42573 }
42574 }
42575 else
42576 {
42577 if (GET_MODE_SIZE (d->vmode) == 16)
42578 {
42579 if (!TARGET_SSSE3)
42580 return false;
42581 }
42582 else if (GET_MODE_SIZE (d->vmode) == 32)
42583 {
42584 if (!TARGET_AVX2)
42585 return false;
42586
42587 /* V4DImode should already be handled through
42588 expand_vselect by the vpermq instruction.  */
42589 gcc_assert (d->vmode != V4DImode);
42590
42591 vmode = V32QImode;
42592 if (d->vmode == V8SImode
42593 || d->vmode == V16HImode
42594 || d->vmode == V32QImode)
42595 {
42596 /* First see if vpermq can be used for
42597 V8SImode/V16HImode/V32QImode. */
42598 if (valid_perm_using_mode_p (V4DImode, d))
42599 {
42600 for (i = 0; i < 4; i++)
42601 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42602 if (d->testing_p)
42603 return true;
42604 target = gen_reg_rtx (V4DImode);
42605 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42606 perm, 4, false))
42607 {
42608 emit_move_insn (d->target,
42609 gen_lowpart (d->vmode, target));
42610 return true;
42611 }
42612 return false;
42613 }
42614
42615 /* Next see if vpermd can be used. */
42616 if (valid_perm_using_mode_p (V8SImode, d))
42617 vmode = V8SImode;
42618 }
42619 /* Or if vpermps can be used. */
42620 else if (d->vmode == V8SFmode)
42621 vmode = V8SImode;
42622
42623 if (vmode == V32QImode)
42624 {
42625 /* vpshufb only works intra-lane; it is not
42626 possible to shuffle bytes between the lanes.  */
42627 for (i = 0; i < nelt; ++i)
42628 if ((d->perm[i] ^ i) & (nelt / 2))
42629 return false;
42630 }
42631 }
42632 else
42633 return false;
42634 }
42635
42636 if (d->testing_p)
42637 return true;
42638
42639 if (vmode == V8SImode)
42640 for (i = 0; i < 8; ++i)
42641 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42642 else
42643 {
42644 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42645 if (!d->one_operand_p)
42646 mask = 2 * nelt - 1;
42647 else if (vmode == V16QImode)
42648 mask = nelt - 1;
42649 else
42650 mask = nelt / 2 - 1;
42651
42652 for (i = 0; i < nelt; ++i)
42653 {
42654 unsigned j, e = d->perm[i] & mask;
42655 for (j = 0; j < eltsz; ++j)
42656 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42657 }
42658 }
42659
42660 vperm = gen_rtx_CONST_VECTOR (vmode,
42661 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42662 vperm = force_reg (vmode, vperm);
42663
42664 target = d->target;
42665 if (d->vmode != vmode)
42666 target = gen_reg_rtx (vmode);
42667 op0 = gen_lowpart (vmode, d->op0);
42668 if (d->one_operand_p)
42669 {
42670 if (vmode == V16QImode)
42671 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42672 else if (vmode == V32QImode)
42673 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42674 else if (vmode == V8SFmode)
42675 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42676 else
42677 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42678 }
42679 else
42680 {
42681 op1 = gen_lowpart (vmode, d->op1);
42682 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42683 }
42684 if (target != d->target)
42685 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42686
42687 return true;
42688 }
42689
42690 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42691 in a single instruction. */
42692
42693 static bool
42694 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42695 {
42696 unsigned i, nelt = d->nelt;
42697 unsigned char perm2[MAX_VECT_LEN];
42698
42699 /* Check plain VEC_SELECT first, because AVX has instructions that could
42700 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42701 input where SEL+CONCAT may not. */
42702 if (d->one_operand_p)
42703 {
42704 int mask = nelt - 1;
42705 bool identity_perm = true;
42706 bool broadcast_perm = true;
42707
42708 for (i = 0; i < nelt; i++)
42709 {
42710 perm2[i] = d->perm[i] & mask;
42711 if (perm2[i] != i)
42712 identity_perm = false;
42713 if (perm2[i])
42714 broadcast_perm = false;
42715 }
42716
42717 if (identity_perm)
42718 {
42719 if (!d->testing_p)
42720 emit_move_insn (d->target, d->op0);
42721 return true;
42722 }
42723 else if (broadcast_perm && TARGET_AVX2)
42724 {
42725 /* Use vpbroadcast{b,w,d}. */
42726 rtx (*gen) (rtx, rtx) = NULL;
42727 switch (d->vmode)
42728 {
42729 case V32QImode:
42730 gen = gen_avx2_pbroadcastv32qi_1;
42731 break;
42732 case V16HImode:
42733 gen = gen_avx2_pbroadcastv16hi_1;
42734 break;
42735 case V8SImode:
42736 gen = gen_avx2_pbroadcastv8si_1;
42737 break;
42738 case V16QImode:
42739 gen = gen_avx2_pbroadcastv16qi;
42740 break;
42741 case V8HImode:
42742 gen = gen_avx2_pbroadcastv8hi;
42743 break;
42744 case V8SFmode:
42745 gen = gen_avx2_vec_dupv8sf_1;
42746 break;
42747 /* For other modes prefer other shuffles this function creates. */
42748 default: break;
42749 }
42750 if (gen != NULL)
42751 {
42752 if (!d->testing_p)
42753 emit_insn (gen (d->target, d->op0));
42754 return true;
42755 }
42756 }
42757
42758 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
42759 return true;
42760
42761 /* There are plenty of patterns in sse.md that are written for
42762 SEL+CONCAT and are not replicated for a single op. Perhaps
42763 that should be changed, to avoid the nastiness here. */
42764
42765 /* Recognize interleave style patterns, which means incrementing
42766 every other permutation operand. */
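      /* For instance (hypothetical permutation), V4SImode perm = { 0, 0, 1, 1 }
	 becomes perm2 = { 0, 4, 1, 5 }, i.e. a punpckldq of the operand with
	 itself, which the vselect+vconcat attempt below will recognize.  */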
42767 for (i = 0; i < nelt; i += 2)
42768 {
42769 perm2[i] = d->perm[i] & mask;
42770 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
42771 }
42772 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42773 d->testing_p))
42774 return true;
42775
42776 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
42777 if (nelt >= 4)
42778 {
42779 for (i = 0; i < nelt; i += 4)
42780 {
42781 perm2[i + 0] = d->perm[i + 0] & mask;
42782 perm2[i + 1] = d->perm[i + 1] & mask;
42783 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
42784 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
42785 }
42786
42787 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42788 d->testing_p))
42789 return true;
42790 }
42791 }
42792
42793 /* Finally, try the fully general two operand permute. */
42794 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
42795 d->testing_p))
42796 return true;
42797
42798 /* Recognize interleave style patterns with reversed operands. */
42799 if (!d->one_operand_p)
42800 {
42801 for (i = 0; i < nelt; ++i)
42802 {
42803 unsigned e = d->perm[i];
42804 if (e >= nelt)
42805 e -= nelt;
42806 else
42807 e += nelt;
42808 perm2[i] = e;
42809 }
42810
42811 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
42812 d->testing_p))
42813 return true;
42814 }
42815
42816 /* Try the SSE4.1 blend variable merge instructions. */
42817 if (expand_vec_perm_blend (d))
42818 return true;
42819
42820 /* Try one of the AVX vpermil variable permutations. */
42821 if (expand_vec_perm_vpermil (d))
42822 return true;
42823
42824 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
42825 vpshufb, vpermd, vpermps or vpermq variable permutation. */
42826 if (expand_vec_perm_pshufb (d))
42827 return true;
42828
42829 /* Try the AVX512F vpermi2 instructions. */
42830 rtx vec[64];
42831 enum machine_mode mode = d->vmode;
42832 if (mode == V8DFmode)
42833 mode = V8DImode;
42834 else if (mode == V16SFmode)
42835 mode = V16SImode;
42836 for (i = 0; i < nelt; ++i)
42837 vec[i] = GEN_INT (d->perm[i]);
42838 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
42839 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
42840 return true;
42841
42842 return false;
42843 }
42844
42845 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42846 in terms of a pair of pshuflw + pshufhw instructions. */
42847
42848 static bool
42849 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
42850 {
42851 unsigned char perm2[MAX_VECT_LEN];
42852 unsigned i;
42853 bool ok;
42854
42855 if (d->vmode != V8HImode || !d->one_operand_p)
42856 return false;
42857
42858 /* The two permutations only operate in 64-bit lanes. */
42859 for (i = 0; i < 4; ++i)
42860 if (d->perm[i] >= 4)
42861 return false;
42862 for (i = 4; i < 8; ++i)
42863 if (d->perm[i] < 4)
42864 return false;
42865
42866 if (d->testing_p)
42867 return true;
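  /* As an example (hypothetical permutation), perm = { 2, 3, 0, 1, 5, 4, 7, 6 }
     is handled as a pshuflw with selector { 2, 3, 0, 1 } followed by a pshufhw
     with selector { 1, 0, 3, 2 } on the high quadword.  */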
42868
42869 /* Emit the pshuflw. */
42870 memcpy (perm2, d->perm, 4);
42871 for (i = 4; i < 8; ++i)
42872 perm2[i] = i;
42873 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
42874 gcc_assert (ok);
42875
42876 /* Emit the pshufhw. */
42877 memcpy (perm2 + 4, d->perm + 4, 4);
42878 for (i = 0; i < 4; ++i)
42879 perm2[i] = i;
42880 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
42881 gcc_assert (ok);
42882
42883 return true;
42884 }
42885
42886 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
42887 the permutation using the SSSE3 palignr instruction. This succeeds
42888 when all of the elements in PERM fit within one vector and we merely
42889 need to shift them down so that a single vector permutation has a
42890 chance to succeed. */
42891
42892 static bool
42893 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
42894 {
42895 unsigned i, nelt = d->nelt;
42896 unsigned min, max;
42897 bool in_order, ok;
42898 rtx shift, target;
42899 struct expand_vec_perm_d dcopy;
42900
42901 /* Even with AVX, palignr only operates on 128-bit vectors. */
42902 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
42903 return false;
42904
42905 min = nelt, max = 0;
42906 for (i = 0; i < nelt; ++i)
42907 {
42908 unsigned e = d->perm[i];
42909 if (e < min)
42910 min = e;
42911 if (e > max)
42912 max = e;
42913 }
42914 if (min == 0 || max - min >= nelt)
42915 return false;
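  /* E.g. (hypothetical permutation) for V8HImode perm = { 3, 4, ..., 10 },
     min = 3, so a palignr by 3 elements already leaves an in-order selection
     and no further shuffle is needed; for other element orders the follow-up
     single-operand permutation below handles the rest.  */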
42916
42917 /* Given that we have SSSE3, we know we'll be able to implement the
42918 single operand permutation after the palignr with pshufb. */
42919 if (d->testing_p)
42920 return true;
42921
42922 dcopy = *d;
42923 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
42924 target = gen_reg_rtx (TImode);
42925 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
42926 gen_lowpart (TImode, d->op0), shift));
42927
42928 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
42929 dcopy.one_operand_p = true;
42930
42931 in_order = true;
42932 for (i = 0; i < nelt; ++i)
42933 {
42934 unsigned e = dcopy.perm[i] - min;
42935 if (e != i)
42936 in_order = false;
42937 dcopy.perm[i] = e;
42938 }
42939
42940 /* Test for the degenerate case where the alignment by itself
42941 produces the desired permutation. */
42942 if (in_order)
42943 {
42944 emit_move_insn (d->target, dcopy.op0);
42945 return true;
42946 }
42947
42948 ok = expand_vec_perm_1 (&dcopy);
42949 gcc_assert (ok);
42950
42951 return ok;
42952 }
42953
42954 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
42955
42956 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
42957 a two vector permutation into a single vector permutation by using
42958 an interleave operation to merge the vectors. */
42959
42960 static bool
42961 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
42962 {
42963 struct expand_vec_perm_d dremap, dfinal;
42964 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
42965 unsigned HOST_WIDE_INT contents;
42966 unsigned char remap[2 * MAX_VECT_LEN];
42967 rtx seq;
42968 bool ok, same_halves = false;
42969
42970 if (GET_MODE_SIZE (d->vmode) == 16)
42971 {
42972 if (d->one_operand_p)
42973 return false;
42974 }
42975 else if (GET_MODE_SIZE (d->vmode) == 32)
42976 {
42977 if (!TARGET_AVX)
42978 return false;
42979 /* For 32-byte modes allow even d->one_operand_p.
42980 The lack of cross-lane shuffling in some instructions
42981 might prevent a single insn shuffle. */
42982 dfinal = *d;
42983 dfinal.testing_p = true;
42984 /* If expand_vec_perm_interleave3 can expand this into
42985 a 3 insn sequence, give up and let it be expanded as
42986 a 3 insn sequence.  While that is one insn longer,
42987 it doesn't need a memory operand, and in the common
42988 case where both the interleave low and interleave high
42989 permutations with the same operands are adjacent, it
42990 needs only 4 insns for both after CSE.  */
42991 if (expand_vec_perm_interleave3 (&dfinal))
42992 return false;
42993 }
42994 else
42995 return false;
42996
42997 /* Examine from whence the elements come. */
42998 contents = 0;
42999 for (i = 0; i < nelt; ++i)
43000 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43001
43002 memset (remap, 0xff, sizeof (remap));
43003 dremap = *d;
43004
43005 if (GET_MODE_SIZE (d->vmode) == 16)
43006 {
43007 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43008
43009 /* Split the two input vectors into 4 halves. */
43010 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43011 h2 = h1 << nelt2;
43012 h3 = h2 << nelt2;
43013 h4 = h3 << nelt2;
43014
43015 /* If the elements come only from the low halves, use interleave low;
43016 similarly for interleave high.  If the elements are from mis-matched
43017 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
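      /* For instance (hypothetical permutation), V8HImode
	 perm = { 0, 8, 1, 9, 2, 10, 3, 11 } draws only on the two low halves
	 (contents within h1 | h3), so dremap becomes the punpcklwd pattern and
	 the final remap is the identity.  */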
43018 if ((contents & (h1 | h3)) == contents)
43019 {
43020 /* punpckl* */
43021 for (i = 0; i < nelt2; ++i)
43022 {
43023 remap[i] = i * 2;
43024 remap[i + nelt] = i * 2 + 1;
43025 dremap.perm[i * 2] = i;
43026 dremap.perm[i * 2 + 1] = i + nelt;
43027 }
43028 if (!TARGET_SSE2 && d->vmode == V4SImode)
43029 dremap.vmode = V4SFmode;
43030 }
43031 else if ((contents & (h2 | h4)) == contents)
43032 {
43033 /* punpckh* */
43034 for (i = 0; i < nelt2; ++i)
43035 {
43036 remap[i + nelt2] = i * 2;
43037 remap[i + nelt + nelt2] = i * 2 + 1;
43038 dremap.perm[i * 2] = i + nelt2;
43039 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43040 }
43041 if (!TARGET_SSE2 && d->vmode == V4SImode)
43042 dremap.vmode = V4SFmode;
43043 }
43044 else if ((contents & (h1 | h4)) == contents)
43045 {
43046 /* shufps */
43047 for (i = 0; i < nelt2; ++i)
43048 {
43049 remap[i] = i;
43050 remap[i + nelt + nelt2] = i + nelt2;
43051 dremap.perm[i] = i;
43052 dremap.perm[i + nelt2] = i + nelt + nelt2;
43053 }
43054 if (nelt != 4)
43055 {
43056 /* shufpd */
43057 dremap.vmode = V2DImode;
43058 dremap.nelt = 2;
43059 dremap.perm[0] = 0;
43060 dremap.perm[1] = 3;
43061 }
43062 }
43063 else if ((contents & (h2 | h3)) == contents)
43064 {
43065 /* shufps */
43066 for (i = 0; i < nelt2; ++i)
43067 {
43068 remap[i + nelt2] = i;
43069 remap[i + nelt] = i + nelt2;
43070 dremap.perm[i] = i + nelt2;
43071 dremap.perm[i + nelt2] = i + nelt;
43072 }
43073 if (nelt != 4)
43074 {
43075 /* shufpd */
43076 dremap.vmode = V2DImode;
43077 dremap.nelt = 2;
43078 dremap.perm[0] = 1;
43079 dremap.perm[1] = 2;
43080 }
43081 }
43082 else
43083 return false;
43084 }
43085 else
43086 {
43087 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43088 unsigned HOST_WIDE_INT q[8];
43089 unsigned int nonzero_halves[4];
43090
43091 /* Split the two input vectors into 8 quarters. */
43092 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43093 for (i = 1; i < 8; ++i)
43094 q[i] = q[0] << (nelt4 * i);
43095 for (i = 0; i < 4; ++i)
43096 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43097 {
43098 nonzero_halves[nzcnt] = i;
43099 ++nzcnt;
43100 }
43101
43102 if (nzcnt == 1)
43103 {
43104 gcc_assert (d->one_operand_p);
43105 nonzero_halves[1] = nonzero_halves[0];
43106 same_halves = true;
43107 }
43108 else if (d->one_operand_p)
43109 {
43110 gcc_assert (nonzero_halves[0] == 0);
43111 gcc_assert (nonzero_halves[1] == 1);
43112 }
43113
43114 if (nzcnt <= 2)
43115 {
43116 if (d->perm[0] / nelt2 == nonzero_halves[1])
43117 {
43118 /* Attempt to increase the likelihood that dfinal
43119 shuffle will be intra-lane. */
43120 char tmph = nonzero_halves[0];
43121 nonzero_halves[0] = nonzero_halves[1];
43122 nonzero_halves[1] = tmph;
43123 }
43124
43125 /* vperm2f128 or vperm2i128. */
43126 for (i = 0; i < nelt2; ++i)
43127 {
43128 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43129 remap[i + nonzero_halves[0] * nelt2] = i;
43130 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43131 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43132 }
43133
43134 if (d->vmode != V8SFmode
43135 && d->vmode != V4DFmode
43136 && d->vmode != V8SImode)
43137 {
43138 dremap.vmode = V8SImode;
43139 dremap.nelt = 8;
43140 for (i = 0; i < 4; ++i)
43141 {
43142 dremap.perm[i] = i + nonzero_halves[0] * 4;
43143 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43144 }
43145 }
43146 }
43147 else if (d->one_operand_p)
43148 return false;
43149 else if (TARGET_AVX2
43150 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43151 {
43152 /* vpunpckl* */
43153 for (i = 0; i < nelt4; ++i)
43154 {
43155 remap[i] = i * 2;
43156 remap[i + nelt] = i * 2 + 1;
43157 remap[i + nelt2] = i * 2 + nelt2;
43158 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43159 dremap.perm[i * 2] = i;
43160 dremap.perm[i * 2 + 1] = i + nelt;
43161 dremap.perm[i * 2 + nelt2] = i + nelt2;
43162 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43163 }
43164 }
43165 else if (TARGET_AVX2
43166 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43167 {
43168 /* vpunpckh* */
43169 for (i = 0; i < nelt4; ++i)
43170 {
43171 remap[i + nelt4] = i * 2;
43172 remap[i + nelt + nelt4] = i * 2 + 1;
43173 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43174 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43175 dremap.perm[i * 2] = i + nelt4;
43176 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43177 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43178 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43179 }
43180 }
43181 else
43182 return false;
43183 }
43184
43185 /* Use the remapping array set up above to move the elements from their
43186 swizzled locations into their final destinations. */
43187 dfinal = *d;
43188 for (i = 0; i < nelt; ++i)
43189 {
43190 unsigned e = remap[d->perm[i]];
43191 gcc_assert (e < nelt);
43192 /* If same_halves is true, both halves of the remapped vector are the
43193 same. Avoid cross-lane accesses if possible. */
43194 if (same_halves && i >= nelt2)
43195 {
43196 gcc_assert (e < nelt2);
43197 dfinal.perm[i] = e + nelt2;
43198 }
43199 else
43200 dfinal.perm[i] = e;
43201 }
43202 dremap.target = gen_reg_rtx (dremap.vmode);
43203 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43204 dfinal.op1 = dfinal.op0;
43205 dfinal.one_operand_p = true;
43206
43207 /* Test if the final remap can be done with a single insn. For V4SFmode or
43208 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43209 start_sequence ();
43210 ok = expand_vec_perm_1 (&dfinal);
43211 seq = get_insns ();
43212 end_sequence ();
43213
43214 if (!ok)
43215 return false;
43216
43217 if (d->testing_p)
43218 return true;
43219
43220 if (dremap.vmode != dfinal.vmode)
43221 {
43222 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43223 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43224 }
43225
43226 ok = expand_vec_perm_1 (&dremap);
43227 gcc_assert (ok);
43228
43229 emit_insn (seq);
43230 return true;
43231 }
43232
43233 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43234 a single vector cross-lane permutation into vpermq followed
43235 by any of the single insn permutations. */
43236
43237 static bool
43238 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43239 {
43240 struct expand_vec_perm_d dremap, dfinal;
43241 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43242 unsigned contents[2];
43243 bool ok;
43244
43245 if (!(TARGET_AVX2
43246 && (d->vmode == V32QImode || d->vmode == V16HImode)
43247 && d->one_operand_p))
43248 return false;
43249
43250 contents[0] = 0;
43251 contents[1] = 0;
43252 for (i = 0; i < nelt2; ++i)
43253 {
43254 contents[0] |= 1u << (d->perm[i] / nelt4);
43255 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43256 }
43257
43258 for (i = 0; i < 2; ++i)
43259 {
43260 unsigned int cnt = 0;
43261 for (j = 0; j < 4; ++j)
43262 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43263 return false;
43264 }
43265
43266 if (d->testing_p)
43267 return true;
43268
43269 dremap = *d;
43270 dremap.vmode = V4DImode;
43271 dremap.nelt = 4;
43272 dremap.target = gen_reg_rtx (V4DImode);
43273 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43274 dremap.op1 = dremap.op0;
43275 dremap.one_operand_p = true;
43276 for (i = 0; i < 2; ++i)
43277 {
43278 unsigned int cnt = 0;
43279 for (j = 0; j < 4; ++j)
43280 if ((contents[i] & (1u << j)) != 0)
43281 dremap.perm[2 * i + cnt++] = j;
43282 for (; cnt < 2; ++cnt)
43283 dremap.perm[2 * i + cnt] = 0;
43284 }
43285
43286 dfinal = *d;
43287 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43288 dfinal.op1 = dfinal.op0;
43289 dfinal.one_operand_p = true;
43290 for (i = 0, j = 0; i < nelt; ++i)
43291 {
43292 if (i == nelt2)
43293 j = 2;
43294 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43295 if ((d->perm[i] / nelt4) == dremap.perm[j])
43296 ;
43297 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43298 dfinal.perm[i] |= nelt4;
43299 else
43300 gcc_unreachable ();
43301 }
43302
43303 ok = expand_vec_perm_1 (&dremap);
43304 gcc_assert (ok);
43305
43306 ok = expand_vec_perm_1 (&dfinal);
43307 gcc_assert (ok);
43308
43309 return true;
43310 }
43311
43312 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43313 a vector permutation using two instructions, vperm2f128 resp.
43314 vperm2i128 followed by any single in-lane permutation. */
43315
43316 static bool
43317 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43318 {
43319 struct expand_vec_perm_d dfirst, dsecond;
43320 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43321 bool ok;
43322
43323 if (!TARGET_AVX
43324 || GET_MODE_SIZE (d->vmode) != 32
43325 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43326 return false;
43327
43328 dsecond = *d;
43329 dsecond.one_operand_p = false;
43330 dsecond.testing_p = true;
43331
43332 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43333 immediate. For perm < 16 the second permutation uses
43334 d->op0 as first operand, for perm >= 16 it uses d->op1
43335 as first operand. The second operand is the result of
43336 vperm2[fi]128. */
43337 for (perm = 0; perm < 32; perm++)
43338 {
43339 /* Ignore permutations which do not move anything cross-lane. */
43340 if (perm < 16)
43341 {
43342 /* The second shuffle for e.g. V4DFmode has
43343 0123 and ABCD operands.
43344 Ignore AB23, as 23 is already in the second lane
43345 of the first operand. */
43346 if ((perm & 0xc) == (1 << 2)) continue;
43347 /* And 01CD, as 01 is in the first lane of the first
43348 operand. */
43349 if ((perm & 3) == 0) continue;
43350 /* And 4567, as then the vperm2[fi]128 doesn't change
43351 anything on the original 4567 second operand. */
43352 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43353 }
43354 else
43355 {
43356 /* The second shuffle for e.g. V4DFmode has
43357 4567 and ABCD operands.
43358 Ignore AB67, as 67 is already in the second lane
43359 of the first operand. */
43360 if ((perm & 0xc) == (3 << 2)) continue;
43361 /* And 45CD, as 45 is in the first lane of the first
43362 operand. */
43363 if ((perm & 3) == 2) continue;
43364 /* And 0123, as then the vperm2[fi]128 doesn't change
43365 anything on the original 0123 first operand. */
43366 if ((perm & 0xf) == (1 << 2)) continue;
43367 }
43368
43369 for (i = 0; i < nelt; i++)
43370 {
43371 j = d->perm[i] / nelt2;
43372 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43373 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43374 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43375 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43376 else
43377 break;
43378 }
43379
43380 if (i == nelt)
43381 {
43382 start_sequence ();
43383 ok = expand_vec_perm_1 (&dsecond);
43384 end_sequence ();
43385 }
43386 else
43387 ok = false;
43388
43389 if (ok)
43390 {
43391 if (d->testing_p)
43392 return true;
43393
43394 /* Found a usable second shuffle. dfirst will be
43395 vperm2f128 on d->op0 and d->op1. */
43396 dsecond.testing_p = false;
43397 dfirst = *d;
43398 dfirst.target = gen_reg_rtx (d->vmode);
43399 for (i = 0; i < nelt; i++)
43400 dfirst.perm[i] = (i & (nelt2 - 1))
43401 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43402
43403 ok = expand_vec_perm_1 (&dfirst);
43404 gcc_assert (ok);
43405
43406 /* And dsecond is some single insn shuffle, taking
43407 d->op0 and result of vperm2f128 (if perm < 16) or
43408 d->op1 and result of vperm2f128 (otherwise). */
43409 dsecond.op1 = dfirst.target;
43410 if (perm >= 16)
43411 dsecond.op0 = dfirst.op1;
43412
43413 ok = expand_vec_perm_1 (&dsecond);
43414 gcc_assert (ok);
43415
43416 return true;
43417 }
43418
43419 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43420 if (d->one_operand_p)
43421 return false;
43422 }
43423
43424 return false;
43425 }
43426
43427 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43428 a two vector permutation using 2 intra-lane interleave insns
43429 and cross-lane shuffle for 32-byte vectors. */
43430
43431 static bool
43432 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43433 {
43434 unsigned i, nelt;
43435 rtx (*gen) (rtx, rtx, rtx);
43436
43437 if (d->one_operand_p)
43438 return false;
43439 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43440 ;
43441 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43442 ;
43443 else
43444 return false;
43445
43446 nelt = d->nelt;
43447 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43448 return false;
43449 for (i = 0; i < nelt; i += 2)
43450 if (d->perm[i] != d->perm[0] + i / 2
43451 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43452 return false;
43453
43454 if (d->testing_p)
43455 return true;
43456
43457 switch (d->vmode)
43458 {
43459 case V32QImode:
43460 if (d->perm[0])
43461 gen = gen_vec_interleave_highv32qi;
43462 else
43463 gen = gen_vec_interleave_lowv32qi;
43464 break;
43465 case V16HImode:
43466 if (d->perm[0])
43467 gen = gen_vec_interleave_highv16hi;
43468 else
43469 gen = gen_vec_interleave_lowv16hi;
43470 break;
43471 case V8SImode:
43472 if (d->perm[0])
43473 gen = gen_vec_interleave_highv8si;
43474 else
43475 gen = gen_vec_interleave_lowv8si;
43476 break;
43477 case V4DImode:
43478 if (d->perm[0])
43479 gen = gen_vec_interleave_highv4di;
43480 else
43481 gen = gen_vec_interleave_lowv4di;
43482 break;
43483 case V8SFmode:
43484 if (d->perm[0])
43485 gen = gen_vec_interleave_highv8sf;
43486 else
43487 gen = gen_vec_interleave_lowv8sf;
43488 break;
43489 case V4DFmode:
43490 if (d->perm[0])
43491 gen = gen_vec_interleave_highv4df;
43492 else
43493 gen = gen_vec_interleave_lowv4df;
43494 break;
43495 default:
43496 gcc_unreachable ();
43497 }
43498
43499 emit_insn (gen (d->target, d->op0, d->op1));
43500 return true;
43501 }
43502
43503 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43504 a single vector permutation using a single intra-lane vector
43505 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43506 the non-swapped and swapped vectors together. */
43507
43508 static bool
43509 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43510 {
43511 struct expand_vec_perm_d dfirst, dsecond;
43512 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43513 rtx seq;
43514 bool ok;
43515 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43516
43517 if (!TARGET_AVX
43518 || TARGET_AVX2
43519 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43520 || !d->one_operand_p)
43521 return false;
43522
43523 dfirst = *d;
43524 for (i = 0; i < nelt; i++)
43525 dfirst.perm[i] = 0xff;
43526 for (i = 0, msk = 0; i < nelt; i++)
43527 {
43528 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43529 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43530 return false;
43531 dfirst.perm[j] = d->perm[i];
43532 if (j != i)
43533 msk |= (1 << i);
43534 }
43535 for (i = 0; i < nelt; i++)
43536 if (dfirst.perm[i] == 0xff)
43537 dfirst.perm[i] = i;
43538
43539 if (!d->testing_p)
43540 dfirst.target = gen_reg_rtx (dfirst.vmode);
43541
43542 start_sequence ();
43543 ok = expand_vec_perm_1 (&dfirst);
43544 seq = get_insns ();
43545 end_sequence ();
43546
43547 if (!ok)
43548 return false;
43549
43550 if (d->testing_p)
43551 return true;
43552
43553 emit_insn (seq);
43554
43555 dsecond = *d;
43556 dsecond.op0 = dfirst.target;
43557 dsecond.op1 = dfirst.target;
43558 dsecond.one_operand_p = true;
43559 dsecond.target = gen_reg_rtx (dsecond.vmode);
43560 for (i = 0; i < nelt; i++)
43561 dsecond.perm[i] = i ^ nelt2;
43562
43563 ok = expand_vec_perm_1 (&dsecond);
43564 gcc_assert (ok);
43565
43566 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43567 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43568 return true;
43569 }
43570
43571 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43572 permutation using two vperm2f128, followed by a vshufpd insn blending
43573 the two vectors together. */
43574
43575 static bool
43576 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43577 {
43578 struct expand_vec_perm_d dfirst, dsecond, dthird;
43579 bool ok;
43580
43581 if (!TARGET_AVX || (d->vmode != V4DFmode))
43582 return false;
43583
43584 if (d->testing_p)
43585 return true;
43586
43587 dfirst = *d;
43588 dsecond = *d;
43589 dthird = *d;
43590
43591 dfirst.perm[0] = (d->perm[0] & ~1);
43592 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43593 dfirst.perm[2] = (d->perm[2] & ~1);
43594 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43595 dsecond.perm[0] = (d->perm[1] & ~1);
43596 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43597 dsecond.perm[2] = (d->perm[3] & ~1);
43598 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43599 dthird.perm[0] = (d->perm[0] % 2);
43600 dthird.perm[1] = (d->perm[1] % 2) + 4;
43601 dthird.perm[2] = (d->perm[2] % 2) + 2;
43602 dthird.perm[3] = (d->perm[3] % 2) + 6;
43603
43604 dfirst.target = gen_reg_rtx (dfirst.vmode);
43605 dsecond.target = gen_reg_rtx (dsecond.vmode);
43606 dthird.op0 = dfirst.target;
43607 dthird.op1 = dsecond.target;
43608 dthird.one_operand_p = false;
43609
43610 canonicalize_perm (&dfirst);
43611 canonicalize_perm (&dsecond);
43612
43613 ok = expand_vec_perm_1 (&dfirst)
43614 && expand_vec_perm_1 (&dsecond)
43615 && expand_vec_perm_1 (&dthird);
43616
43617 gcc_assert (ok);
43618
43619 return true;
43620 }
43621
43622 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43623 permutation with two pshufb insns and an ior. We should have already
43624 failed all two instruction sequences. */
43625
43626 static bool
43627 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43628 {
43629 rtx rperm[2][16], vperm, l, h, op, m128;
43630 unsigned int i, nelt, eltsz;
43631
43632 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43633 return false;
43634 gcc_assert (!d->one_operand_p);
43635
43636 nelt = d->nelt;
43637 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43638
43639 /* Generate two permutation masks. If the required element is within
43640 the given vector it is shuffled into the proper lane. If the required
43641 element is in the other vector, force a zero into the lane by setting
43642 bit 7 in the permutation mask. */
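  /* Small example (hypothetical): for V8HImode, d->perm[0] = 9 selects word 1
     of op1, so bytes 0-1 of the op1 mask get indexes 2 and 3 while the same
     bytes of the op0 mask get -128, zeroing that lane in the first pshufb.  */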
43643 m128 = GEN_INT (-128);
43644 for (i = 0; i < nelt; ++i)
43645 {
43646 unsigned j, e = d->perm[i];
43647 unsigned which = (e >= nelt);
43648 if (e >= nelt)
43649 e -= nelt;
43650
43651 for (j = 0; j < eltsz; ++j)
43652 {
43653 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43654 rperm[1-which][i*eltsz + j] = m128;
43655 }
43656 }
43657
43658 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43659 vperm = force_reg (V16QImode, vperm);
43660
43661 l = gen_reg_rtx (V16QImode);
43662 op = gen_lowpart (V16QImode, d->op0);
43663 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43664
43665 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43666 vperm = force_reg (V16QImode, vperm);
43667
43668 h = gen_reg_rtx (V16QImode);
43669 op = gen_lowpart (V16QImode, d->op1);
43670 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43671
43672 op = d->target;
43673 if (d->vmode != V16QImode)
43674 op = gen_reg_rtx (V16QImode);
43675 emit_insn (gen_iorv16qi3 (op, l, h));
43676 if (op != d->target)
43677 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43678
43679 return true;
43680 }
43681
43682 /* Implement an arbitrary permutation of one V32QImode or V16HImode operand
43683 with two vpshufb insns, vpermq and vpor. We should have already failed
43684 all two or three instruction sequences. */
43685
43686 static bool
43687 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43688 {
43689 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43690 unsigned int i, nelt, eltsz;
43691
43692 if (!TARGET_AVX2
43693 || !d->one_operand_p
43694 || (d->vmode != V32QImode && d->vmode != V16HImode))
43695 return false;
43696
43697 if (d->testing_p)
43698 return true;
43699
43700 nelt = d->nelt;
43701 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43702
43703 /* Generate two permutation masks.  If the required element is within
43704 the same lane, it is shuffled in.  If the required element is from the
43705 other lane, force a zero by setting bit 7 in the permutation mask.
43706 The other mask has non-negative elements where an element is
43707 requested from the other lane, but it also moves that element to the
43708 other lane, so that the result of vpshufb can have the two V2TImode
43709 halves swapped.  */
43710 m128 = GEN_INT (-128);
43711 for (i = 0; i < nelt; ++i)
43712 {
43713 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43714 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43715
43716 for (j = 0; j < eltsz; ++j)
43717 {
43718 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43719 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43720 }
43721 }
43722
43723 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43724 vperm = force_reg (V32QImode, vperm);
43725
43726 h = gen_reg_rtx (V32QImode);
43727 op = gen_lowpart (V32QImode, d->op0);
43728 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43729
43730 /* Swap the 128-bit lanes of h into hp.  */
43731 hp = gen_reg_rtx (V4DImode);
43732 op = gen_lowpart (V4DImode, h);
43733 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43734 const1_rtx));
43735
43736 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43737 vperm = force_reg (V32QImode, vperm);
43738
43739 l = gen_reg_rtx (V32QImode);
43740 op = gen_lowpart (V32QImode, d->op0);
43741 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43742
43743 op = d->target;
43744 if (d->vmode != V32QImode)
43745 op = gen_reg_rtx (V32QImode);
43746 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
43747 if (op != d->target)
43748 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43749
43750 return true;
43751 }
43752
43753 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
43754 and extract-odd permutations of two V32QImode or V16HImode operands
43755 with two vpshufb insns, vpor and vpermq. We should have already
43756 failed all two or three instruction sequences. */
43757
43758 static bool
43759 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
43760 {
43761 rtx rperm[2][32], vperm, l, h, ior, op, m128;
43762 unsigned int i, nelt, eltsz;
43763
43764 if (!TARGET_AVX2
43765 || d->one_operand_p
43766 || (d->vmode != V32QImode && d->vmode != V16HImode))
43767 return false;
43768
43769 for (i = 0; i < d->nelt; ++i)
43770 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
43771 return false;
43772
43773 if (d->testing_p)
43774 return true;
43775
43776 nelt = d->nelt;
43777 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43778
43779 /* Generate two permutation masks. In the first permutation mask
43780 the first quarter will contain indexes for the first half
43781 of the op0, the second quarter will contain bit 7 set, third quarter
43782 will contain indexes for the second half of the op0 and the
43783 last quarter bit 7 set. In the second permutation mask
43784 the first quarter will contain bit 7 set, the second quarter
43785 indexes for the first half of the op1, the third quarter bit 7 set
43786 and last quarter indexes for the second half of the op1.
43787 I.e. the first mask e.g. for V32QImode extract even will be:
43788 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
43789 (all values masked with 0xf except for -128) and second mask
43790 for extract even will be
43791 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
43792 m128 = GEN_INT (-128);
43793 for (i = 0; i < nelt; ++i)
43794 {
43795 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43796 unsigned which = d->perm[i] >= nelt;
43797 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
43798
43799 for (j = 0; j < eltsz; ++j)
43800 {
43801 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
43802 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
43803 }
43804 }
43805
43806 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43807 vperm = force_reg (V32QImode, vperm);
43808
43809 l = gen_reg_rtx (V32QImode);
43810 op = gen_lowpart (V32QImode, d->op0);
43811 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43812
43813 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43814 vperm = force_reg (V32QImode, vperm);
43815
43816 h = gen_reg_rtx (V32QImode);
43817 op = gen_lowpart (V32QImode, d->op1);
43818 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43819
43820 ior = gen_reg_rtx (V32QImode);
43821 emit_insn (gen_iorv32qi3 (ior, l, h));
43822
43823 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
43824 op = gen_reg_rtx (V4DImode);
43825 ior = gen_lowpart (V4DImode, ior);
43826 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
43827 const1_rtx, GEN_INT (3)));
43828 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43829
43830 return true;
43831 }
43832
43833 /* A subroutine of ix86_expand_vec_perm_const_1.  Implement extract-even
43834 and extract-odd permutations. */
43835
43836 static bool
43837 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
43838 {
43839 rtx t1, t2, t3, t4, t5;
43840
43841 switch (d->vmode)
43842 {
43843 case V4DFmode:
43844 t1 = gen_reg_rtx (V4DFmode);
43845 t2 = gen_reg_rtx (V4DFmode);
43846
43847 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
43848 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
43849 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
43850
43851 /* Now an unpck[lh]pd will produce the result required. */
43852 if (odd)
43853 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
43854 else
43855 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
43856 emit_insn (t3);
43857 break;
43858
43859 case V8SFmode:
43860 {
43861 int mask = odd ? 0xdd : 0x88;
43862
43863 t1 = gen_reg_rtx (V8SFmode);
43864 t2 = gen_reg_rtx (V8SFmode);
43865 t3 = gen_reg_rtx (V8SFmode);
43866
43867 /* Shuffle within the 128-bit lanes to produce:
43868 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
43869 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
43870 GEN_INT (mask)));
43871
43872 /* Shuffle the lanes around to produce:
43873 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
43874 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
43875 GEN_INT (0x3)));
43876
43877 /* Shuffle within the 128-bit lanes to produce:
43878 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
43879 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
43880
43881 /* Shuffle within the 128-bit lanes to produce:
43882 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
43883 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
43884
43885 /* Shuffle the lanes around to produce:
43886 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
43887 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
43888 GEN_INT (0x20)));
43889 }
43890 break;
43891
43892 case V2DFmode:
43893 case V4SFmode:
43894 case V2DImode:
43895 case V4SImode:
43896 /* These are always directly implementable by expand_vec_perm_1. */
43897 gcc_unreachable ();
43898
43899 case V8HImode:
43900 if (TARGET_SSSE3)
43901 return expand_vec_perm_pshufb2 (d);
43902 else
43903 {
43904 /* We need 2*log2(N)-1 operations to achieve odd/even
43905 with interleave. */
43906 t1 = gen_reg_rtx (V8HImode);
43907 t2 = gen_reg_rtx (V8HImode);
43908 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
43909 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
43910 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
43911 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
43912 if (odd)
43913 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
43914 else
43915 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
43916 emit_insn (t3);
43917 }
43918 break;
43919
43920 case V16QImode:
43921 if (TARGET_SSSE3)
43922 return expand_vec_perm_pshufb2 (d);
43923 else
43924 {
43925 t1 = gen_reg_rtx (V16QImode);
43926 t2 = gen_reg_rtx (V16QImode);
43927 t3 = gen_reg_rtx (V16QImode);
43928 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
43929 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
43930 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
43931 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
43932 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
43933 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
43934 if (odd)
43935 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
43936 else
43937 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
43938 emit_insn (t3);
43939 }
43940 break;
43941
43942 case V16HImode:
43943 case V32QImode:
43944 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
43945
43946 case V4DImode:
43947 if (!TARGET_AVX2)
43948 {
43949 struct expand_vec_perm_d d_copy = *d;
43950 d_copy.vmode = V4DFmode;
43951 d_copy.target = gen_reg_rtx (V4DFmode);
43952 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
43953 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
43954 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
43955 {
43956 if (!d->testing_p)
43957 emit_move_insn (d->target,
43958 gen_lowpart (V4DImode, d_copy.target));
43959 return true;
43960 }
43961 return false;
43962 }
43963
43964 t1 = gen_reg_rtx (V4DImode);
43965 t2 = gen_reg_rtx (V4DImode);
43966
43967 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
43968 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
43969 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
43970
43971 /* Now a vpunpck[lh]qdq will produce the required result.  */
43972 if (odd)
43973 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
43974 else
43975 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
43976 emit_insn (t3);
43977 break;
43978
43979 case V8SImode:
43980 if (!TARGET_AVX2)
43981 {
43982 struct expand_vec_perm_d d_copy = *d;
43983 d_copy.vmode = V8SFmode;
43984 d_copy.target = gen_reg_rtx (V8SFmode);
43985 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
43986 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
43987 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
43988 {
43989 if (!d->testing_p)
43990 emit_move_insn (d->target,
43991 gen_lowpart (V8SImode, d_copy.target));
43992 return true;
43993 }
43994 return false;
43995 }
43996
43997 t1 = gen_reg_rtx (V8SImode);
43998 t2 = gen_reg_rtx (V8SImode);
43999 t3 = gen_reg_rtx (V4DImode);
44000 t4 = gen_reg_rtx (V4DImode);
44001 t5 = gen_reg_rtx (V4DImode);
44002
44003 /* Shuffle the lanes around into
44004 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44005 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44006 gen_lowpart (V4DImode, d->op1),
44007 GEN_INT (0x20)));
44008 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44009 gen_lowpart (V4DImode, d->op1),
44010 GEN_INT (0x31)));
44011
44012 /* Swap the 2nd and 3rd position in each lane into
44013 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44014 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44015 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44016 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44017 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44018
44019 /* Now a vpunpck[lh]qdq will produce
44020 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44021 if (odd)
44022 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44023 gen_lowpart (V4DImode, t2));
44024 else
44025 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44026 gen_lowpart (V4DImode, t2));
44027 emit_insn (t3);
44028 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44029 break;
44030
44031 default:
44032 gcc_unreachable ();
44033 }
44034
44035 return true;
44036 }
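
/* Scalar model (illustrative only, guarded out of the build) of what every
   case above ultimately computes: concatenate OP0 and OP1 and keep either
   the even- or the odd-numbered elements.  Shown for 32-bit elements; the
   element type is immaterial.  The function name is hypothetical.  */
#if 0
#include <stdint.h>

static void
extract_even_odd_sketch (uint32_t r[], const uint32_t op0[],
			 const uint32_t op1[], unsigned nelt, unsigned odd)
{
  for (unsigned i = 0; i < nelt; ++i)
    {
      unsigned src = 2 * i + odd;		/* this is d->perm[i] */
      r[i] = src < nelt ? op0[src] : op1[src - nelt];
    }
}
#endif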
44037
44038 /* A subroutine of ix86_expand_vec_perm_const_1.  Pattern match
44039 extract-even and extract-odd permutations. */
44040
44041 static bool
44042 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44043 {
44044 unsigned i, odd, nelt = d->nelt;
44045
44046 odd = d->perm[0];
44047 if (odd != 0 && odd != 1)
44048 return false;
44049
44050 for (i = 1; i < nelt; ++i)
44051 if (d->perm[i] != 2 * i + odd)
44052 return false;
44053
44054 return expand_vec_perm_even_odd_1 (d, odd);
44055 }
44056
44057 /* A subroutine of ix86_expand_vec_perm_const_1.  Implement broadcast
44058 permutations. We assume that expand_vec_perm_1 has already failed. */
44059
44060 static bool
44061 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44062 {
44063 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44064 enum machine_mode vmode = d->vmode;
44065 unsigned char perm2[4];
44066 rtx op0 = d->op0, dest;
44067 bool ok;
44068
44069 switch (vmode)
44070 {
44071 case V4DFmode:
44072 case V8SFmode:
44073 /* These are special-cased in sse.md so that we can optionally
44074 use the vbroadcast instruction. They expand to two insns
44075 if the input happens to be in a register. */
44076 gcc_unreachable ();
44077
44078 case V2DFmode:
44079 case V2DImode:
44080 case V4SFmode:
44081 case V4SImode:
44082 /* These are always implementable using standard shuffle patterns. */
44083 gcc_unreachable ();
44084
44085 case V8HImode:
44086 case V16QImode:
44087 /* These can be implemented via interleave. We save one insn by
44088 stopping once we have promoted to V4SImode and then using pshufd.  */
44089 do
44090 {
44091 rtx dest;
44092 rtx (*gen) (rtx, rtx, rtx)
44093 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44094 : gen_vec_interleave_lowv8hi;
44095
44096 if (elt >= nelt2)
44097 {
44098 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44099 : gen_vec_interleave_highv8hi;
44100 elt -= nelt2;
44101 }
44102 nelt2 /= 2;
44103
44104 dest = gen_reg_rtx (vmode);
44105 emit_insn (gen (dest, op0, op0));
44106 vmode = get_mode_wider_vector (vmode);
44107 op0 = gen_lowpart (vmode, dest);
44108 }
44109 while (vmode != V4SImode);
44110
44111 memset (perm2, elt, 4);
44112 dest = gen_reg_rtx (V4SImode);
44113 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44114 gcc_assert (ok);
44115 if (!d->testing_p)
44116 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44117 return true;
44118
44119 case V32QImode:
44120 case V16HImode:
44121 case V8SImode:
44122 case V4DImode:
44123 /* For AVX2 broadcasts of the first element vpbroadcast* or
44124 vpermq should be used by expand_vec_perm_1. */
44125 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44126 return false;
44127
44128 default:
44129 gcc_unreachable ();
44130 }
44131 }
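
/* Illustrative scalar sketch (guarded out of the build) of the
   interleave-based broadcast above, for V16QImode: each self-interleave
   step doubles the width of the run holding the requested byte, and once a
   full 32-bit word is a copy of that byte a pshufd-style 4-way splat
   finishes the job.  Function name is hypothetical; only <string.h> and
   <stdint.h> are assumed.  */
#if 0
#include <stdint.h>
#include <string.h>

static void
broadcast_byte_sketch (uint8_t r[16], const uint8_t op0[16], unsigned elt)
{
  uint8_t cur[16], next[16];
  unsigned nelt2 = 8;			/* half of the current element count */
  unsigned width;			/* current element width in bytes     */

  memcpy (cur, op0, 16);
  for (width = 1; width < 4; width *= 2, nelt2 /= 2)
    {
      /* Self-interleave (low or high half) at the current element width:
	 every element of the chosen half is duplicated, so the requested
	 byte now fills an element twice as wide.  */
      unsigned base = 0;
      if (elt >= nelt2)
	{
	  base = nelt2;			/* interleave the high half instead */
	  elt -= nelt2;
	}
      for (unsigned k = 0; k < nelt2; ++k)
	for (unsigned b = 0; b < width; ++b)
	  {
	    next[(2 * k) * width + b] = cur[(base + k) * width + b];
	    next[(2 * k + 1) * width + b] = cur[(base + k) * width + b];
	  }
      memcpy (cur, next, 16);
    }

  /* CUR, viewed as four 32-bit words, now has word ELT equal to four
     copies of the requested byte; splat it the way pshufd would.  */
  for (unsigned k = 0; k < 4; ++k)
    memcpy (r + 4 * k, cur + 4 * elt, 4);
}
#endif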
44132
44133 /* A subroutine of ix86_expand_vec_perm_const_1.  Pattern match
44134 broadcast permutations. */
44135
44136 static bool
44137 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44138 {
44139 unsigned i, elt, nelt = d->nelt;
44140
44141 if (!d->one_operand_p)
44142 return false;
44143
44144 elt = d->perm[0];
44145 for (i = 1; i < nelt; ++i)
44146 if (d->perm[i] != elt)
44147 return false;
44148
44149 return expand_vec_perm_broadcast_1 (d);
44150 }
44151
44152 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44153 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44154 all the shorter instruction sequences. */
44155
44156 static bool
44157 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44158 {
44159 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44160 unsigned int i, nelt, eltsz;
44161 bool used[4];
44162
44163 if (!TARGET_AVX2
44164 || d->one_operand_p
44165 || (d->vmode != V32QImode && d->vmode != V16HImode))
44166 return false;
44167
44168 if (d->testing_p)
44169 return true;
44170
44171 nelt = d->nelt;
44172 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44173
44174 /* Generate 4 permutation masks.  If the required element is within
44175 the same lane, it is shuffled in.  If the required element is from the
44176 other lane, force a zero by setting bit 7 in the permutation mask.
44177 The cross-lane masks have a non-negative element wherever the required
44178 element is requested from the other lane, but placed into the other lane,
44179 so that the result of vpshufb can have its two V2TImode halves
44180 swapped.  */
44181 m128 = GEN_INT (-128);
44182 for (i = 0; i < 32; ++i)
44183 {
44184 rperm[0][i] = m128;
44185 rperm[1][i] = m128;
44186 rperm[2][i] = m128;
44187 rperm[3][i] = m128;
44188 }
44189 used[0] = false;
44190 used[1] = false;
44191 used[2] = false;
44192 used[3] = false;
44193 for (i = 0; i < nelt; ++i)
44194 {
44195 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44196 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44197 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44198
44199 for (j = 0; j < eltsz; ++j)
44200 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44201 used[which] = true;
44202 }
44203
44204 for (i = 0; i < 2; ++i)
44205 {
44206 if (!used[2 * i + 1])
44207 {
44208 h[i] = NULL_RTX;
44209 continue;
44210 }
44211 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44212 gen_rtvec_v (32, rperm[2 * i + 1]));
44213 vperm = force_reg (V32QImode, vperm);
44214 h[i] = gen_reg_rtx (V32QImode);
44215 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44216 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44217 }
44218
44219 /* Swap the 128-bit lanes of h[X].  */
44220 for (i = 0; i < 2; ++i)
44221 {
44222 if (h[i] == NULL_RTX)
44223 continue;
44224 op = gen_reg_rtx (V4DImode);
44225 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44226 const2_rtx, GEN_INT (3), const0_rtx,
44227 const1_rtx));
44228 h[i] = gen_lowpart (V32QImode, op);
44229 }
44230
44231 for (i = 0; i < 2; ++i)
44232 {
44233 if (!used[2 * i])
44234 {
44235 l[i] = NULL_RTX;
44236 continue;
44237 }
44238 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44239 vperm = force_reg (V32QImode, vperm);
44240 l[i] = gen_reg_rtx (V32QImode);
44241 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44242 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44243 }
44244
44245 for (i = 0; i < 2; ++i)
44246 {
44247 if (h[i] && l[i])
44248 {
44249 op = gen_reg_rtx (V32QImode);
44250 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44251 l[i] = op;
44252 }
44253 else if (h[i])
44254 l[i] = h[i];
44255 }
44256
44257 gcc_assert (l[0] && l[1]);
44258 op = d->target;
44259 if (d->vmode != V32QImode)
44260 op = gen_reg_rtx (V32QImode);
44261 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44262 if (op != d->target)
44263 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44264 return true;
44265 }
44266
44267 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44268 With all of the interface bits taken care of, perform the expansion
44269 in D and return true on success. */
44270
44271 static bool
44272 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44273 {
44274 /* Try a single instruction expansion. */
44275 if (expand_vec_perm_1 (d))
44276 return true;
44277
44278 /* Try sequences of two instructions. */
44279
44280 if (expand_vec_perm_pshuflw_pshufhw (d))
44281 return true;
44282
44283 if (expand_vec_perm_palignr (d))
44284 return true;
44285
44286 if (expand_vec_perm_interleave2 (d))
44287 return true;
44288
44289 if (expand_vec_perm_broadcast (d))
44290 return true;
44291
44292 if (expand_vec_perm_vpermq_perm_1 (d))
44293 return true;
44294
44295 if (expand_vec_perm_vperm2f128 (d))
44296 return true;
44297
44298 /* Try sequences of three instructions. */
44299
44300 if (expand_vec_perm_2vperm2f128_vshuf (d))
44301 return true;
44302
44303 if (expand_vec_perm_pshufb2 (d))
44304 return true;
44305
44306 if (expand_vec_perm_interleave3 (d))
44307 return true;
44308
44309 if (expand_vec_perm_vperm2f128_vblend (d))
44310 return true;
44311
44312 /* Try sequences of four instructions. */
44313
44314 if (expand_vec_perm_vpshufb2_vpermq (d))
44315 return true;
44316
44317 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44318 return true;
44319
44320 /* ??? Look for narrow permutations whose element orderings would
44321 allow the promotion to a wider mode. */
44322
44323 /* ??? Look for sequences of interleave or a wider permute that place
44324 the data into the correct lanes for a half-vector shuffle like
44325 pshuf[lh]w or vpermilps. */
44326
44327 /* ??? Look for sequences of interleave that produce the desired results.
44328 The combinatorics of punpck[lh] get pretty ugly... */
44329
44330 if (expand_vec_perm_even_odd (d))
44331 return true;
44332
44333 /* Even longer sequences. */
44334 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44335 return true;
44336
44337 return false;
44338 }
44339
44340 /* If a permutation only uses one operand, make it clear. Returns true
44341 if the permutation references both operands. */
44342
44343 static bool
44344 canonicalize_perm (struct expand_vec_perm_d *d)
44345 {
44346 int i, which, nelt = d->nelt;
44347
44348 for (i = which = 0; i < nelt; ++i)
44349 which |= (d->perm[i] < nelt ? 1 : 2);
44350
44351 d->one_operand_p = true;
44352 switch (which)
44353 {
44354 default:
44355 gcc_unreachable();
44356
44357 case 3:
44358 if (!rtx_equal_p (d->op0, d->op1))
44359 {
44360 d->one_operand_p = false;
44361 break;
44362 }
44363 /* The elements of PERM do not suggest that only the first operand
44364 is used, but both operands are identical. Allow easier matching
44365 of the permutation by folding the permutation into the single
44366 input vector. */
44367 /* FALLTHRU */
44368
44369 case 2:
44370 for (i = 0; i < nelt; ++i)
44371 d->perm[i] &= nelt - 1;
44372 d->op0 = d->op1;
44373 break;
44374
44375 case 1:
44376 d->op1 = d->op0;
44377 break;
44378 }
44379
44380 return (which == 3);
44381 }
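
/* Illustrative only (guarded out of the build): the same canonicalization
   performed on a bare index array.  Indices below NELT refer to the first
   operand, the rest to the second; a selector that only touches one operand
   (or whose two operands are the same register) is folded so that all
   indices end up below NELT.  NELT is a power of two for the vector modes
   involved.  The name is hypothetical.  */
#if 0
static int
canonicalize_perm_sketch (unsigned char *perm, unsigned nelt,
			  int ops_identical)
{
  unsigned which = 0;
  for (unsigned i = 0; i < nelt; ++i)
    which |= perm[i] < nelt ? 1 : 2;

  if (which == 2 || (which == 3 && ops_identical))
    for (unsigned i = 0; i < nelt; ++i)
      perm[i] &= nelt - 1;		/* fold onto the first operand */

  return which == 3;			/* true iff both operands referenced */
}
#endif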
44382
44383 bool
44384 ix86_expand_vec_perm_const (rtx operands[4])
44385 {
44386 struct expand_vec_perm_d d;
44387 unsigned char perm[MAX_VECT_LEN];
44388 int i, nelt;
44389 bool two_args;
44390 rtx sel;
44391
44392 d.target = operands[0];
44393 d.op0 = operands[1];
44394 d.op1 = operands[2];
44395 sel = operands[3];
44396
44397 d.vmode = GET_MODE (d.target);
44398 gcc_assert (VECTOR_MODE_P (d.vmode));
44399 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44400 d.testing_p = false;
44401
44402 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44403 gcc_assert (XVECLEN (sel, 0) == nelt);
44404 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44405
44406 for (i = 0; i < nelt; ++i)
44407 {
44408 rtx e = XVECEXP (sel, 0, i);
44409 int ei = INTVAL (e) & (2 * nelt - 1);
44410 d.perm[i] = ei;
44411 perm[i] = ei;
44412 }
44413
44414 two_args = canonicalize_perm (&d);
44415
44416 if (ix86_expand_vec_perm_const_1 (&d))
44417 return true;
44418
44419 /* If the selector says both arguments are needed, but the operands are the
44420 same, the above tried to expand with one_operand_p and flattened selector.
44421 If that didn't work, retry without one_operand_p; we succeeded with that
44422 during testing. */
44423 if (two_args && d.one_operand_p)
44424 {
44425 d.one_operand_p = false;
44426 memcpy (d.perm, perm, sizeof (perm));
44427 return ix86_expand_vec_perm_const_1 (&d);
44428 }
44429
44430 return false;
44431 }
44432
44433 /* Implement targetm.vectorize.vec_perm_const_ok. */
44434
44435 static bool
44436 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44437 const unsigned char *sel)
44438 {
44439 struct expand_vec_perm_d d;
44440 unsigned int i, nelt, which;
44441 bool ret;
44442
44443 d.vmode = vmode;
44444 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44445 d.testing_p = true;
44446
44447 /* Given sufficient ISA support we can just return true here
44448 for selected vector modes. */
44449 if (d.vmode == V16SImode || d.vmode == V16SFmode
44450 || d.vmode == V8DFmode || d.vmode == V8DImode)
44451 /* All implementable with a single vpermi2 insn. */
44452 return true;
44453 if (GET_MODE_SIZE (d.vmode) == 16)
44454 {
44455 /* All implementable with a single vpperm insn. */
44456 if (TARGET_XOP)
44457 return true;
44458 /* All implementable with 2 pshufb + 1 ior. */
44459 if (TARGET_SSSE3)
44460 return true;
44461 /* All implementable with shufpd or unpck[lh]pd. */
44462 if (d.nelt == 2)
44463 return true;
44464 }
44465
44466 /* Extract the values from the vector CST into the permutation
44467 array in D. */
44468 memcpy (d.perm, sel, nelt);
44469 for (i = which = 0; i < nelt; ++i)
44470 {
44471 unsigned char e = d.perm[i];
44472 gcc_assert (e < 2 * nelt);
44473 which |= (e < nelt ? 1 : 2);
44474 }
44475
44476 /* If all elements are from the second vector, fold them to the first.  */
44477 if (which == 2)
44478 for (i = 0; i < nelt; ++i)
44479 d.perm[i] -= nelt;
44480
44481 /* Check whether the mask can be applied to the vector type. */
44482 d.one_operand_p = (which != 3);
44483
44484 /* Implementable with shufps or pshufd. */
44485 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44486 return true;
44487
44488 /* Otherwise we have to go through the motions and see if we can
44489 figure out how to generate the requested permutation. */
44490 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44491 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44492 if (!d.one_operand_p)
44493 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44494
44495 start_sequence ();
44496 ret = ix86_expand_vec_perm_const_1 (&d);
44497 end_sequence ();
44498
44499 return ret;
44500 }
44501
44502 void
44503 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44504 {
44505 struct expand_vec_perm_d d;
44506 unsigned i, nelt;
44507
44508 d.target = targ;
44509 d.op0 = op0;
44510 d.op1 = op1;
44511 d.vmode = GET_MODE (targ);
44512 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44513 d.one_operand_p = false;
44514 d.testing_p = false;
44515
44516 for (i = 0; i < nelt; ++i)
44517 d.perm[i] = i * 2 + odd;
44518
44519 /* We'll either be able to implement the permutation directly... */
44520 if (expand_vec_perm_1 (&d))
44521 return;
44522
44523 /* ... or we use the special-case patterns. */
44524 expand_vec_perm_even_odd_1 (&d, odd);
44525 }
44526
44527 static void
44528 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44529 {
44530 struct expand_vec_perm_d d;
44531 unsigned i, nelt, base;
44532 bool ok;
44533
44534 d.target = targ;
44535 d.op0 = op0;
44536 d.op1 = op1;
44537 d.vmode = GET_MODE (targ);
44538 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44539 d.one_operand_p = false;
44540 d.testing_p = false;
44541
44542 base = high_p ? nelt / 2 : 0;
44543 for (i = 0; i < nelt / 2; ++i)
44544 {
44545 d.perm[i * 2] = i + base;
44546 d.perm[i * 2 + 1] = i + base + nelt;
44547 }
44548
44549 /* Note that for AVX this isn't one instruction. */
44550 ok = ix86_expand_vec_perm_const_1 (&d);
44551 gcc_assert (ok);
44552 }
44553
44554
44555 /* Expand a vector operation CODE for a V*QImode in terms of the
44556 same operation on V*HImode. */
44557
44558 void
44559 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44560 {
44561 enum machine_mode qimode = GET_MODE (dest);
44562 enum machine_mode himode;
44563 rtx (*gen_il) (rtx, rtx, rtx);
44564 rtx (*gen_ih) (rtx, rtx, rtx);
44565 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44566 struct expand_vec_perm_d d;
44567 bool ok, full_interleave;
44568 bool uns_p = false;
44569 int i;
44570
44571 switch (qimode)
44572 {
44573 case V16QImode:
44574 himode = V8HImode;
44575 gen_il = gen_vec_interleave_lowv16qi;
44576 gen_ih = gen_vec_interleave_highv16qi;
44577 break;
44578 case V32QImode:
44579 himode = V16HImode;
44580 gen_il = gen_avx2_interleave_lowv32qi;
44581 gen_ih = gen_avx2_interleave_highv32qi;
44582 break;
44583 default:
44584 gcc_unreachable ();
44585 }
44586
44587 op2_l = op2_h = op2;
44588 switch (code)
44589 {
44590 case MULT:
44591 /* Unpack data such that we've got a source byte in each low byte of
44592 each word. We don't care what goes into the high byte of each word.
44593 Rather than trying to get zero in there, it is most convenient to let
44594 it be a copy of the low byte. */
44595 op2_l = gen_reg_rtx (qimode);
44596 op2_h = gen_reg_rtx (qimode);
44597 emit_insn (gen_il (op2_l, op2, op2));
44598 emit_insn (gen_ih (op2_h, op2, op2));
44599 /* FALLTHRU */
44600
44601 op1_l = gen_reg_rtx (qimode);
44602 op1_h = gen_reg_rtx (qimode);
44603 emit_insn (gen_il (op1_l, op1, op1));
44604 emit_insn (gen_ih (op1_h, op1, op1));
44605 full_interleave = qimode == V16QImode;
44606 break;
44607
44608 case ASHIFT:
44609 case LSHIFTRT:
44610 uns_p = true;
44611 /* FALLTHRU */
44612 case ASHIFTRT:
44613 op1_l = gen_reg_rtx (himode);
44614 op1_h = gen_reg_rtx (himode);
44615 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44616 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44617 full_interleave = true;
44618 break;
44619 default:
44620 gcc_unreachable ();
44621 }
44622
44623 /* Perform the operation. */
44624 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44625 1, OPTAB_DIRECT);
44626 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44627 1, OPTAB_DIRECT);
44628 gcc_assert (res_l && res_h);
44629
44630 /* Merge the data back into the right place. */
44631 d.target = dest;
44632 d.op0 = gen_lowpart (qimode, res_l);
44633 d.op1 = gen_lowpart (qimode, res_h);
44634 d.vmode = qimode;
44635 d.nelt = GET_MODE_NUNITS (qimode);
44636 d.one_operand_p = false;
44637 d.testing_p = false;
44638
44639 if (full_interleave)
44640 {
44641 /* For SSE2, we used a full interleave, so the desired
44642 results are in the even elements. */
44643 for (i = 0; i < 32; ++i)
44644 d.perm[i] = i * 2;
44645 }
44646 else
44647 {
44648 /* For AVX, the interleave used above was not cross-lane.  So we
44649 extract the even elements, but with the second and third quarters swapped.
44650 Happily, that is even one insn shorter than a plain even extraction.  */
44651 for (i = 0; i < 32; ++i)
44652 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
44653 }
44654
44655 ok = ix86_expand_vec_perm_const_1 (&d);
44656 gcc_assert (ok);
44657
44658 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44659 gen_rtx_fmt_ee (code, qimode, op1, op2));
44660 }
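
/* Illustrative scalar model (guarded out of the build) of the
   QImode-via-HImode strategy above, shown for multiplication: widen each
   byte into a 16-bit lane, operate there, and keep only the low byte of
   every 16-bit result (the permutation emitted at the end of the expander
   selects exactly those bytes).  The name is hypothetical; only <stdint.h>
   is assumed.  */
#if 0
#include <stdint.h>

static void
mulv16qi_via_hi_sketch (uint8_t r[16], const uint8_t a[16],
			const uint8_t b[16])
{
  for (int i = 0; i < 16; ++i)
    {
      uint16_t wide = (uint16_t) ((uint16_t) a[i] * (uint16_t) b[i]);
      r[i] = (uint8_t) wide;		/* even-byte extraction */
    }
}
#endif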
44661
44662 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44663 if op is CONST_VECTOR with all odd elements equal to their
44664 preceding element. */
44665
44666 static bool
44667 const_vector_equal_evenodd_p (rtx op)
44668 {
44669 enum machine_mode mode = GET_MODE (op);
44670 int i, nunits = GET_MODE_NUNITS (mode);
44671 if (GET_CODE (op) != CONST_VECTOR
44672 || nunits != CONST_VECTOR_NUNITS (op))
44673 return false;
44674 for (i = 0; i < nunits; i += 2)
44675 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44676 return false;
44677 return true;
44678 }
44679
44680 void
44681 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44682 bool uns_p, bool odd_p)
44683 {
44684 enum machine_mode mode = GET_MODE (op1);
44685 enum machine_mode wmode = GET_MODE (dest);
44686 rtx x;
44687 rtx orig_op1 = op1, orig_op2 = op2;
44688
44689 if (!nonimmediate_operand (op1, mode))
44690 op1 = force_reg (mode, op1);
44691 if (!nonimmediate_operand (op2, mode))
44692 op2 = force_reg (mode, op2);
44693
44694 /* We only play even/odd games with vectors of SImode. */
44695 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44696
44697 /* If we're looking for the odd results, shift those members down to
44698 the even slots. For some cpus this is faster than a PSHUFD. */
44699 if (odd_p)
44700 {
44701 /* For XOP use vpmacsdqh, but only for smult, as it is only
44702 signed. */
44703 if (TARGET_XOP && mode == V4SImode && !uns_p)
44704 {
44705 x = force_reg (wmode, CONST0_RTX (wmode));
44706 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44707 return;
44708 }
44709
44710 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44711 if (!const_vector_equal_evenodd_p (orig_op1))
44712 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44713 x, NULL, 1, OPTAB_DIRECT);
44714 if (!const_vector_equal_evenodd_p (orig_op2))
44715 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44716 x, NULL, 1, OPTAB_DIRECT);
44717 op1 = gen_lowpart (mode, op1);
44718 op2 = gen_lowpart (mode, op2);
44719 }
44720
44721 if (mode == V16SImode)
44722 {
44723 if (uns_p)
44724 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
44725 else
44726 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
44727 }
44728 else if (mode == V8SImode)
44729 {
44730 if (uns_p)
44731 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
44732 else
44733 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
44734 }
44735 else if (uns_p)
44736 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
44737 else if (TARGET_SSE4_1)
44738 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
44739 else
44740 {
44741 rtx s1, s2, t0, t1, t2;
44742
44743 /* The easiest way to implement this without PMULDQ is to go through
44744 the motions as if we are performing a full 64-bit multiply, except
44745 that we need to do less shuffling of the elements.  */
44746
44747 /* Compute the sign-extension, aka highparts, of the two operands. */
44748 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44749 op1, pc_rtx, pc_rtx);
44750 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44751 op2, pc_rtx, pc_rtx);
44752
44753 /* Multiply LO(A) * HI(B), and vice-versa. */
44754 t1 = gen_reg_rtx (wmode);
44755 t2 = gen_reg_rtx (wmode);
44756 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
44757 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
44758
44759 /* Multiply LO(A) * LO(B). */
44760 t0 = gen_reg_rtx (wmode);
44761 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
44762
44763 /* Combine and shift the highparts into place. */
44764 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
44765 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
44766 1, OPTAB_DIRECT);
44767
44768 /* Combine high and low parts. */
44769 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
44770 return;
44771 }
44772 emit_insn (x);
44773 }
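
/* Illustrative scalar check (guarded out of the build) of the PMULDQ-free
   path above: a signed 32x32->64 product can be recovered from the unsigned
   product by subtracting the sign corrections that the expander computes
   with the two compare/umult pairs.  Two's-complement wraparound is
   assumed; the name is hypothetical and only <stdint.h> is used.  */
#if 0
#include <stdint.h>

static int64_t
smul32_widen_via_umul_sketch (int32_t a, int32_t b)
{
  uint32_t ua = (uint32_t) a, ub = (uint32_t) b;
  uint64_t lo = (uint64_t) ua * ub;	/* vec_widen_umult_even (PMULUDQ)   */
  uint64_t fix = 0;
  if (a < 0)				/* net effect of s1 = (0 > a) mask  */
    fix += (uint64_t) ub;
  if (b < 0)				/* net effect of s2 = (0 > b) mask  */
    fix += (uint64_t) ua;
  return (int64_t) (lo - (fix << 32));	/* add/shift/add in the expander    */
}
#endif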
44774
44775 void
44776 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
44777 bool uns_p, bool high_p)
44778 {
44779 enum machine_mode wmode = GET_MODE (dest);
44780 enum machine_mode mode = GET_MODE (op1);
44781 rtx t1, t2, t3, t4, mask;
44782
44783 switch (mode)
44784 {
44785 case V4SImode:
44786 t1 = gen_reg_rtx (mode);
44787 t2 = gen_reg_rtx (mode);
44788 if (TARGET_XOP && !uns_p)
44789 {
44790 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
44791 shuffle the elements once so that all elements are in the right
44792 place for immediate use: { A C B D }. */
44793 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
44794 const1_rtx, GEN_INT (3)));
44795 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
44796 const1_rtx, GEN_INT (3)));
44797 }
44798 else
44799 {
44800 /* Put the elements into place for the multiply. */
44801 ix86_expand_vec_interleave (t1, op1, op1, high_p);
44802 ix86_expand_vec_interleave (t2, op2, op2, high_p);
44803 high_p = false;
44804 }
44805 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
44806 break;
44807
44808 case V8SImode:
44809 /* Shuffle the elements between the lanes. After this we
44810 have { A B E F | C D G H } for each operand. */
44811 t1 = gen_reg_rtx (V4DImode);
44812 t2 = gen_reg_rtx (V4DImode);
44813 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
44814 const0_rtx, const2_rtx,
44815 const1_rtx, GEN_INT (3)));
44816 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
44817 const0_rtx, const2_rtx,
44818 const1_rtx, GEN_INT (3)));
44819
44820 /* Shuffle the elements within the lanes. After this we
44821 have { A A B B | C C D D } or { E E F F | G G H H }. */
44822 t3 = gen_reg_rtx (V8SImode);
44823 t4 = gen_reg_rtx (V8SImode);
44824 mask = GEN_INT (high_p
44825 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
44826 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
44827 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
44828 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
44829
44830 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
44831 break;
44832
44833 case V8HImode:
44834 case V16HImode:
44835 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
44836 uns_p, OPTAB_DIRECT);
44837 t2 = expand_binop (mode,
44838 uns_p ? umul_highpart_optab : smul_highpart_optab,
44839 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
44840 gcc_assert (t1 && t2);
44841
44842 t3 = gen_reg_rtx (mode);
44843 ix86_expand_vec_interleave (t3, t1, t2, high_p);
44844 emit_move_insn (dest, gen_lowpart (wmode, t3));
44845 break;
44846
44847 case V16QImode:
44848 case V32QImode:
44849 t1 = gen_reg_rtx (wmode);
44850 t2 = gen_reg_rtx (wmode);
44851 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
44852 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
44853
44854 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
44855 break;
44856
44857 default:
44858 gcc_unreachable ();
44859 }
44860 }
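
/* Illustrative scalar model (guarded out of the build) of the V8HImode
   path above: the full 16x16->32 products of one half of the vector are
   assembled from the low-part and high-part multiplies followed by an
   interleave.  Two's-complement representation is assumed; the name is
   hypothetical.  */
#if 0
#include <stdint.h>

static void
mul_widen_lo_v8hi_sketch (int32_t r[4], const int16_t a[8], const int16_t b[8])
{
  for (int i = 0; i < 4; ++i)		/* the low half; high_p selects 4..7 */
    {
      int32_t p = (int32_t) a[i] * b[i];
      uint16_t lo = (uint16_t) p;			/* pmullw lane */
      uint16_t hi = (uint16_t) ((uint32_t) p >> 16);	/* pmulhw lane */
      /* punpcklwd reassembles the 32-bit product from the two halves.  */
      r[i] = (int32_t) ((uint32_t) lo | ((uint32_t) hi << 16));
    }
}
#endif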
44861
44862 void
44863 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
44864 {
44865 rtx res_1, res_2, res_3, res_4;
44866
44867 res_1 = gen_reg_rtx (V4SImode);
44868 res_2 = gen_reg_rtx (V4SImode);
44869 res_3 = gen_reg_rtx (V2DImode);
44870 res_4 = gen_reg_rtx (V2DImode);
44871 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
44872 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
44873
44874 /* Move the results in element 2 down to element 1; we don't care
44875 what goes in elements 2 and 3. Then we can merge the parts
44876 back together with an interleave.
44877
44878 Note that two other sequences were tried:
44879 (1) Use interleaves at the start instead of psrldq, which allows
44880 us to use a single shufps to merge things back at the end.
44881 (2) Use shufps here to combine the two vectors, then pshufd to
44882 put the elements in the correct order.
44883 In both cases the cost of the reformatting stall was too high
44884 and the overall sequence slower. */
44885
44886 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
44887 const0_rtx, const2_rtx,
44888 const0_rtx, const0_rtx));
44889 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
44890 const0_rtx, const2_rtx,
44891 const0_rtx, const0_rtx));
44892 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
44893
44894 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
44895 }
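
/* Illustrative only (guarded out of the build): the low 32 bits of each
   lane's product, recovered from even-lane 32x32->64 multiplies the way the
   expander above does with pmuludq, pshufd and punpckldq.  The name is
   hypothetical.  */
#if 0
#include <stdint.h>

static void
mulv4si_sketch (uint32_t r[4], const uint32_t a[4], const uint32_t b[4])
{
  /* pmuludq multiplies the even lanes; the odd lanes are covered by the
     second widening multiply once the elements have been repositioned.  */
  uint64_t p0 = (uint64_t) a[0] * b[0];
  uint64_t p1 = (uint64_t) a[1] * b[1];
  uint64_t p2 = (uint64_t) a[2] * b[2];
  uint64_t p3 = (uint64_t) a[3] * b[3];

  /* The pshufd/punpckldq steps keep only the low 32 bits of each product
     and put them back into their original lanes.  */
  r[0] = (uint32_t) p0;
  r[1] = (uint32_t) p1;
  r[2] = (uint32_t) p2;
  r[3] = (uint32_t) p3;
}
#endif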
44896
44897 void
44898 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
44899 {
44900 enum machine_mode mode = GET_MODE (op0);
44901 rtx t1, t2, t3, t4, t5, t6;
44902
44903 if (TARGET_XOP && mode == V2DImode)
44904 {
44905 /* op1: A,B,C,D, op2: E,F,G,H */
44906 op1 = gen_lowpart (V4SImode, op1);
44907 op2 = gen_lowpart (V4SImode, op2);
44908
44909 t1 = gen_reg_rtx (V4SImode);
44910 t2 = gen_reg_rtx (V4SImode);
44911 t3 = gen_reg_rtx (V2DImode);
44912 t4 = gen_reg_rtx (V2DImode);
44913
44914 /* t1: B,A,D,C */
44915 emit_insn (gen_sse2_pshufd_1 (t1, op1,
44916 GEN_INT (1),
44917 GEN_INT (0),
44918 GEN_INT (3),
44919 GEN_INT (2)));
44920
44921 /* t2: (B*E),(A*F),(D*G),(C*H) */
44922 emit_insn (gen_mulv4si3 (t2, t1, op2));
44923
44924 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
44925 emit_insn (gen_xop_phadddq (t3, t2));
44926
44927 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
44928 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
44929
44930 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
44931 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
44932 }
44933 else
44934 {
44935 enum machine_mode nmode;
44936 rtx (*umul) (rtx, rtx, rtx);
44937
44938 if (mode == V2DImode)
44939 {
44940 umul = gen_vec_widen_umult_even_v4si;
44941 nmode = V4SImode;
44942 }
44943 else if (mode == V4DImode)
44944 {
44945 umul = gen_vec_widen_umult_even_v8si;
44946 nmode = V8SImode;
44947 }
44948 else if (mode == V8DImode)
44949 {
44950 umul = gen_vec_widen_umult_even_v16si;
44951 nmode = V16SImode;
44952 }
44953 else
44954 gcc_unreachable ();
44955
44956
44957 /* Multiply low parts. */
44958 t1 = gen_reg_rtx (mode);
44959 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
44960
44961 /* Shift input vectors right 32 bits so we can multiply high parts. */
44962 t6 = GEN_INT (32);
44963 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
44964 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
44965
44966 /* Multiply high parts by low parts. */
44967 t4 = gen_reg_rtx (mode);
44968 t5 = gen_reg_rtx (mode);
44969 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
44970 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
44971
44972 /* Combine and shift the highparts back. */
44973 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
44974 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
44975
44976 /* Combine high and low parts. */
44977 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
44978 }
44979
44980 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44981 gen_rtx_MULT (mode, op1, op2));
44982 }
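
/* Illustrative scalar check (guarded out of the build) of the generic
   (non-XOP) path above: the low 64 bits of a 64x64 product built from three
   32x32->64 multiplies, exactly the lo*lo + ((hi*lo + lo*hi) << 32)
   recombination the expander emits.  The name is hypothetical.  */
#if 0
#include <stdint.h>

static uint64_t
mul64_via_mul32_sketch (uint64_t a, uint64_t b)
{
  uint32_t a_lo = (uint32_t) a, a_hi = (uint32_t) (a >> 32);
  uint32_t b_lo = (uint32_t) b, b_hi = (uint32_t) (b >> 32);

  uint64_t lo    = (uint64_t) a_lo * b_lo;	/* t1 above		*/
  uint64_t cross = (uint64_t) a_hi * b_lo	/* t4 above		*/
		   + (uint64_t) a_lo * b_hi;	/* t5 above		*/
  return lo + (cross << 32);			/* t1 + ((t4+t5) << 32) */
}
#endif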
44983
44984 /* Calculate integer abs() using only SSE2 instructions. */
44985
44986 void
44987 ix86_expand_sse2_abs (rtx target, rtx input)
44988 {
44989 enum machine_mode mode = GET_MODE (target);
44990 rtx tmp0, tmp1, x;
44991
44992 switch (mode)
44993 {
44994 /* For 32-bit signed integer X, the best way to calculate the absolute
44995 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
44996 case V4SImode:
44997 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
44998 GEN_INT (GET_MODE_BITSIZE
44999 (GET_MODE_INNER (mode)) - 1),
45000 NULL, 0, OPTAB_DIRECT);
45001 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45002 NULL, 0, OPTAB_DIRECT);
45003 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45004 target, 0, OPTAB_DIRECT);
45005 break;
45006
45007 /* For 16-bit signed integer X, the best way to calculate the absolute
45008 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45009 case V8HImode:
45010 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45011
45012 x = expand_simple_binop (mode, SMAX, tmp0, input,
45013 target, 0, OPTAB_DIRECT);
45014 break;
45015
45016 /* For 8-bit signed integer X, the best way to calculate the absolute
45017 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45018 as SSE2 provides the PMINUB insn. */
45019 case V16QImode:
45020 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45021
45022 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45023 target, 0, OPTAB_DIRECT);
45024 break;
45025
45026 default:
45027 gcc_unreachable ();
45028 }
45029
45030 if (x != target)
45031 emit_move_insn (target, x);
45032 }
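
/* Illustrative scalar forms (guarded out of the build) of the three SSE2
   abs() idioms used above.  Two's-complement representation and an
   arithmetic right shift are assumed; the names are hypothetical.  */
#if 0
#include <stdint.h>

static int32_t
abs32_shift_xor_sketch (int32_t x)	/* V4SImode: psrad, pxor, psub   */
{
  uint32_t s = (uint32_t) (x >> 31);	/* 0 or 0xffffffff		 */
  return (int32_t) (((uint32_t) x ^ s) - s);
}

static int16_t
abs16_smax_sketch (int16_t x)		/* V8HImode: psubw + pmaxsw	 */
{
  int16_t n = (int16_t) -x;
  return x > n ? x : n;
}

static uint8_t
abs8_umin_sketch (int8_t x)		/* V16QImode: psubb + pminub	 */
{
  uint8_t ux = (uint8_t) x, un = (uint8_t) -x;
  return ux < un ? ux : un;
}
#endif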
45033
45034 /* Expand an insert into a vector register through pinsr insn.
45035 Return true if successful. */
45036
45037 bool
45038 ix86_expand_pinsr (rtx *operands)
45039 {
45040 rtx dst = operands[0];
45041 rtx src = operands[3];
45042
45043 unsigned int size = INTVAL (operands[1]);
45044 unsigned int pos = INTVAL (operands[2]);
45045
45046 if (GET_CODE (dst) == SUBREG)
45047 {
45048 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45049 dst = SUBREG_REG (dst);
45050 }
45051
45052 if (GET_CODE (src) == SUBREG)
45053 src = SUBREG_REG (src);
45054
45055 switch (GET_MODE (dst))
45056 {
45057 case V16QImode:
45058 case V8HImode:
45059 case V4SImode:
45060 case V2DImode:
45061 {
45062 enum machine_mode srcmode, dstmode;
45063 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45064
45065 srcmode = mode_for_size (size, MODE_INT, 0);
45066
45067 switch (srcmode)
45068 {
45069 case QImode:
45070 if (!TARGET_SSE4_1)
45071 return false;
45072 dstmode = V16QImode;
45073 pinsr = gen_sse4_1_pinsrb;
45074 break;
45075
45076 case HImode:
45077 if (!TARGET_SSE2)
45078 return false;
45079 dstmode = V8HImode;
45080 pinsr = gen_sse2_pinsrw;
45081 break;
45082
45083 case SImode:
45084 if (!TARGET_SSE4_1)
45085 return false;
45086 dstmode = V4SImode;
45087 pinsr = gen_sse4_1_pinsrd;
45088 break;
45089
45090 case DImode:
45091 gcc_assert (TARGET_64BIT);
45092 if (!TARGET_SSE4_1)
45093 return false;
45094 dstmode = V2DImode;
45095 pinsr = gen_sse4_1_pinsrq;
45096 break;
45097
45098 default:
45099 return false;
45100 }
45101
45102 rtx d = dst;
45103 if (GET_MODE (dst) != dstmode)
45104 d = gen_reg_rtx (dstmode);
45105 src = gen_lowpart (srcmode, src);
45106
45107 pos /= size;
45108
45109 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45110 GEN_INT (1 << pos)));
45111 if (d != dst)
45112 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45113 return true;
45114 }
45115
45116 default:
45117 return false;
45118 }
45119 }
45120 \f
45121 /* This function returns the calling-ABI-specific va_list type node,
45122 i.e. the va_list type appropriate for FNDECL.  */
45123
45124 static tree
45125 ix86_fn_abi_va_list (tree fndecl)
45126 {
45127 if (!TARGET_64BIT)
45128 return va_list_type_node;
45129 gcc_assert (fndecl != NULL_TREE);
45130
45131 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45132 return ms_va_list_type_node;
45133 else
45134 return sysv_va_list_type_node;
45135 }
45136
45137 /* Returns the canonical va_list type specified by TYPE. If there
45138 is no valid TYPE provided, it returns NULL_TREE.  */
45139
45140 static tree
45141 ix86_canonical_va_list_type (tree type)
45142 {
45143 tree wtype, htype;
45144
45145 /* Resolve references and pointers to va_list type. */
45146 if (TREE_CODE (type) == MEM_REF)
45147 type = TREE_TYPE (type);
45148 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45149 type = TREE_TYPE (type);
45150 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45151 type = TREE_TYPE (type);
45152
45153 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45154 {
45155 wtype = va_list_type_node;
45156 gcc_assert (wtype != NULL_TREE);
45157 htype = type;
45158 if (TREE_CODE (wtype) == ARRAY_TYPE)
45159 {
45160 /* If va_list is an array type, the argument may have decayed
45161 to a pointer type, e.g. by being passed to another function.
45162 In that case, unwrap both types so that we can compare the
45163 underlying records. */
45164 if (TREE_CODE (htype) == ARRAY_TYPE
45165 || POINTER_TYPE_P (htype))
45166 {
45167 wtype = TREE_TYPE (wtype);
45168 htype = TREE_TYPE (htype);
45169 }
45170 }
45171 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45172 return va_list_type_node;
45173 wtype = sysv_va_list_type_node;
45174 gcc_assert (wtype != NULL_TREE);
45175 htype = type;
45176 if (TREE_CODE (wtype) == ARRAY_TYPE)
45177 {
45178 /* If va_list is an array type, the argument may have decayed
45179 to a pointer type, e.g. by being passed to another function.
45180 In that case, unwrap both types so that we can compare the
45181 underlying records. */
45182 if (TREE_CODE (htype) == ARRAY_TYPE
45183 || POINTER_TYPE_P (htype))
45184 {
45185 wtype = TREE_TYPE (wtype);
45186 htype = TREE_TYPE (htype);
45187 }
45188 }
45189 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45190 return sysv_va_list_type_node;
45191 wtype = ms_va_list_type_node;
45192 gcc_assert (wtype != NULL_TREE);
45193 htype = type;
45194 if (TREE_CODE (wtype) == ARRAY_TYPE)
45195 {
45196 /* If va_list is an array type, the argument may have decayed
45197 to a pointer type, e.g. by being passed to another function.
45198 In that case, unwrap both types so that we can compare the
45199 underlying records. */
45200 if (TREE_CODE (htype) == ARRAY_TYPE
45201 || POINTER_TYPE_P (htype))
45202 {
45203 wtype = TREE_TYPE (wtype);
45204 htype = TREE_TYPE (htype);
45205 }
45206 }
45207 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45208 return ms_va_list_type_node;
45209 return NULL_TREE;
45210 }
45211 return std_canonical_va_list_type (type);
45212 }
45213
45214 /* Iterate through the target-specific builtin types for va_list.
45215 IDX denotes the iterator, *PTREE is set to the result type of
45216 the va_list builtin, and *PNAME to its internal type.
45217 Returns zero if there is no element for this index, otherwise
45218 IDX should be increased upon the next call.
45219 Note, do not iterate a base builtin's name like __builtin_va_list.
45220 Used from c_common_nodes_and_builtins. */
45221
45222 static int
45223 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45224 {
45225 if (TARGET_64BIT)
45226 {
45227 switch (idx)
45228 {
45229 default:
45230 break;
45231
45232 case 0:
45233 *ptree = ms_va_list_type_node;
45234 *pname = "__builtin_ms_va_list";
45235 return 1;
45236
45237 case 1:
45238 *ptree = sysv_va_list_type_node;
45239 *pname = "__builtin_sysv_va_list";
45240 return 1;
45241 }
45242 }
45243
45244 return 0;
45245 }
45246
45247 #undef TARGET_SCHED_DISPATCH
45248 #define TARGET_SCHED_DISPATCH has_dispatch
45249 #undef TARGET_SCHED_DISPATCH_DO
45250 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45251 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45252 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45253 #undef TARGET_SCHED_REORDER
45254 #define TARGET_SCHED_REORDER ix86_sched_reorder
45255 #undef TARGET_SCHED_ADJUST_PRIORITY
45256 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45257 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45258 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45259 ix86_dependencies_evaluation_hook
45260
45261 /* The size of the dispatch window is the total number of bytes of
45262 object code allowed in a window. */
45263 #define DISPATCH_WINDOW_SIZE 16
45264
45265 /* Number of dispatch windows considered for scheduling. */
45266 #define MAX_DISPATCH_WINDOWS 3
45267
45268 /* Maximum number of instructions in a window. */
45269 #define MAX_INSN 4
45270
45271 /* Maximum number of immediate operands in a window. */
45272 #define MAX_IMM 4
45273
45274 /* Maximum number of immediate bits allowed in a window. */
45275 #define MAX_IMM_SIZE 128
45276
45277 /* Maximum number of 32 bit immediates allowed in a window. */
45278 #define MAX_IMM_32 4
45279
45280 /* Maximum number of 64 bit immediates allowed in a window. */
45281 #define MAX_IMM_64 2
45282
45283 /* Maximum total of loads or prefetches allowed in a window. */
45284 #define MAX_LOAD 2
45285
45286 /* Maximum total of stores allowed in a window. */
45287 #define MAX_STORE 1
45288
45289 #undef BIG
45290 #define BIG 100
45291
45292
45293 /* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
45294 enum dispatch_group {
45295 disp_no_group = 0,
45296 disp_load,
45297 disp_store,
45298 disp_load_store,
45299 disp_prefetch,
45300 disp_imm,
45301 disp_imm_32,
45302 disp_imm_64,
45303 disp_branch,
45304 disp_cmp,
45305 disp_jcc,
45306 disp_last
45307 };
45308
45309 /* Number of allowable groups in a dispatch window. It is an array
45310 indexed by dispatch_group enum. 100 is used as a big number,
45311 because the number of these kinds of operations does not have any
45312 effect in a dispatch window, but we need them for other reasons in
45313 the table. */
45314 static unsigned int num_allowable_groups[disp_last] = {
45315 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45316 };
45317
45318 char group_name[disp_last + 1][16] = {
45319 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45320 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45321 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45322 };
45323
45324 /* Instruction path. */
45325 enum insn_path {
45326 no_path = 0,
45327 path_single, /* Single micro op. */
45328 path_double, /* Double micro op. */
45329 path_multi, /* Instructions with more than 2 micro ops.  */
45330 last_path
45331 };
45332
45333 /* sched_insn_info describes one slot of a dispatch window for the
45334 instructions scheduled in the basic block: the instruction itself
45335 together with its dispatch group, decode path and size information.
45336
45337 Windows are allocated for each basic block and are linked
45338 together. */
45339 typedef struct sched_insn_info_s {
45340 rtx insn;
45341 enum dispatch_group group;
45342 enum insn_path path;
45343 int byte_len;
45344 int imm_bytes;
45345 } sched_insn_info;
45346
45347 /* Linked list of dispatch windows.  This is a two-way list of
45348 dispatch windows of a basic block. It contains information about
45349 the number of uops in the window and the total number of
45350 instructions and of bytes in the object code for this dispatch
45351 window. */
45352 typedef struct dispatch_windows_s {
45353 int num_insn; /* Number of insn in the window. */
45354 int num_uops; /* Number of uops in the window. */
45355 int window_size; /* Number of bytes in the window. */
45356 int window_num; /* Window number, either 0 or 1.  */
45357 int num_imm; /* Number of immediates in an insn. */
45358 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45359 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45360 int imm_size; /* Total immediates in the window. */
45361 int num_loads; /* Total memory loads in the window. */
45362 int num_stores; /* Total memory stores in the window. */
45363 int violation; /* Violation exists in window. */
45364 sched_insn_info *window; /* Pointer to the window. */
45365 struct dispatch_windows_s *next;
45366 struct dispatch_windows_s *prev;
45367 } dispatch_windows;
45368
45369 /* Immediate values used in an insn.  */
45370 typedef struct imm_info_s
45371 {
45372 int imm;
45373 int imm32;
45374 int imm64;
45375 } imm_info;
45376
45377 static dispatch_windows *dispatch_window_list;
45378 static dispatch_windows *dispatch_window_list1;
45379
45380 /* Get dispatch group of insn. */
45381
45382 static enum dispatch_group
45383 get_mem_group (rtx insn)
45384 {
45385 enum attr_memory memory;
45386
45387 if (INSN_CODE (insn) < 0)
45388 return disp_no_group;
45389 memory = get_attr_memory (insn);
45390 if (memory == MEMORY_STORE)
45391 return disp_store;
45392
45393 if (memory == MEMORY_LOAD)
45394 return disp_load;
45395
45396 if (memory == MEMORY_BOTH)
45397 return disp_load_store;
45398
45399 return disp_no_group;
45400 }
45401
45402 /* Return true if insn is a compare instruction. */
45403
45404 static bool
45405 is_cmp (rtx insn)
45406 {
45407 enum attr_type type;
45408
45409 type = get_attr_type (insn);
45410 return (type == TYPE_TEST
45411 || type == TYPE_ICMP
45412 || type == TYPE_FCMP
45413 || GET_CODE (PATTERN (insn)) == COMPARE);
45414 }
45415
45416 /* Return true if a dispatch violation was encountered.  */
45417
45418 static bool
45419 dispatch_violation (void)
45420 {
45421 if (dispatch_window_list->next)
45422 return dispatch_window_list->next->violation;
45423 return dispatch_window_list->violation;
45424 }
45425
45426 /* Return true if insn is a branch instruction. */
45427
45428 static bool
45429 is_branch (rtx insn)
45430 {
45431 return (CALL_P (insn) || JUMP_P (insn));
45432 }
45433
45434 /* Return true if insn is a prefetch instruction. */
45435
45436 static bool
45437 is_prefetch (rtx insn)
45438 {
45439 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45440 }
45441
45442 /* This function initializes a dispatch window and the list container holding a
45443 pointer to the window. */
45444
45445 static void
45446 init_window (int window_num)
45447 {
45448 int i;
45449 dispatch_windows *new_list;
45450
45451 if (window_num == 0)
45452 new_list = dispatch_window_list;
45453 else
45454 new_list = dispatch_window_list1;
45455
45456 new_list->num_insn = 0;
45457 new_list->num_uops = 0;
45458 new_list->window_size = 0;
45459 new_list->next = NULL;
45460 new_list->prev = NULL;
45461 new_list->window_num = window_num;
45462 new_list->num_imm = 0;
45463 new_list->num_imm_32 = 0;
45464 new_list->num_imm_64 = 0;
45465 new_list->imm_size = 0;
45466 new_list->num_loads = 0;
45467 new_list->num_stores = 0;
45468 new_list->violation = false;
45469
45470 for (i = 0; i < MAX_INSN; i++)
45471 {
45472 new_list->window[i].insn = NULL;
45473 new_list->window[i].group = disp_no_group;
45474 new_list->window[i].path = no_path;
45475 new_list->window[i].byte_len = 0;
45476 new_list->window[i].imm_bytes = 0;
45477 }
45478 return;
45479 }
45480
45481 /* This function allocates and initializes a dispatch window and the
45482 list container holding a pointer to the window. */
45483
45484 static dispatch_windows *
45485 allocate_window (void)
45486 {
45487 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45488 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45489
45490 return new_list;
45491 }
45492
45493 /* This routine initializes the dispatch scheduling information. It
45494 initiates building dispatch scheduler tables and constructs the
45495 first dispatch window. */
45496
45497 static void
45498 init_dispatch_sched (void)
45499 {
45500 /* Allocate a dispatch list and a window. */
45501 dispatch_window_list = allocate_window ();
45502 dispatch_window_list1 = allocate_window ();
45503 init_window (0);
45504 init_window (1);
45505 }
45506
45507 /* This function returns true if a branch is detected. End of a basic block
45508 does not have to be a branch, but here we assume only branches end a
45509 window. */
45510
45511 static bool
45512 is_end_basic_block (enum dispatch_group group)
45513 {
45514 return group == disp_branch;
45515 }
45516
45517 /* This function is called when processing of a window has ended.  */
45518
45519 static void
45520 process_end_window (void)
45521 {
45522 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45523 if (dispatch_window_list->next)
45524 {
45525 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45526 gcc_assert (dispatch_window_list->window_size
45527 + dispatch_window_list1->window_size <= 48);
45528 init_window (1);
45529 }
45530 init_window (0);
45531 }
45532
45533 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45534 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45535 for 48 bytes of instructions.  Note that these windows are not dispatch
45536 windows whose sizes are DISPATCH_WINDOW_SIZE.  */
45537
45538 static dispatch_windows *
45539 allocate_next_window (int window_num)
45540 {
45541 if (window_num == 0)
45542 {
45543 if (dispatch_window_list->next)
45544 init_window (1);
45545 init_window (0);
45546 return dispatch_window_list;
45547 }
45548
45549 dispatch_window_list->next = dispatch_window_list1;
45550 dispatch_window_list1->prev = dispatch_window_list;
45551
45552 return dispatch_window_list1;
45553 }
45554
45555 /* Subroutine of find_constant.  Update the immediate-operand counters in
45556 IMM_VALUES for the sub-rtx *IN_RTX.  */
45556
45557 static int
45558 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45559 {
45560 if (*in_rtx == 0)
45561 return 0;
45562
45563 switch ( GET_CODE (*in_rtx))
45564 {
45565 case CONST:
45566 case SYMBOL_REF:
45567 case CONST_INT:
45568 (imm_values->imm)++;
45569 if (x86_64_immediate_operand (*in_rtx, SImode))
45570 (imm_values->imm32)++;
45571 else
45572 (imm_values->imm64)++;
45573 break;
45574
45575 case CONST_DOUBLE:
45576 (imm_values->imm)++;
45577 (imm_values->imm64)++;
45578 break;
45579
45580 case CODE_LABEL:
45581 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45582 {
45583 (imm_values->imm)++;
45584 (imm_values->imm32)++;
45585 }
45586 break;
45587
45588 default:
45589 break;
45590 }
45591
45592 return 0;
45593 }
45594
45595 /* Compute number of immediate operands of an instruction. */
45596
45597 static void
45598 find_constant (rtx in_rtx, imm_info *imm_values)
45599 {
45600 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45601 (rtx_function) find_constant_1, (void *) imm_values);
45602 }
45603
45604 /* Return total size of immediate operands of an instruction along with number
45605 of corresponding immediate operands. It initializes its parameters to zero
45606 before calling FIND_CONSTANT.
45607 INSN is the input instruction. IMM is the total of immediates.
45608 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45609 bit immediates. */
45610
45611 static int
45612 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45613 {
45614 imm_info imm_values = {0, 0, 0};
45615
45616 find_constant (insn, &imm_values);
45617 *imm = imm_values.imm;
45618 *imm32 = imm_values.imm32;
45619 *imm64 = imm_values.imm64;
45620 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45621 }
45622
45623 /* This function indicates whether an instruction has any immediate
45624 operands. */
45625
45626 static bool
45627 has_immediate (rtx insn)
45628 {
45629 int num_imm_operand;
45630 int num_imm32_operand;
45631 int num_imm64_operand;
45632
45633 if (insn)
45634 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45635 &num_imm64_operand);
45636 return false;
45637 }
45638
45639 /* Return the decode path (single, double or multi) of instruction INSN. */
45640
45641 static enum insn_path
45642 get_insn_path (rtx insn)
45643 {
45644 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45645
45646 if ((int)path == 0)
45647 return path_single;
45648
45649 if ((int)path == 1)
45650 return path_double;
45651
45652 return path_multi;
45653 }
45654
45655 /* Return insn dispatch group. */
45656
45657 static enum dispatch_group
45658 get_insn_group (rtx insn)
45659 {
45660 enum dispatch_group group = get_mem_group (insn);
45661 if (group)
45662 return group;
45663
45664 if (is_branch (insn))
45665 return disp_branch;
45666
45667 if (is_cmp (insn))
45668 return disp_cmp;
45669
45670 if (has_immediate (insn))
45671 return disp_imm;
45672
45673 if (is_prefetch (insn))
45674 return disp_prefetch;
45675
45676 return disp_no_group;
45677 }
45678
45679 /* Count the number of GROUP-restricted slots INSN would occupy in
45680 dispatch window WINDOW_LIST; return BIG if it cannot be added. */
45681
45682 static int
45683 count_num_restricted (rtx insn, dispatch_windows *window_list)
45684 {
45685 enum dispatch_group group = get_insn_group (insn);
45686 int imm_size;
45687 int num_imm_operand;
45688 int num_imm32_operand;
45689 int num_imm64_operand;
45690
45691 if (group == disp_no_group)
45692 return 0;
45693
45694 if (group == disp_imm)
45695 {
45696 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45697 &num_imm64_operand);
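      /* The instruction does not fit if it would push the window past any
	 of its immediate-operand limits: total immediate bytes, total number
	 of immediates, or the 32-bit/64-bit slot counts (a 64-bit immediate
	 counts as two 32-bit slots).  */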
45698 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45699 || num_imm_operand + window_list->num_imm > MAX_IMM
45700 || (num_imm32_operand > 0
45701 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45702 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45703 || (num_imm64_operand > 0
45704 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45705 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45706 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45707 && num_imm64_operand > 0
45708 && ((window_list->num_imm_64 > 0
45709 && window_list->num_insn >= 2)
45710 || window_list->num_insn >= 3)))
45711 return BIG;
45712
45713 return 1;
45714 }
45715
45716 if ((group == disp_load_store
45717 && (window_list->num_loads >= MAX_LOAD
45718 || window_list->num_stores >= MAX_STORE))
45719 || ((group == disp_load
45720 || group == disp_prefetch)
45721 && window_list->num_loads >= MAX_LOAD)
45722 || (group == disp_store
45723 && window_list->num_stores >= MAX_STORE))
45724 return BIG;
45725
45726 return 1;
45727 }
45728
45729 /* This function returns true if insn satisfies dispatch rules on the
45730 last window scheduled. */
45731
45732 static bool
45733 fits_dispatch_window (rtx insn)
45734 {
45735 dispatch_windows *window_list = dispatch_window_list;
45736 dispatch_windows *window_list_next = dispatch_window_list->next;
45737 unsigned int num_restrict;
45738 enum dispatch_group group = get_insn_group (insn);
45739 enum insn_path path = get_insn_path (insn);
45740 int sum;
45741
45742 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
45743 instructions should be given the lowest priority in the
45744 scheduling process in the Haifa scheduler to make sure they will be
45745 scheduled in the same dispatch window as the reference to them. */
45746 if (group == disp_jcc || group == disp_cmp)
45747 return false;
45748
45749 /* Check nonrestricted. */
45750 if (group == disp_no_group || group == disp_branch)
45751 return true;
45752
45753 /* Get last dispatch window. */
45754 if (window_list_next)
45755 window_list = window_list_next;
45756
45757 if (window_list->window_num == 1)
45758 {
45759 sum = window_list->prev->window_size + window_list->window_size;
45760
45761 if (sum == 32
45762 || (min_insn_size (insn) + sum) >= 48)
45763 /* Window 1 is full. Go for next window. */
45764 return true;
45765 }
45766
45767 num_restrict = count_num_restricted (insn, window_list);
45768
45769 if (num_restrict > num_allowable_groups[group])
45770 return false;
45771
45772 /* See if it fits in the first window. */
45773 if (window_list->window_num == 0)
45774 {
45775 /* The first window should have only single and double path
45776 uops. */
45777 if (path == path_double
45778 && (window_list->num_uops + 2) > MAX_INSN)
45779 return false;
45780 else if (path != path_single)
45781 return false;
45782 }
45783 return true;
45784 }
45785
45786 /* Add an instruction INSN with NUM_UOPS micro-operations to the
45787 dispatch window WINDOW_LIST. */
45788
45789 static void
45790 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
45791 {
45792 int byte_len = min_insn_size (insn);
45793 int num_insn = window_list->num_insn;
45794 int imm_size;
45795 sched_insn_info *window = window_list->window;
45796 enum dispatch_group group = get_insn_group (insn);
45797 enum insn_path path = get_insn_path (insn);
45798 int num_imm_operand;
45799 int num_imm32_operand;
45800 int num_imm64_operand;
45801
45802 if (!window_list->violation && group != disp_cmp
45803 && !fits_dispatch_window (insn))
45804 window_list->violation = true;
45805
45806 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45807 &num_imm64_operand);
45808
45809 /* Initialize window with new instruction. */
45810 window[num_insn].insn = insn;
45811 window[num_insn].byte_len = byte_len;
45812 window[num_insn].group = group;
45813 window[num_insn].path = path;
45814 window[num_insn].imm_bytes = imm_size;
45815
45816 window_list->window_size += byte_len;
45817 window_list->num_insn = num_insn + 1;
45818 window_list->num_uops = window_list->num_uops + num_uops;
45819 window_list->imm_size += imm_size;
45820 window_list->num_imm += num_imm_operand;
45821 window_list->num_imm_32 += num_imm32_operand;
45822 window_list->num_imm_64 += num_imm64_operand;
45823
45824 if (group == disp_store)
45825 window_list->num_stores += 1;
45826 else if (group == disp_load
45827 || group == disp_prefetch)
45828 window_list->num_loads += 1;
45829 else if (group == disp_load_store)
45830 {
45831 window_list->num_stores += 1;
45832 window_list->num_loads += 1;
45833 }
45834 }
45835
45836 /* Adds a scheduled instruction, INSN, to the current dispatch window.
45837 If the total bytes of instructions or the number of instructions in
45838 the window exceeds the allowed maximum, it allocates a new window.
45839
45840 static void
45841 add_to_dispatch_window (rtx insn)
45842 {
45843 int byte_len;
45844 dispatch_windows *window_list;
45845 dispatch_windows *next_list;
45846 dispatch_windows *window0_list;
45847 enum insn_path path;
45848 enum dispatch_group insn_group;
45849 bool insn_fits;
45850 int num_insn;
45851 int num_uops;
45852 int window_num;
45853 int insn_num_uops;
45854 int sum;
45855
45856 if (INSN_CODE (insn) < 0)
45857 return;
45858
45859 byte_len = min_insn_size (insn);
45860 window_list = dispatch_window_list;
45861 next_list = window_list->next;
45862 path = get_insn_path (insn);
45863 insn_group = get_insn_group (insn);
45864
45865 /* Get the last dispatch window. */
45866 if (next_list)
45867 window_list = dispatch_window_list->next;
45868
45869 if (path == path_single)
45870 insn_num_uops = 1;
45871 else if (path == path_double)
45872 insn_num_uops = 2;
45873 else
45874 insn_num_uops = (int) path;
45875
45876 /* If current window is full, get a new window.
45877 Window number zero is full if MAX_INSN uops are scheduled in it.
45878 Window number one is full if window zero's bytes plus window
45879 one's bytes equal 32, or if adding the bytes of the new instruction
45880 to the total makes it greater than or equal to 48, or if it already
45881 has MAX_INSN instructions in it.
45882 num_insn = window_list->num_insn;
45883 num_uops = window_list->num_uops;
45884 window_num = window_list->window_num;
45885 insn_fits = fits_dispatch_window (insn);
45886
45887 if (num_insn >= MAX_INSN
45888 || num_uops + insn_num_uops > MAX_INSN
45889 || !(insn_fits))
45890 {
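      /* The current window is full or the instruction does not fit;
	 switch to the other window (0 <-> 1) and start filling it.  */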
45891 window_num = ~window_num & 1;
45892 window_list = allocate_next_window (window_num);
45893 }
45894
45895 if (window_num == 0)
45896 {
45897 add_insn_window (insn, window_list, insn_num_uops);
45898 if (window_list->num_insn >= MAX_INSN
45899 && insn_group == disp_branch)
45900 {
45901 process_end_window ();
45902 return;
45903 }
45904 }
45905 else if (window_num == 1)
45906 {
45907 window0_list = window_list->prev;
45908 sum = window0_list->window_size + window_list->window_size;
45909 if (sum == 32
45910 || (byte_len + sum) >= 48)
45911 {
45912 process_end_window ();
45913 window_list = dispatch_window_list;
45914 }
45915
45916 add_insn_window (insn, window_list, insn_num_uops);
45917 }
45918 else
45919 gcc_unreachable ();
45920
45921 if (is_end_basic_block (insn_group))
45922 {
45923 /* End of basic block is reached; do end-of-basic-block processing. */
45924 process_end_window ();
45925 return;
45926 }
45927 }
45928
45929 /* Print the dispatch window, WINDOW_NUM, to FILE. */
45930
45931 DEBUG_FUNCTION static void
45932 debug_dispatch_window_file (FILE *file, int window_num)
45933 {
45934 dispatch_windows *list;
45935 int i;
45936
45937 if (window_num == 0)
45938 list = dispatch_window_list;
45939 else
45940 list = dispatch_window_list1;
45941
45942 fprintf (file, "Window #%d:\n", list->window_num);
45943 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
45944 list->num_insn, list->num_uops, list->window_size);
45945 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
45946 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
45947
45948 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
45949 list->num_stores);
45950 fprintf (file, " insn info:\n");
45951
45952 for (i = 0; i < MAX_INSN; i++)
45953 {
45954 if (!list->window[i].insn)
45955 break;
45956 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
45957 i, group_name[list->window[i].group],
45958 i, (void *)list->window[i].insn,
45959 i, list->window[i].path,
45960 i, list->window[i].byte_len,
45961 i, list->window[i].imm_bytes);
45962 }
45963 }
45964
45965 /* Print to stdout a dispatch window. */
45966
45967 DEBUG_FUNCTION void
45968 debug_dispatch_window (int window_num)
45969 {
45970 debug_dispatch_window_file (stdout, window_num);
45971 }
45972
45973 /* Print INSN dispatch information to FILE. */
45974
45975 DEBUG_FUNCTION static void
45976 debug_insn_dispatch_info_file (FILE *file, rtx insn)
45977 {
45978 int byte_len;
45979 enum insn_path path;
45980 enum dispatch_group group;
45981 int imm_size;
45982 int num_imm_operand;
45983 int num_imm32_operand;
45984 int num_imm64_operand;
45985
45986 if (INSN_CODE (insn) < 0)
45987 return;
45988
45989 byte_len = min_insn_size (insn);
45990 path = get_insn_path (insn);
45991 group = get_insn_group (insn);
45992 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45993 &num_imm64_operand);
45994
45995 fprintf (file, " insn info:\n");
45996 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
45997 group_name[group], path, byte_len);
45998 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
45999 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46000 }
46001
46002 /* Print to STDOUT the status of the ready list with respect to
46003 dispatch windows. */
46004
46005 DEBUG_FUNCTION void
46006 debug_ready_dispatch (void)
46007 {
46008 int i;
46009 int no_ready = number_in_ready ();
46010
46011 fprintf (stdout, "Number of ready: %d\n", no_ready);
46012
46013 for (i = 0; i < no_ready; i++)
46014 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46015 }
46016
46017 /* This routine is the driver of the dispatch scheduler. */
46018
46019 static void
46020 do_dispatch (rtx insn, int mode)
46021 {
46022 if (mode == DISPATCH_INIT)
46023 init_dispatch_sched ();
46024 else if (mode == ADD_TO_DISPATCH_WINDOW)
46025 add_to_dispatch_window (insn);
46026 }
46027
46028 /* Return TRUE if Dispatch Scheduling is supported. */
46029
46030 static bool
46031 has_dispatch (rtx insn, int action)
46032 {
46033 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46034 && flag_dispatch_scheduler)
46035 switch (action)
46036 {
46037 default:
46038 return false;
46039
46040 case IS_DISPATCH_ON:
46041 return true;
46042 break;
46043
46044 case IS_CMP:
46045 return is_cmp (insn);
46046
46047 case DISPATCH_VIOLATION:
46048 return dispatch_violation ();
46049
46050 case FITS_DISPATCH_WINDOW:
46051 return fits_dispatch_window (insn);
46052 }
46053
46054 return false;
46055 }
46056
46057 /* Implementation of reassociation_width target hook used by
46058 reassoc phase to identify parallelism level in reassociated
46059 tree. Statements tree_code is passed in OPC. Arguments type
46060 is passed in MODE.
46061
46062 Currently parallel reassociation is enabled for Atom
46063 processors only and we set reassociation width to be 2
46064 because Atom may issue up to 2 instructions per cycle.
46065
46066 Return value should be fixed if parallel reassociation is
46067 enabled for other processors. */
46068
46069 static int
46070 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46071 enum machine_mode mode)
46072 {
46073 int res = 1;
46074
46075 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46076 res = 2;
46077 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46078 res = 2;
46079
46080 return res;
46081 }
46082
46083 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46084 place emms and femms instructions. */
46085
46086 static enum machine_mode
46087 ix86_preferred_simd_mode (enum machine_mode mode)
46088 {
46089 if (!TARGET_SSE)
46090 return word_mode;
46091
46092 switch (mode)
46093 {
46094 case QImode:
46095 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46096 case HImode:
46097 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46098 case SImode:
46099 return TARGET_AVX512F ? V16SImode :
46100 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46101 case DImode:
46102 return TARGET_AVX512F ? V8DImode :
46103 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46104
46105 case SFmode:
46106 if (TARGET_AVX512F)
46107 return V16SFmode;
46108 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46109 return V8SFmode;
46110 else
46111 return V4SFmode;
46112
46113 case DFmode:
46114 if (!TARGET_VECTORIZE_DOUBLE)
46115 return word_mode;
46116 else if (TARGET_AVX512F)
46117 return V8DFmode;
46118 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46119 return V4DFmode;
46120 else if (TARGET_SSE2)
46121 return V2DFmode;
46122 /* FALLTHRU */
46123
46124 default:
46125 return word_mode;
46126 }
46127 }
46128
46129 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46130 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46131 256bit and 128bit vectors. */
46132
46133 static unsigned int
46134 ix86_autovectorize_vector_sizes (void)
46135 {
46136 return TARGET_AVX512F ? 64 | 32 | 16 :
46137 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46138 }
46139
46140 \f
46141
46142 /* Return class of registers which could be used for pseudo of MODE
46143 and of class RCLASS for spilling instead of memory. Return NO_REGS
46144 if it is not possible or non-profitable. */
46145 static reg_class_t
46146 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46147 {
46148 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46149 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46150 && INTEGER_CLASS_P (rclass))
46151 return ALL_SSE_REGS;
46152 return NO_REGS;
46153 }
46154
46155 /* Implement targetm.vectorize.init_cost. */
46156
46157 static void *
46158 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46159 {
46160 unsigned *cost = XNEWVEC (unsigned, 3);
46161 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46162 return cost;
46163 }
46164
46165 /* Implement targetm.vectorize.add_stmt_cost. */
46166
46167 static unsigned
46168 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46169 struct _stmt_vec_info *stmt_info, int misalign,
46170 enum vect_cost_model_location where)
46171 {
46172 unsigned *cost = (unsigned *) data;
46173 unsigned retval = 0;
46174
46175 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46176 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46177
46178 /* Statements in an inner loop relative to the loop being
46179 vectorized are weighted more heavily. The value here is
46180 arbitrary and could potentially be improved with analysis. */
46181 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46182 count *= 50; /* FIXME. */
46183
46184 retval = (unsigned) (count * stmt_cost);
46185 cost[where] += retval;
46186
46187 return retval;
46188 }
46189
46190 /* Implement targetm.vectorize.finish_cost. */
46191
46192 static void
46193 ix86_finish_cost (void *data, unsigned *prologue_cost,
46194 unsigned *body_cost, unsigned *epilogue_cost)
46195 {
46196 unsigned *cost = (unsigned *) data;
46197 *prologue_cost = cost[vect_prologue];
46198 *body_cost = cost[vect_body];
46199 *epilogue_cost = cost[vect_epilogue];
46200 }
46201
46202 /* Implement targetm.vectorize.destroy_cost_data. */
46203
46204 static void
46205 ix86_destroy_cost_data (void *data)
46206 {
46207 free (data);
46208 }
46209
46210 /* Validate target specific memory model bits in VAL. */
46211
46212 static unsigned HOST_WIDE_INT
46213 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46214 {
46215 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46216 bool strong;
46217
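  /* VAL encodes a standard memory model in the bits covered by
     MEMMODEL_MASK plus the target-specific IX86_HLE_ACQUIRE and
     IX86_HLE_RELEASE bits; reject any other bits, and reject the
     meaningless combination of both HLE hints at once.  */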
46218 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46219 |MEMMODEL_MASK)
46220 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46221 {
46222 warning (OPT_Winvalid_memory_model,
46223 "Unknown architecture specific memory model");
46224 return MEMMODEL_SEQ_CST;
46225 }
46226 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46227 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46228 {
46229 warning (OPT_Winvalid_memory_model,
46230 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46231 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46232 }
46233 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46234 {
46235 warning (OPT_Winvalid_memory_model,
46236 "HLE_RELEASE not used with RELEASE or stronger memory model");
46237 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46238 }
46239 return val;
46240 }
46241
46242 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46243 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46244 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46245 or number of vecsize_mangle variants that should be emitted. */
46246
46247 static int
46248 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46249 struct cgraph_simd_clone *clonei,
46250 tree base_type, int num)
46251 {
46252 int ret = 1;
46253
46254 if (clonei->simdlen
46255 && (clonei->simdlen < 2
46256 || clonei->simdlen > 16
46257 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46258 {
46259 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46260 "unsupported simdlen %d", clonei->simdlen);
46261 return 0;
46262 }
46263
46264 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46265 if (TREE_CODE (ret_type) != VOID_TYPE)
46266 switch (TYPE_MODE (ret_type))
46267 {
46268 case QImode:
46269 case HImode:
46270 case SImode:
46271 case DImode:
46272 case SFmode:
46273 case DFmode:
46274 /* case SCmode: */
46275 /* case DCmode: */
46276 break;
46277 default:
46278 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46279 "unsupported return type %qT for simd\n", ret_type);
46280 return 0;
46281 }
46282
46283 tree t;
46284 int i;
46285
46286 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46287 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46288 switch (TYPE_MODE (TREE_TYPE (t)))
46289 {
46290 case QImode:
46291 case HImode:
46292 case SImode:
46293 case DImode:
46294 case SFmode:
46295 case DFmode:
46296 /* case SCmode: */
46297 /* case DCmode: */
46298 break;
46299 default:
46300 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46301 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46302 return 0;
46303 }
46304
46305 if (clonei->cilk_elemental)
46306 {
46307 /* Parse the processor clause here. If not present, default to 'b'. */
46308 clonei->vecsize_mangle = 'b';
46309 }
46310 else if (!TREE_PUBLIC (node->decl))
46311 {
46312 /* If the function isn't exported, we can pick up just one ISA
46313 for the clones. */
46314 if (TARGET_AVX2)
46315 clonei->vecsize_mangle = 'd';
46316 else if (TARGET_AVX)
46317 clonei->vecsize_mangle = 'c';
46318 else
46319 clonei->vecsize_mangle = 'b';
46320 ret = 1;
46321 }
46322 else
46323 {
46324 clonei->vecsize_mangle = "bcd"[num];
46325 ret = 3;
46326 }
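  /* Mangling letters 'b', 'c' and 'd' select the SSE2, AVX and AVX2
     variants respectively; see ix86_simd_clone_adjust below.  */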
46327 switch (clonei->vecsize_mangle)
46328 {
46329 case 'b':
46330 clonei->vecsize_int = 128;
46331 clonei->vecsize_float = 128;
46332 break;
46333 case 'c':
46334 clonei->vecsize_int = 128;
46335 clonei->vecsize_float = 256;
46336 break;
46337 case 'd':
46338 clonei->vecsize_int = 256;
46339 clonei->vecsize_float = 256;
46340 break;
46341 }
46342 if (clonei->simdlen == 0)
46343 {
46344 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46345 clonei->simdlen = clonei->vecsize_int;
46346 else
46347 clonei->simdlen = clonei->vecsize_float;
46348 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46349 if (clonei->simdlen > 16)
46350 clonei->simdlen = 16;
46351 }
46352 return ret;
46353 }
46354
46355 /* Add target attribute to SIMD clone NODE if needed. */
46356
46357 static void
46358 ix86_simd_clone_adjust (struct cgraph_node *node)
46359 {
46360 const char *str = NULL;
46361 gcc_assert (node->decl == cfun->decl);
46362 switch (node->simdclone->vecsize_mangle)
46363 {
46364 case 'b':
46365 if (!TARGET_SSE2)
46366 str = "sse2";
46367 break;
46368 case 'c':
46369 if (!TARGET_AVX)
46370 str = "avx";
46371 break;
46372 case 'd':
46373 if (!TARGET_AVX2)
46374 str = "avx2";
46375 break;
46376 default:
46377 gcc_unreachable ();
46378 }
46379 if (str == NULL)
46380 return;
46381 push_cfun (NULL);
46382 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46383 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46384 gcc_assert (ok);
46385 pop_cfun ();
46386 ix86_previous_fndecl = NULL_TREE;
46387 ix86_set_current_function (node->decl);
46388 }
46389
46390 /* If SIMD clone NODE can't be used in a vectorized loop
46391 in current function, return -1, otherwise return a badness of using it
46392 (0 if it is most desirable from vecsize_mangle point of view, 1
46393 slightly less desirable, etc.). */
46394
46395 static int
46396 ix86_simd_clone_usable (struct cgraph_node *node)
46397 {
46398 switch (node->simdclone->vecsize_mangle)
46399 {
46400 case 'b':
46401 if (!TARGET_SSE2)
46402 return -1;
46403 if (!TARGET_AVX)
46404 return 0;
46405 return TARGET_AVX2 ? 2 : 1;
46406 case 'c':
46407 if (!TARGET_AVX)
46408 return -1;
46409 return TARGET_AVX2 ? 1 : 0;
46410 break;
46411 case 'd':
46412 if (!TARGET_AVX2)
46413 return -1;
46414 return 0;
46415 default:
46416 gcc_unreachable ();
46417 }
46418 }
46419
46420 /* This function counts the number of memory references in *X.
46421 The resulting count determines the unrolling factor for the
46422 bdver3 and bdver4 architectures. */
46423
46424 static int
46425 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46426 {
46427 if (*x != NULL_RTX && MEM_P (*x))
46428 {
46429 enum machine_mode mode;
46430 unsigned int n_words;
46431
46432 mode = GET_MODE (*x);
46433 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46434
46435 if (n_words > 4)
46436 (*mem_count) += 2;
46437 else
46438 (*mem_count) += 1;
46439 }
46440 return 0;
46441 }
46442
46443 /* This function adjusts the unroll factor based on
46444 the hardware capabilities. For example, bdver3 has
46445 a loop buffer which makes unrolling of smaller
46446 loops less important. This function decides the
46447 unroll factor using the number of memory references
46448 (the value 32 is used) as a heuristic. */
46449
46450 static unsigned
46451 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46452 {
46453 basic_block *bbs;
46454 rtx insn;
46455 unsigned i;
46456 unsigned mem_count = 0;
46457
46458 if (!TARGET_ADJUST_UNROLL)
46459 return nunroll;
46460
46461 /* Count the number of memory references within the loop body. */
46462 bbs = get_loop_body (loop);
46463 for (i = 0; i < loop->num_nodes; i++)
46464 {
46465 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46466 if (NONDEBUG_INSN_P (insn))
46467 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46468 }
46469 free (bbs);
46470
46471 if (mem_count && mem_count <= 32)
46472 return 32 / mem_count;
46473
46474 return nunroll;
46475 }
46476
46477
46478 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46479
46480 static bool
46481 ix86_float_exceptions_rounding_supported_p (void)
46482 {
46483 /* For x87 floating point with standard excess precision handling,
46484 there is no adddf3 pattern (since x87 floating point only has
46485 XFmode operations) so the default hook implementation gets this
46486 wrong. */
46487 return TARGET_80387 || TARGET_SSE_MATH;
46488 }
46489
46490 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46491
46492 static void
46493 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46494 {
46495 if (!TARGET_80387 && !TARGET_SSE_MATH)
46496 return;
46497 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46498 if (TARGET_80387)
46499 {
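      /* The x87 environment stored by fnstenv occupies 28 bytes, i.e.
	 seven 32-bit words, hence an index type of 0..6.  */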
46500 tree fenv_index_type = build_index_type (size_int (6));
46501 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46502 tree fenv_var = create_tmp_var (fenv_type, NULL);
46503 mark_addressable (fenv_var);
46504 tree fenv_ptr = build_pointer_type (fenv_type);
46505 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46506 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46507 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46508 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46509 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46510 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46511 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46512 tree hold_fnclex = build_call_expr (fnclex, 0);
46513 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46514 hold_fnclex);
46515 *clear = build_call_expr (fnclex, 0);
46516 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46517 mark_addressable (sw_var);
46518 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46519 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46520 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46521 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46522 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46523 exceptions_var, exceptions_x87);
46524 *update = build2 (COMPOUND_EXPR, integer_type_node,
46525 fnstsw_call, update_mod);
46526 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46527 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46528 }
46529 if (TARGET_SSE_MATH)
46530 {
46531 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46532 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46533 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46534 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46535 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46536 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46537 mxcsr_orig_var, stmxcsr_hold_call);
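      /* For the hold sequence, load a copy of MXCSR with all exception
	 mask bits set (0x1f80) and all exception flag bits (bits 0-5)
	 cleared, so no traps fire and no stale flags remain.  */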
46538 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46539 mxcsr_orig_var,
46540 build_int_cst (unsigned_type_node, 0x1f80));
46541 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46542 build_int_cst (unsigned_type_node, 0xffffffc0));
46543 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46544 mxcsr_mod_var, hold_mod_val);
46545 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46546 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46547 hold_assign_orig, hold_assign_mod);
46548 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46549 ldmxcsr_hold_call);
46550 if (*hold)
46551 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46552 else
46553 *hold = hold_all;
46554 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46555 if (*clear)
46556 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46557 ldmxcsr_clear_call);
46558 else
46559 *clear = ldmxcsr_clear_call;
46560 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46561 tree exceptions_sse = fold_convert (integer_type_node,
46562 stxmcsr_update_call);
46563 if (*update)
46564 {
46565 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46566 exceptions_var, exceptions_sse);
46567 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46568 exceptions_var, exceptions_mod);
46569 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46570 exceptions_assign);
46571 }
46572 else
46573 *update = build2 (MODIFY_EXPR, integer_type_node,
46574 exceptions_var, exceptions_sse);
46575 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46576 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46577 ldmxcsr_update_call);
46578 }
46579 tree atomic_feraiseexcept
46580 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46581 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46582 1, exceptions_var);
46583 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46584 atomic_feraiseexcept_call);
46585 }
46586
46587 /* Initialize the GCC target structure. */
46588 #undef TARGET_RETURN_IN_MEMORY
46589 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46590
46591 #undef TARGET_LEGITIMIZE_ADDRESS
46592 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46593
46594 #undef TARGET_ATTRIBUTE_TABLE
46595 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46596 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46597 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46598 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46599 # undef TARGET_MERGE_DECL_ATTRIBUTES
46600 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46601 #endif
46602
46603 #undef TARGET_COMP_TYPE_ATTRIBUTES
46604 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46605
46606 #undef TARGET_INIT_BUILTINS
46607 #define TARGET_INIT_BUILTINS ix86_init_builtins
46608 #undef TARGET_BUILTIN_DECL
46609 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46610 #undef TARGET_EXPAND_BUILTIN
46611 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46612
46613 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46614 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46615 ix86_builtin_vectorized_function
46616
46617 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46618 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46619
46620 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46621 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46622
46623 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46624 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46625
46626 #undef TARGET_BUILTIN_RECIPROCAL
46627 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46628
46629 #undef TARGET_ASM_FUNCTION_EPILOGUE
46630 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46631
46632 #undef TARGET_ENCODE_SECTION_INFO
46633 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46634 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46635 #else
46636 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46637 #endif
46638
46639 #undef TARGET_ASM_OPEN_PAREN
46640 #define TARGET_ASM_OPEN_PAREN ""
46641 #undef TARGET_ASM_CLOSE_PAREN
46642 #define TARGET_ASM_CLOSE_PAREN ""
46643
46644 #undef TARGET_ASM_BYTE_OP
46645 #define TARGET_ASM_BYTE_OP ASM_BYTE
46646
46647 #undef TARGET_ASM_ALIGNED_HI_OP
46648 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46649 #undef TARGET_ASM_ALIGNED_SI_OP
46650 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46651 #ifdef ASM_QUAD
46652 #undef TARGET_ASM_ALIGNED_DI_OP
46653 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46654 #endif
46655
46656 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46657 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46658
46659 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46660 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46661
46662 #undef TARGET_ASM_UNALIGNED_HI_OP
46663 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46664 #undef TARGET_ASM_UNALIGNED_SI_OP
46665 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46666 #undef TARGET_ASM_UNALIGNED_DI_OP
46667 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46668
46669 #undef TARGET_PRINT_OPERAND
46670 #define TARGET_PRINT_OPERAND ix86_print_operand
46671 #undef TARGET_PRINT_OPERAND_ADDRESS
46672 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46673 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46674 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46675 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46676 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46677
46678 #undef TARGET_SCHED_INIT_GLOBAL
46679 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46680 #undef TARGET_SCHED_ADJUST_COST
46681 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46682 #undef TARGET_SCHED_ISSUE_RATE
46683 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46684 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46685 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46686 ia32_multipass_dfa_lookahead
46687 #undef TARGET_SCHED_MACRO_FUSION_P
46688 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46689 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46690 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46691
46692 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46693 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46694
46695 #undef TARGET_MEMMODEL_CHECK
46696 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46697
46698 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46699 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46700
46701 #ifdef HAVE_AS_TLS
46702 #undef TARGET_HAVE_TLS
46703 #define TARGET_HAVE_TLS true
46704 #endif
46705 #undef TARGET_CANNOT_FORCE_CONST_MEM
46706 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46707 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46708 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46709
46710 #undef TARGET_DELEGITIMIZE_ADDRESS
46711 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46712
46713 #undef TARGET_MS_BITFIELD_LAYOUT_P
46714 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46715
46716 #if TARGET_MACHO
46717 #undef TARGET_BINDS_LOCAL_P
46718 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
46719 #endif
46720 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46721 #undef TARGET_BINDS_LOCAL_P
46722 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
46723 #endif
46724
46725 #undef TARGET_ASM_OUTPUT_MI_THUNK
46726 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
46727 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
46728 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
46729
46730 #undef TARGET_ASM_FILE_START
46731 #define TARGET_ASM_FILE_START x86_file_start
46732
46733 #undef TARGET_OPTION_OVERRIDE
46734 #define TARGET_OPTION_OVERRIDE ix86_option_override
46735
46736 #undef TARGET_REGISTER_MOVE_COST
46737 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
46738 #undef TARGET_MEMORY_MOVE_COST
46739 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
46740 #undef TARGET_RTX_COSTS
46741 #define TARGET_RTX_COSTS ix86_rtx_costs
46742 #undef TARGET_ADDRESS_COST
46743 #define TARGET_ADDRESS_COST ix86_address_cost
46744
46745 #undef TARGET_FIXED_CONDITION_CODE_REGS
46746 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
46747 #undef TARGET_CC_MODES_COMPATIBLE
46748 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
46749
46750 #undef TARGET_MACHINE_DEPENDENT_REORG
46751 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
46752
46753 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
46754 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
46755
46756 #undef TARGET_BUILD_BUILTIN_VA_LIST
46757 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
46758
46759 #undef TARGET_FOLD_BUILTIN
46760 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
46761
46762 #undef TARGET_COMPARE_VERSION_PRIORITY
46763 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
46764
46765 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
46766 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
46767 ix86_generate_version_dispatcher_body
46768
46769 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
46770 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
46771 ix86_get_function_versions_dispatcher
46772
46773 #undef TARGET_ENUM_VA_LIST_P
46774 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
46775
46776 #undef TARGET_FN_ABI_VA_LIST
46777 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
46778
46779 #undef TARGET_CANONICAL_VA_LIST_TYPE
46780 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
46781
46782 #undef TARGET_EXPAND_BUILTIN_VA_START
46783 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
46784
46785 #undef TARGET_MD_ASM_CLOBBERS
46786 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
46787
46788 #undef TARGET_PROMOTE_PROTOTYPES
46789 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
46790 #undef TARGET_SETUP_INCOMING_VARARGS
46791 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
46792 #undef TARGET_MUST_PASS_IN_STACK
46793 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
46794 #undef TARGET_FUNCTION_ARG_ADVANCE
46795 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
46796 #undef TARGET_FUNCTION_ARG
46797 #define TARGET_FUNCTION_ARG ix86_function_arg
46798 #undef TARGET_FUNCTION_ARG_BOUNDARY
46799 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
46800 #undef TARGET_PASS_BY_REFERENCE
46801 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
46802 #undef TARGET_INTERNAL_ARG_POINTER
46803 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
46804 #undef TARGET_UPDATE_STACK_BOUNDARY
46805 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
46806 #undef TARGET_GET_DRAP_RTX
46807 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
46808 #undef TARGET_STRICT_ARGUMENT_NAMING
46809 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
46810 #undef TARGET_STATIC_CHAIN
46811 #define TARGET_STATIC_CHAIN ix86_static_chain
46812 #undef TARGET_TRAMPOLINE_INIT
46813 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
46814 #undef TARGET_RETURN_POPS_ARGS
46815 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
46816
46817 #undef TARGET_LEGITIMATE_COMBINED_INSN
46818 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
46819
46820 #undef TARGET_ASAN_SHADOW_OFFSET
46821 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
46822
46823 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
46824 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
46825
46826 #undef TARGET_SCALAR_MODE_SUPPORTED_P
46827 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
46828
46829 #undef TARGET_VECTOR_MODE_SUPPORTED_P
46830 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
46831
46832 #undef TARGET_C_MODE_FOR_SUFFIX
46833 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
46834
46835 #ifdef HAVE_AS_TLS
46836 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
46837 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
46838 #endif
46839
46840 #ifdef SUBTARGET_INSERT_ATTRIBUTES
46841 #undef TARGET_INSERT_ATTRIBUTES
46842 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
46843 #endif
46844
46845 #undef TARGET_MANGLE_TYPE
46846 #define TARGET_MANGLE_TYPE ix86_mangle_type
46847
46848 #if !TARGET_MACHO
46849 #undef TARGET_STACK_PROTECT_FAIL
46850 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
46851 #endif
46852
46853 #undef TARGET_FUNCTION_VALUE
46854 #define TARGET_FUNCTION_VALUE ix86_function_value
46855
46856 #undef TARGET_FUNCTION_VALUE_REGNO_P
46857 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
46858
46859 #undef TARGET_PROMOTE_FUNCTION_MODE
46860 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
46861
46862 #undef TARGET_MEMBER_TYPE_FORCES_BLK
46863 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
46864
46865 #undef TARGET_INSTANTIATE_DECLS
46866 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
46867
46868 #undef TARGET_SECONDARY_RELOAD
46869 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
46870
46871 #undef TARGET_CLASS_MAX_NREGS
46872 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
46873
46874 #undef TARGET_PREFERRED_RELOAD_CLASS
46875 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
46876 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
46877 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
46878 #undef TARGET_CLASS_LIKELY_SPILLED_P
46879 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
46880
46881 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
46882 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
46883 ix86_builtin_vectorization_cost
46884 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
46885 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
46886 ix86_vectorize_vec_perm_const_ok
46887 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
46888 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
46889 ix86_preferred_simd_mode
46890 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
46891 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
46892 ix86_autovectorize_vector_sizes
46893 #undef TARGET_VECTORIZE_INIT_COST
46894 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
46895 #undef TARGET_VECTORIZE_ADD_STMT_COST
46896 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
46897 #undef TARGET_VECTORIZE_FINISH_COST
46898 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
46899 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
46900 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
46901
46902 #undef TARGET_SET_CURRENT_FUNCTION
46903 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
46904
46905 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
46906 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
46907
46908 #undef TARGET_OPTION_SAVE
46909 #define TARGET_OPTION_SAVE ix86_function_specific_save
46910
46911 #undef TARGET_OPTION_RESTORE
46912 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
46913
46914 #undef TARGET_OPTION_PRINT
46915 #define TARGET_OPTION_PRINT ix86_function_specific_print
46916
46917 #undef TARGET_OPTION_FUNCTION_VERSIONS
46918 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
46919
46920 #undef TARGET_CAN_INLINE_P
46921 #define TARGET_CAN_INLINE_P ix86_can_inline_p
46922
46923 #undef TARGET_EXPAND_TO_RTL_HOOK
46924 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
46925
46926 #undef TARGET_LEGITIMATE_ADDRESS_P
46927 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
46928
46929 #undef TARGET_LRA_P
46930 #define TARGET_LRA_P hook_bool_void_true
46931
46932 #undef TARGET_REGISTER_PRIORITY
46933 #define TARGET_REGISTER_PRIORITY ix86_register_priority
46934
46935 #undef TARGET_REGISTER_USAGE_LEVELING_P
46936 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
46937
46938 #undef TARGET_LEGITIMATE_CONSTANT_P
46939 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
46940
46941 #undef TARGET_FRAME_POINTER_REQUIRED
46942 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
46943
46944 #undef TARGET_CAN_ELIMINATE
46945 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
46946
46947 #undef TARGET_EXTRA_LIVE_ON_ENTRY
46948 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
46949
46950 #undef TARGET_ASM_CODE_END
46951 #define TARGET_ASM_CODE_END ix86_code_end
46952
46953 #undef TARGET_CONDITIONAL_REGISTER_USAGE
46954 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
46955
46956 #if TARGET_MACHO
46957 #undef TARGET_INIT_LIBFUNCS
46958 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
46959 #endif
46960
46961 #undef TARGET_LOOP_UNROLL_ADJUST
46962 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
46963
46964 #undef TARGET_SPILL_CLASS
46965 #define TARGET_SPILL_CLASS ix86_spill_class
46966
46967 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
46968 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
46969 ix86_simd_clone_compute_vecsize_and_simdlen
46970
46971 #undef TARGET_SIMD_CLONE_ADJUST
46972 #define TARGET_SIMD_CLONE_ADJUST \
46973 ix86_simd_clone_adjust
46974
46975 #undef TARGET_SIMD_CLONE_USABLE
46976 #define TARGET_SIMD_CLONE_USABLE \
46977 ix86_simd_clone_usable
46978
46979 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
46980 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
46981 ix86_float_exceptions_rounding_supported_p
46982
46983 struct gcc_target targetm = TARGET_INITIALIZER;
46984 \f
46985 #include "gt-i386.h"