1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84
85 static rtx legitimize_dllimport_symbol (rtx, bool);
86 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
87 static rtx legitimize_pe_coff_symbol (rtx, bool);
88
89 #ifndef CHECK_STACK_LIMIT
90 #define CHECK_STACK_LIMIT (-1)
91 #endif
92
93 /* Return index of given mode in mult and division cost tables. */
94 #define MODE_INDEX(mode) \
95 ((mode) == QImode ? 0 \
96 : (mode) == HImode ? 1 \
97 : (mode) == SImode ? 2 \
98 : (mode) == DImode ? 3 \
99 : 4)
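/* Illustrative note, not part of the original file: MODE_INDEX is how the
   five-entry multiply and divide cost arrays below are indexed; the field
   name in this example is an assumption made for the sketch:

     int mul_start_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   Index 4 ("other") covers any remaining mode, e.g. TImode.  */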
100
101 /* Processor costs (relative to an add) */
102 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
103 #define COSTS_N_BYTES(N) ((N) * 2)
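/* Worked instance (explanatory note, not original text): with COSTS_N_INSNS (N)
   expanding to (N) * 4, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a
   2-byte add scores the same in the size-tuning table below as a one-insn
   add does in the speed-tuning tables.  */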
104
105 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
106
107 static stringop_algs ix86_size_memcpy[2] = {
108 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
109 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
110 static stringop_algs ix86_size_memset[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113
114 const
115 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
116 COSTS_N_BYTES (2), /* cost of an add instruction */
117 COSTS_N_BYTES (3), /* cost of a lea instruction */
118 COSTS_N_BYTES (2), /* variable shift costs */
119 COSTS_N_BYTES (3), /* constant shift costs */
120 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
121 COSTS_N_BYTES (3), /* HI */
122 COSTS_N_BYTES (3), /* SI */
123 COSTS_N_BYTES (3), /* DI */
124 COSTS_N_BYTES (5)}, /* other */
125 0, /* cost of multiply per each bit set */
126 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
127 COSTS_N_BYTES (3), /* HI */
128 COSTS_N_BYTES (3), /* SI */
129 COSTS_N_BYTES (3), /* DI */
130 COSTS_N_BYTES (5)}, /* other */
131 COSTS_N_BYTES (3), /* cost of movsx */
132 COSTS_N_BYTES (3), /* cost of movzx */
133 0, /* "large" insn */
134 2, /* MOVE_RATIO */
135 2, /* cost for loading QImode using movzbl */
136 {2, 2, 2}, /* cost of loading integer registers
137 in QImode, HImode and SImode.
138 Relative to reg-reg move (2). */
139 {2, 2, 2}, /* cost of storing integer registers */
140 2, /* cost of reg,reg fld/fst */
141 {2, 2, 2}, /* cost of loading fp registers
142 in SFmode, DFmode and XFmode */
143 {2, 2, 2}, /* cost of storing fp registers
144 in SFmode, DFmode and XFmode */
145 3, /* cost of moving MMX register */
146 {3, 3}, /* cost of loading MMX registers
147 in SImode and DImode */
148 {3, 3}, /* cost of storing MMX registers
149 in SImode and DImode */
150 3, /* cost of moving SSE register */
151 {3, 3, 3}, /* cost of loading SSE registers
152 in SImode, DImode and TImode */
153 {3, 3, 3}, /* cost of storing SSE registers
154 in SImode, DImode and TImode */
155 3, /* MMX or SSE register to integer */
156 0, /* size of l1 cache */
157 0, /* size of l2 cache */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
160 2, /* Branch cost */
161 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
162 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
163 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
164 COSTS_N_BYTES (2), /* cost of FABS instruction. */
165 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
166 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
167 ix86_size_memcpy,
168 ix86_size_memset,
169 1, /* scalar_stmt_cost. */
170 1, /* scalar load_cost. */
171 1, /* scalar_store_cost. */
172 1, /* vec_stmt_cost. */
173 1, /* vec_to_scalar_cost. */
174 1, /* scalar_to_vec_cost. */
175 1, /* vec_align_load_cost. */
176 1, /* vec_unalign_load_cost. */
177 1, /* vec_store_cost. */
178 1, /* cond_taken_branch_cost. */
179 1, /* cond_not_taken_branch_cost. */
180 };
181
182 /* Processor costs (relative to an add) */
183 static stringop_algs i386_memcpy[2] = {
184 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
185 DUMMY_STRINGOP_ALGS};
186 static stringop_algs i386_memset[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189
190 static const
191 struct processor_costs i386_cost = { /* 386 specific costs */
192 COSTS_N_INSNS (1), /* cost of an add instruction */
193 COSTS_N_INSNS (1), /* cost of a lea instruction */
194 COSTS_N_INSNS (3), /* variable shift costs */
195 COSTS_N_INSNS (2), /* constant shift costs */
196 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
197 COSTS_N_INSNS (6), /* HI */
198 COSTS_N_INSNS (6), /* SI */
199 COSTS_N_INSNS (6), /* DI */
200 COSTS_N_INSNS (6)}, /* other */
201 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
202 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
203 COSTS_N_INSNS (23), /* HI */
204 COSTS_N_INSNS (23), /* SI */
205 COSTS_N_INSNS (23), /* DI */
206 COSTS_N_INSNS (23)}, /* other */
207 COSTS_N_INSNS (3), /* cost of movsx */
208 COSTS_N_INSNS (2), /* cost of movzx */
209 15, /* "large" insn */
210 3, /* MOVE_RATIO */
211 4, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {8, 8, 8}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {8, 8, 8}, /* cost of storing fp registers
220 in SFmode, DFmode and XFmode */
221 2, /* cost of moving MMX register */
222 {4, 8}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {4, 8}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {4, 8, 16}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {4, 8, 16}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 0, /* size of l1 cache */
233 0, /* size of l2 cache */
234 0, /* size of prefetch block */
235 0, /* number of parallel prefetches */
236 1, /* Branch cost */
237 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
238 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
239 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
240 COSTS_N_INSNS (22), /* cost of FABS instruction. */
241 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
242 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
243 i386_memcpy,
244 i386_memset,
245 1, /* scalar_stmt_cost. */
246 1, /* scalar load_cost. */
247 1, /* scalar_store_cost. */
248 1, /* vec_stmt_cost. */
249 1, /* vec_to_scalar_cost. */
250 1, /* scalar_to_vec_cost. */
251 1, /* vec_align_load_cost. */
252 2, /* vec_unalign_load_cost. */
253 1, /* vec_store_cost. */
254 3, /* cond_taken_branch_cost. */
255 1, /* cond_not_taken_branch_cost. */
256 };
257
258 static stringop_algs i486_memcpy[2] = {
259 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
260 DUMMY_STRINGOP_ALGS};
261 static stringop_algs i486_memset[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264
265 static const
266 struct processor_costs i486_cost = { /* 486 specific costs */
267 COSTS_N_INSNS (1), /* cost of an add instruction */
268 COSTS_N_INSNS (1), /* cost of a lea instruction */
269 COSTS_N_INSNS (3), /* variable shift costs */
270 COSTS_N_INSNS (2), /* constant shift costs */
271 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
272 COSTS_N_INSNS (12), /* HI */
273 COSTS_N_INSNS (12), /* SI */
274 COSTS_N_INSNS (12), /* DI */
275 COSTS_N_INSNS (12)}, /* other */
276 1, /* cost of multiply per each bit set */
277 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
278 COSTS_N_INSNS (40), /* HI */
279 COSTS_N_INSNS (40), /* SI */
280 COSTS_N_INSNS (40), /* DI */
281 COSTS_N_INSNS (40)}, /* other */
282 COSTS_N_INSNS (3), /* cost of movsx */
283 COSTS_N_INSNS (2), /* cost of movzx */
284 15, /* "large" insn */
285 3, /* MOVE_RATIO */
286 4, /* cost for loading QImode using movzbl */
287 {2, 4, 2}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
289 Relative to reg-reg move (2). */
290 {2, 4, 2}, /* cost of storing integer registers */
291 2, /* cost of reg,reg fld/fst */
292 {8, 8, 8}, /* cost of loading fp registers
293 in SFmode, DFmode and XFmode */
294 {8, 8, 8}, /* cost of storing fp registers
295 in SFmode, DFmode and XFmode */
296 2, /* cost of moving MMX register */
297 {4, 8}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {4, 8}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {4, 8, 16}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {4, 8, 16}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 3, /* MMX or SSE register to integer */
307 4, /* size of l1 cache. 486 has 8kB cache
308 shared for code and data, so 4kB is
309 not really precise. */
310 4, /* size of l2 cache */
311 0, /* size of prefetch block */
312 0, /* number of parallel prefetches */
313 1, /* Branch cost */
314 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
315 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
316 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
317 COSTS_N_INSNS (3), /* cost of FABS instruction. */
318 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
319 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
320 i486_memcpy,
321 i486_memset,
322 1, /* scalar_stmt_cost. */
323 1, /* scalar load_cost. */
324 1, /* scalar_store_cost. */
325 1, /* vec_stmt_cost. */
326 1, /* vec_to_scalar_cost. */
327 1, /* scalar_to_vec_cost. */
328 1, /* vec_align_load_cost. */
329 2, /* vec_unalign_load_cost. */
330 1, /* vec_store_cost. */
331 3, /* cond_taken_branch_cost. */
332 1, /* cond_not_taken_branch_cost. */
333 };
334
335 static stringop_algs pentium_memcpy[2] = {
336 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
337 DUMMY_STRINGOP_ALGS};
338 static stringop_algs pentium_memset[2] = {
339 {libcall, {{-1, rep_prefix_4_byte, false}}},
340 DUMMY_STRINGOP_ALGS};
341
342 static const
343 struct processor_costs pentium_cost = {
344 COSTS_N_INSNS (1), /* cost of an add instruction */
345 COSTS_N_INSNS (1), /* cost of a lea instruction */
346 COSTS_N_INSNS (4), /* variable shift costs */
347 COSTS_N_INSNS (1), /* constant shift costs */
348 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
349 COSTS_N_INSNS (11), /* HI */
350 COSTS_N_INSNS (11), /* SI */
351 COSTS_N_INSNS (11), /* DI */
352 COSTS_N_INSNS (11)}, /* other */
353 0, /* cost of multiply per each bit set */
354 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
355 COSTS_N_INSNS (25), /* HI */
356 COSTS_N_INSNS (25), /* SI */
357 COSTS_N_INSNS (25), /* DI */
358 COSTS_N_INSNS (25)}, /* other */
359 COSTS_N_INSNS (3), /* cost of movsx */
360 COSTS_N_INSNS (2), /* cost of movzx */
361 8, /* "large" insn */
362 6, /* MOVE_RATIO */
363 6, /* cost for loading QImode using movzbl */
364 {2, 4, 2}, /* cost of loading integer registers
365 in QImode, HImode and SImode.
366 Relative to reg-reg move (2). */
367 {2, 4, 2}, /* cost of storing integer registers */
368 2, /* cost of reg,reg fld/fst */
369 {2, 2, 6}, /* cost of loading fp registers
370 in SFmode, DFmode and XFmode */
371 {4, 4, 6}, /* cost of storing fp registers
372 in SFmode, DFmode and XFmode */
373 8, /* cost of moving MMX register */
374 {8, 8}, /* cost of loading MMX registers
375 in SImode and DImode */
376 {8, 8}, /* cost of storing MMX registers
377 in SImode and DImode */
378 2, /* cost of moving SSE register */
379 {4, 8, 16}, /* cost of loading SSE registers
380 in SImode, DImode and TImode */
381 {4, 8, 16}, /* cost of storing SSE registers
382 in SImode, DImode and TImode */
383 3, /* MMX or SSE register to integer */
384 8, /* size of l1 cache. */
385 8, /* size of l2 cache */
386 0, /* size of prefetch block */
387 0, /* number of parallel prefetches */
388 2, /* Branch cost */
389 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
390 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
391 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
392 COSTS_N_INSNS (1), /* cost of FABS instruction. */
393 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
394 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
395 pentium_memcpy,
396 pentium_memset,
397 1, /* scalar_stmt_cost. */
398 1, /* scalar load_cost. */
399 1, /* scalar_store_cost. */
400 1, /* vec_stmt_cost. */
401 1, /* vec_to_scalar_cost. */
402 1, /* scalar_to_vec_cost. */
403 1, /* vec_align_load_cost. */
404 2, /* vec_unalign_load_cost. */
405 1, /* vec_store_cost. */
406 3, /* cond_taken_branch_cost. */
407 1, /* cond_not_taken_branch_cost. */
408 };
409
410 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
411    (we ensure the alignment).  For small blocks an inline loop is still a
412    noticeable win, while for bigger blocks either rep movsl or rep movsb is
413    the way to go.  Rep movsb apparently has a more expensive startup time in
414    the CPU, but after 4K the difference is down in the noise.  */
415 static stringop_algs pentiumpro_memcpy[2] = {
416 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
417 {8192, rep_prefix_4_byte, false},
418 {-1, rep_prefix_1_byte, false}}},
419 DUMMY_STRINGOP_ALGS};
420 static stringop_algs pentiumpro_memset[2] = {
421 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
422 {8192, rep_prefix_4_byte, false},
423 {-1, libcall, false}}},
424 DUMMY_STRINGOP_ALGS};
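/* Illustrative sketch, not part of the original file: the stringop_algs
   initializers above pair an algorithm for unknown block sizes with a list
   of {max size, algorithm, flag} entries, where a max of -1 means "any
   remaining size".  A table of that shape can be scanned roughly as below;
   the names are made up for the sketch, and the real selection logic
   (decide_alg) appears much later in this file:

     struct example_entry { int max; enum stringop_alg alg; bool noalign; };

     static enum stringop_alg
     example_pick_alg (const struct example_entry *e, int n, HOST_WIDE_INT size)
     {
       for (int i = 0; i < n; i++)
         if (e[i].max == -1 || size <= e[i].max)
           return e[i].alg;
       return libcall;
     }
*/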
425 static const
426 struct processor_costs pentiumpro_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (1), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (4), /* HI */
433 COSTS_N_INSNS (4), /* SI */
434 COSTS_N_INSNS (4), /* DI */
435 COSTS_N_INSNS (4)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (17), /* HI */
439 COSTS_N_INSNS (17), /* SI */
440 COSTS_N_INSNS (17), /* DI */
441 COSTS_N_INSNS (17)}, /* other */
442 COSTS_N_INSNS (1), /* cost of movsx */
443 COSTS_N_INSNS (1), /* cost of movzx */
444 8, /* "large" insn */
445 6, /* MOVE_RATIO */
446 2, /* cost for loading QImode using movzbl */
447 {4, 4, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 2, 2}, /* cost of storing integer registers */
451 2, /* cost of reg,reg fld/fst */
452 {2, 2, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 6}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 3, /* MMX or SSE register to integer */
467 8, /* size of l1 cache. */
468 256, /* size of l2 cache */
469 32, /* size of prefetch block */
470 6, /* number of parallel prefetches */
471 2, /* Branch cost */
472 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
473 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
474 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
475 COSTS_N_INSNS (2), /* cost of FABS instruction. */
476 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
477 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
478 pentiumpro_memcpy,
479 pentiumpro_memset,
480 1, /* scalar_stmt_cost. */
481 1, /* scalar load_cost. */
482 1, /* scalar_store_cost. */
483 1, /* vec_stmt_cost. */
484 1, /* vec_to_scalar_cost. */
485 1, /* scalar_to_vec_cost. */
486 1, /* vec_align_load_cost. */
487 2, /* vec_unalign_load_cost. */
488 1, /* vec_store_cost. */
489 3, /* cond_taken_branch_cost. */
490 1, /* cond_not_taken_branch_cost. */
491 };
492
493 static stringop_algs geode_memcpy[2] = {
494 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
495 DUMMY_STRINGOP_ALGS};
496 static stringop_algs geode_memset[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static const
500 struct processor_costs geode_cost = {
501 COSTS_N_INSNS (1), /* cost of an add instruction */
502 COSTS_N_INSNS (1), /* cost of a lea instruction */
503 COSTS_N_INSNS (2), /* variable shift costs */
504 COSTS_N_INSNS (1), /* constant shift costs */
505 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
506 COSTS_N_INSNS (4), /* HI */
507 COSTS_N_INSNS (7), /* SI */
508 COSTS_N_INSNS (7), /* DI */
509 COSTS_N_INSNS (7)}, /* other */
510 0, /* cost of multiply per each bit set */
511 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
512 COSTS_N_INSNS (23), /* HI */
513 COSTS_N_INSNS (39), /* SI */
514 COSTS_N_INSNS (39), /* DI */
515 COSTS_N_INSNS (39)}, /* other */
516 COSTS_N_INSNS (1), /* cost of movsx */
517 COSTS_N_INSNS (1), /* cost of movzx */
518 8, /* "large" insn */
519 4, /* MOVE_RATIO */
520 1, /* cost for loading QImode using movzbl */
521 {1, 1, 1}, /* cost of loading integer registers
522 in QImode, HImode and SImode.
523 Relative to reg-reg move (2). */
524 {1, 1, 1}, /* cost of storing integer registers */
525 1, /* cost of reg,reg fld/fst */
526 {1, 1, 1}, /* cost of loading fp registers
527 in SFmode, DFmode and XFmode */
528 {4, 6, 6}, /* cost of storing fp registers
529 in SFmode, DFmode and XFmode */
530
531 1, /* cost of moving MMX register */
532 {1, 1}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {1, 1}, /* cost of storing MMX registers
535 in SImode and DImode */
536 1, /* cost of moving SSE register */
537 {1, 1, 1}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {1, 1, 1}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 1, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 128, /* size of l2 cache. */
544 32, /* size of prefetch block */
545 1, /* number of parallel prefetches */
546 1, /* Branch cost */
547 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (1), /* cost of FABS instruction. */
551 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
553 geode_memcpy,
554 geode_memset,
555 1, /* scalar_stmt_cost. */
556 1, /* scalar load_cost. */
557 1, /* scalar_store_cost. */
558 1, /* vec_stmt_cost. */
559 1, /* vec_to_scalar_cost. */
560 1, /* scalar_to_vec_cost. */
561 1, /* vec_align_load_cost. */
562 2, /* vec_unalign_load_cost. */
563 1, /* vec_store_cost. */
564 3, /* cond_taken_branch_cost. */
565 1, /* cond_not_taken_branch_cost. */
566 };
567
568 static stringop_algs k6_memcpy[2] = {
569 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
570 DUMMY_STRINGOP_ALGS};
571 static stringop_algs k6_memset[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static const
575 struct processor_costs k6_cost = {
576 COSTS_N_INSNS (1), /* cost of an add instruction */
577 COSTS_N_INSNS (2), /* cost of a lea instruction */
578 COSTS_N_INSNS (1), /* variable shift costs */
579 COSTS_N_INSNS (1), /* constant shift costs */
580 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
581 COSTS_N_INSNS (3), /* HI */
582 COSTS_N_INSNS (3), /* SI */
583 COSTS_N_INSNS (3), /* DI */
584 COSTS_N_INSNS (3)}, /* other */
585 0, /* cost of multiply per each bit set */
586 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
587 COSTS_N_INSNS (18), /* HI */
588 COSTS_N_INSNS (18), /* SI */
589 COSTS_N_INSNS (18), /* DI */
590 COSTS_N_INSNS (18)}, /* other */
591 COSTS_N_INSNS (2), /* cost of movsx */
592 COSTS_N_INSNS (2), /* cost of movzx */
593 8, /* "large" insn */
594 4, /* MOVE_RATIO */
595 3, /* cost for loading QImode using movzbl */
596 {4, 5, 4}, /* cost of loading integer registers
597 in QImode, HImode and SImode.
598 Relative to reg-reg move (2). */
599 {2, 3, 2}, /* cost of storing integer registers */
600 4, /* cost of reg,reg fld/fst */
601 {6, 6, 6}, /* cost of loading fp registers
602 in SFmode, DFmode and XFmode */
603 {4, 4, 4}, /* cost of storing fp registers
604 in SFmode, DFmode and XFmode */
605 2, /* cost of moving MMX register */
606 {2, 2}, /* cost of loading MMX registers
607 in SImode and DImode */
608 {2, 2}, /* cost of storing MMX registers
609 in SImode and DImode */
610 2, /* cost of moving SSE register */
611 {2, 2, 8}, /* cost of loading SSE registers
612 in SImode, DImode and TImode */
613 {2, 2, 8}, /* cost of storing SSE registers
614 in SImode, DImode and TImode */
615 6, /* MMX or SSE register to integer */
616 32, /* size of l1 cache. */
617 32, /* size of l2 cache. Some models
618 have integrated l2 cache, but
619 optimizing for k6 is not important
620 enough to worry about that. */
621 32, /* size of prefetch block */
622 1, /* number of parallel prefetches */
623 1, /* Branch cost */
624 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
625 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
626 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
627 COSTS_N_INSNS (2), /* cost of FABS instruction. */
628 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
629 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
630 k6_memcpy,
631 k6_memset,
632 1, /* scalar_stmt_cost. */
633 1, /* scalar load_cost. */
634 1, /* scalar_store_cost. */
635 1, /* vec_stmt_cost. */
636 1, /* vec_to_scalar_cost. */
637 1, /* scalar_to_vec_cost. */
638 1, /* vec_align_load_cost. */
639 2, /* vec_unalign_load_cost. */
640 1, /* vec_store_cost. */
641 3, /* cond_taken_branch_cost. */
642 1, /* cond_not_taken_branch_cost. */
643 };
644
645 /* For some reason, Athlon deals better with the REP prefix (relative to
646    loops) than K8 does.  Alignment becomes important after 8 bytes for
647    memcpy and 128 bytes for memset.  */
648 static stringop_algs athlon_memcpy[2] = {
649 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
650 DUMMY_STRINGOP_ALGS};
651 static stringop_algs athlon_memset[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static const
655 struct processor_costs athlon_cost = {
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (2), /* cost of a lea instruction */
658 COSTS_N_INSNS (1), /* variable shift costs */
659 COSTS_N_INSNS (1), /* constant shift costs */
660 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (5), /* HI */
662 COSTS_N_INSNS (5), /* SI */
663 COSTS_N_INSNS (5), /* DI */
664 COSTS_N_INSNS (5)}, /* other */
665 0, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (26), /* HI */
668 COSTS_N_INSNS (42), /* SI */
669 COSTS_N_INSNS (74), /* DI */
670 COSTS_N_INSNS (74)}, /* other */
671 COSTS_N_INSNS (1), /* cost of movsx */
672 COSTS_N_INSNS (1), /* cost of movzx */
673 8, /* "large" insn */
674 9, /* MOVE_RATIO */
675 4, /* cost for loading QImode using movzbl */
676 {3, 4, 3}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {3, 4, 3}, /* cost of storing integer registers */
680 4, /* cost of reg,reg fld/fst */
681 {4, 4, 12}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {6, 6, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {4, 4}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 4}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 4, 6}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 4, 5}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 5, /* MMX or SSE register to integer */
696 64, /* size of l1 cache. */
697 256, /* size of l2 cache. */
698 64, /* size of prefetch block */
699 6, /* number of parallel prefetches */
700 5, /* Branch cost */
701 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (2), /* cost of FABS instruction. */
705 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
707 athlon_memcpy,
708 athlon_memset,
709 1, /* scalar_stmt_cost. */
710 1, /* scalar load_cost. */
711 1, /* scalar_store_cost. */
712 1, /* vec_stmt_cost. */
713 1, /* vec_to_scalar_cost. */
714 1, /* scalar_to_vec_cost. */
715 1, /* vec_align_load_cost. */
716 2, /* vec_unalign_load_cost. */
717 1, /* vec_store_cost. */
718 3, /* cond_taken_branch_cost. */
719 1, /* cond_not_taken_branch_cost. */
720 };
721
722 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
723    small blocks it is better to use a loop.  For large blocks, a libcall can
724    do nontemporal accesses and beat inlined code considerably.  */
725 static stringop_algs k8_memcpy[2] = {
726 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
727 {-1, rep_prefix_4_byte, false}}},
728 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
729 {-1, libcall, false}}}};
730 static stringop_algs k8_memset[2] = {
731 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
732 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
733 {libcall, {{48, unrolled_loop, false},
734 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
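/* Explanatory note, not original text: element 0 of each two-element
   stringop_algs array above is the 32-bit strategy and element 1 the
   64-bit one; later code selects between them along the lines of

     const stringop_algs *algs = &ix86_cost->memcpy[TARGET_64BIT != 0];

   (the exact expression is an assumption for this note).  So 64-bit K8
   code uses rep_prefix_8_byte up to 8192 bytes and a libcall beyond.  */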
735 static const
736 struct processor_costs k8_cost = {
737 COSTS_N_INSNS (1), /* cost of an add instruction */
738 COSTS_N_INSNS (2), /* cost of a lea instruction */
739 COSTS_N_INSNS (1), /* variable shift costs */
740 COSTS_N_INSNS (1), /* constant shift costs */
741 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
742 COSTS_N_INSNS (4), /* HI */
743 COSTS_N_INSNS (3), /* SI */
744 COSTS_N_INSNS (4), /* DI */
745 COSTS_N_INSNS (5)}, /* other */
746 0, /* cost of multiply per each bit set */
747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
748 COSTS_N_INSNS (26), /* HI */
749 COSTS_N_INSNS (42), /* SI */
750 COSTS_N_INSNS (74), /* DI */
751 COSTS_N_INSNS (74)}, /* other */
752 COSTS_N_INSNS (1), /* cost of movsx */
753 COSTS_N_INSNS (1), /* cost of movzx */
754 8, /* "large" insn */
755 9, /* MOVE_RATIO */
756 4, /* cost for loading QImode using movzbl */
757 {3, 4, 3}, /* cost of loading integer registers
758 in QImode, HImode and SImode.
759 Relative to reg-reg move (2). */
760 {3, 4, 3}, /* cost of storing integer registers */
761 4, /* cost of reg,reg fld/fst */
762 {4, 4, 12}, /* cost of loading fp registers
763 in SFmode, DFmode and XFmode */
764 {6, 6, 8}, /* cost of storing fp registers
765 in SFmode, DFmode and XFmode */
766 2, /* cost of moving MMX register */
767 {3, 3}, /* cost of loading MMX registers
768 in SImode and DImode */
769 {4, 4}, /* cost of storing MMX registers
770 in SImode and DImode */
771 2, /* cost of moving SSE register */
772 {4, 3, 6}, /* cost of loading SSE registers
773 in SImode, DImode and TImode */
774 {4, 4, 5}, /* cost of storing SSE registers
775 in SImode, DImode and TImode */
776 5, /* MMX or SSE register to integer */
777 64, /* size of l1 cache. */
778 512, /* size of l2 cache. */
779 64, /* size of prefetch block */
780   /* New AMD processors never drop prefetches; if they cannot be performed
781      immediately, they are queued.  We set the number of simultaneous
782      prefetches to a large constant to reflect this (it probably is not a
783      good idea not to limit the number of prefetches at all, as their
784      execution also takes some time).  */
785 100, /* number of parallel prefetches */
786 3, /* Branch cost */
787 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
788 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
789 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
790 COSTS_N_INSNS (2), /* cost of FABS instruction. */
791 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
792 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
793
794 k8_memcpy,
795 k8_memset,
796 4, /* scalar_stmt_cost. */
797 2, /* scalar load_cost. */
798 2, /* scalar_store_cost. */
799 5, /* vec_stmt_cost. */
800 0, /* vec_to_scalar_cost. */
801 2, /* scalar_to_vec_cost. */
802 2, /* vec_align_load_cost. */
803 3, /* vec_unalign_load_cost. */
804 3, /* vec_store_cost. */
805 3, /* cond_taken_branch_cost. */
806 2, /* cond_not_taken_branch_cost. */
807 };
808
809 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
810    for very small blocks it is better to use a loop.  For large blocks, a
811    libcall can do nontemporal accesses and beat inlined code considerably.  */
812 static stringop_algs amdfam10_memcpy[2] = {
813 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
814 {-1, rep_prefix_4_byte, false}}},
815 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
816 {-1, libcall, false}}}};
817 static stringop_algs amdfam10_memset[2] = {
818 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
819 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
820 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
821 {-1, libcall, false}}}};
822 struct processor_costs amdfam10_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (2), /* cost of a lea instruction */
825 COSTS_N_INSNS (1), /* variable shift costs */
826 COSTS_N_INSNS (1), /* constant shift costs */
827 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (4), /* HI */
829 COSTS_N_INSNS (3), /* SI */
830 COSTS_N_INSNS (4), /* DI */
831 COSTS_N_INSNS (5)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (35), /* HI */
835 COSTS_N_INSNS (51), /* SI */
836 COSTS_N_INSNS (83), /* DI */
837 COSTS_N_INSNS (83)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 8, /* "large" insn */
841 9, /* MOVE_RATIO */
842 4, /* cost for loading QImode using movzbl */
843 {3, 4, 3}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {3, 4, 3}, /* cost of storing integer registers */
847 4, /* cost of reg,reg fld/fst */
848 {4, 4, 12}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {6, 6, 8}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {3, 3}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {4, 4}, /* cost of storing MMX registers
856 in SImode and DImode */
857 2, /* cost of moving SSE register */
858 {4, 4, 3}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {4, 4, 5}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 3, /* MMX or SSE register to integer */
863 /* On K8:
864 MOVD reg64, xmmreg Double FSTORE 4
865 MOVD reg32, xmmreg Double FSTORE 4
866 On AMDFAM10:
867 MOVD reg64, xmmreg Double FADD 3
868 1/1 1/1
869 MOVD reg32, xmmreg Double FADD 3
870 1/1 1/1 */
871 64, /* size of l1 cache. */
872 512, /* size of l2 cache. */
873 64, /* size of prefetch block */
874   /* New AMD processors never drop prefetches; if they cannot be performed
875      immediately, they are queued.  We set the number of simultaneous
876      prefetches to a large constant to reflect this (it probably is not a
877      good idea not to limit the number of prefetches at all, as their
878      execution also takes some time).  */
879 100, /* number of parallel prefetches */
880 2, /* Branch cost */
881 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
882 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
883 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
884 COSTS_N_INSNS (2), /* cost of FABS instruction. */
885 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
886 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
887
888 amdfam10_memcpy,
889 amdfam10_memset,
890 4, /* scalar_stmt_cost. */
891 2, /* scalar load_cost. */
892 2, /* scalar_store_cost. */
893 6, /* vec_stmt_cost. */
894 0, /* vec_to_scalar_cost. */
895 2, /* scalar_to_vec_cost. */
896 2, /* vec_align_load_cost. */
897 2, /* vec_unalign_load_cost. */
898 2, /* vec_store_cost. */
899 2, /* cond_taken_branch_cost. */
900 1, /* cond_not_taken_branch_cost. */
901 };
902
903 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but
904    for very small blocks it is better to use a loop.  For large blocks, a
905    libcall can do nontemporal accesses and beat inlined code considerably.  */
906 static stringop_algs bdver1_memcpy[2] = {
907 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
908 {-1, rep_prefix_4_byte, false}}},
909 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
910 {-1, libcall, false}}}};
911 static stringop_algs bdver1_memset[2] = {
912 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
913 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
914 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
915 {-1, libcall, false}}}};
916
917 const struct processor_costs bdver1_cost = {
918 COSTS_N_INSNS (1), /* cost of an add instruction */
919 COSTS_N_INSNS (1), /* cost of a lea instruction */
920 COSTS_N_INSNS (1), /* variable shift costs */
921 COSTS_N_INSNS (1), /* constant shift costs */
922 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
923 COSTS_N_INSNS (4), /* HI */
924 COSTS_N_INSNS (4), /* SI */
925 COSTS_N_INSNS (6), /* DI */
926 COSTS_N_INSNS (6)}, /* other */
927 0, /* cost of multiply per each bit set */
928 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
929 COSTS_N_INSNS (35), /* HI */
930 COSTS_N_INSNS (51), /* SI */
931 COSTS_N_INSNS (83), /* DI */
932 COSTS_N_INSNS (83)}, /* other */
933 COSTS_N_INSNS (1), /* cost of movsx */
934 COSTS_N_INSNS (1), /* cost of movzx */
935 8, /* "large" insn */
936 9, /* MOVE_RATIO */
937 4, /* cost for loading QImode using movzbl */
938 {5, 5, 4}, /* cost of loading integer registers
939 in QImode, HImode and SImode.
940 Relative to reg-reg move (2). */
941 {4, 4, 4}, /* cost of storing integer registers */
942 2, /* cost of reg,reg fld/fst */
943 {5, 5, 12}, /* cost of loading fp registers
944 in SFmode, DFmode and XFmode */
945 {4, 4, 8}, /* cost of storing fp registers
946 in SFmode, DFmode and XFmode */
947 2, /* cost of moving MMX register */
948 {4, 4}, /* cost of loading MMX registers
949 in SImode and DImode */
950 {4, 4}, /* cost of storing MMX registers
951 in SImode and DImode */
952 2, /* cost of moving SSE register */
953 {4, 4, 4}, /* cost of loading SSE registers
954 in SImode, DImode and TImode */
955 {4, 4, 4}, /* cost of storing SSE registers
956 in SImode, DImode and TImode */
957 2, /* MMX or SSE register to integer */
958 /* On K8:
959 MOVD reg64, xmmreg Double FSTORE 4
960 MOVD reg32, xmmreg Double FSTORE 4
961 On AMDFAM10:
962 MOVD reg64, xmmreg Double FADD 3
963 1/1 1/1
964 MOVD reg32, xmmreg Double FADD 3
965 1/1 1/1 */
966 16, /* size of l1 cache. */
967 2048, /* size of l2 cache. */
968 64, /* size of prefetch block */
969   /* New AMD processors never drop prefetches; if they cannot be performed
970      immediately, they are queued.  We set the number of simultaneous
971      prefetches to a large constant to reflect this (it probably is not a
972      good idea not to limit the number of prefetches at all, as their
973      execution also takes some time).  */
974 100, /* number of parallel prefetches */
975 2, /* Branch cost */
976 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
977 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
978 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
979 COSTS_N_INSNS (2), /* cost of FABS instruction. */
980 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
981 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
982
983 bdver1_memcpy,
984 bdver1_memset,
985 6, /* scalar_stmt_cost. */
986 4, /* scalar load_cost. */
987 4, /* scalar_store_cost. */
988 6, /* vec_stmt_cost. */
989 0, /* vec_to_scalar_cost. */
990 2, /* scalar_to_vec_cost. */
991 4, /* vec_align_load_cost. */
992 4, /* vec_unalign_load_cost. */
993 4, /* vec_store_cost. */
994 2, /* cond_taken_branch_cost. */
995 1, /* cond_not_taken_branch_cost. */
996 };
997
998 /*  BDVER2 has an optimized REP instruction for medium-sized blocks, but
999     for very small blocks it is better to use a loop.  For large blocks, a
1000     libcall can do nontemporal accesses and beat inlined code considerably.  */
1001
1002 static stringop_algs bdver2_memcpy[2] = {
1003 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1004 {-1, rep_prefix_4_byte, false}}},
1005 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1006 {-1, libcall, false}}}};
1007 static stringop_algs bdver2_memset[2] = {
1008 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1009 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1010 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1011 {-1, libcall, false}}}};
1012
1013 const struct processor_costs bdver2_cost = {
1014 COSTS_N_INSNS (1), /* cost of an add instruction */
1015 COSTS_N_INSNS (1), /* cost of a lea instruction */
1016 COSTS_N_INSNS (1), /* variable shift costs */
1017 COSTS_N_INSNS (1), /* constant shift costs */
1018 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1019 COSTS_N_INSNS (4), /* HI */
1020 COSTS_N_INSNS (4), /* SI */
1021 COSTS_N_INSNS (6), /* DI */
1022 COSTS_N_INSNS (6)}, /* other */
1023 0, /* cost of multiply per each bit set */
1024 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1025 COSTS_N_INSNS (35), /* HI */
1026 COSTS_N_INSNS (51), /* SI */
1027 COSTS_N_INSNS (83), /* DI */
1028 COSTS_N_INSNS (83)}, /* other */
1029 COSTS_N_INSNS (1), /* cost of movsx */
1030 COSTS_N_INSNS (1), /* cost of movzx */
1031 8, /* "large" insn */
1032 9, /* MOVE_RATIO */
1033 4, /* cost for loading QImode using movzbl */
1034 {5, 5, 4}, /* cost of loading integer registers
1035 in QImode, HImode and SImode.
1036 Relative to reg-reg move (2). */
1037 {4, 4, 4}, /* cost of storing integer registers */
1038 2, /* cost of reg,reg fld/fst */
1039 {5, 5, 12}, /* cost of loading fp registers
1040 in SFmode, DFmode and XFmode */
1041 {4, 4, 8}, /* cost of storing fp registers
1042 in SFmode, DFmode and XFmode */
1043 2, /* cost of moving MMX register */
1044 {4, 4}, /* cost of loading MMX registers
1045 in SImode and DImode */
1046 {4, 4}, /* cost of storing MMX registers
1047 in SImode and DImode */
1048 2, /* cost of moving SSE register */
1049 {4, 4, 4}, /* cost of loading SSE registers
1050 in SImode, DImode and TImode */
1051 {4, 4, 4}, /* cost of storing SSE registers
1052 in SImode, DImode and TImode */
1053 2, /* MMX or SSE register to integer */
1054 /* On K8:
1055 MOVD reg64, xmmreg Double FSTORE 4
1056 MOVD reg32, xmmreg Double FSTORE 4
1057 On AMDFAM10:
1058 MOVD reg64, xmmreg Double FADD 3
1059 1/1 1/1
1060 MOVD reg32, xmmreg Double FADD 3
1061 1/1 1/1 */
1062 16, /* size of l1 cache. */
1063 2048, /* size of l2 cache. */
1064 64, /* size of prefetch block */
1065   /* New AMD processors never drop prefetches; if they cannot be performed
1066      immediately, they are queued.  We set the number of simultaneous
1067      prefetches to a large constant to reflect this (it probably is not a
1068      good idea not to limit the number of prefetches at all, as their
1069      execution also takes some time).  */
1070 100, /* number of parallel prefetches */
1071 2, /* Branch cost */
1072 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1073 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1074 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1075 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1076 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1077 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1078
1079 bdver2_memcpy,
1080 bdver2_memset,
1081 6, /* scalar_stmt_cost. */
1082 4, /* scalar load_cost. */
1083 4, /* scalar_store_cost. */
1084 6, /* vec_stmt_cost. */
1085 0, /* vec_to_scalar_cost. */
1086 2, /* scalar_to_vec_cost. */
1087 4, /* vec_align_load_cost. */
1088 4, /* vec_unalign_load_cost. */
1089 4, /* vec_store_cost. */
1090 2, /* cond_taken_branch_cost. */
1091 1, /* cond_not_taken_branch_cost. */
1092 };
1093
1094
1095 /*  BDVER3 has an optimized REP instruction for medium-sized blocks, but
1096     for very small blocks it is better to use a loop.  For large blocks, a
1097     libcall can do nontemporal accesses and beat inlined code considerably.  */
1098 static stringop_algs bdver3_memcpy[2] = {
1099 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1100 {-1, rep_prefix_4_byte, false}}},
1101 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1102 {-1, libcall, false}}}};
1103 static stringop_algs bdver3_memset[2] = {
1104 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1105 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1106 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1107 {-1, libcall, false}}}};
1108 struct processor_costs bdver3_cost = {
1109 COSTS_N_INSNS (1), /* cost of an add instruction */
1110 COSTS_N_INSNS (1), /* cost of a lea instruction */
1111 COSTS_N_INSNS (1), /* variable shift costs */
1112 COSTS_N_INSNS (1), /* constant shift costs */
1113 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1114 COSTS_N_INSNS (4), /* HI */
1115 COSTS_N_INSNS (4), /* SI */
1116 COSTS_N_INSNS (6), /* DI */
1117 COSTS_N_INSNS (6)}, /* other */
1118 0, /* cost of multiply per each bit set */
1119 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1120 COSTS_N_INSNS (35), /* HI */
1121 COSTS_N_INSNS (51), /* SI */
1122 COSTS_N_INSNS (83), /* DI */
1123 COSTS_N_INSNS (83)}, /* other */
1124 COSTS_N_INSNS (1), /* cost of movsx */
1125 COSTS_N_INSNS (1), /* cost of movzx */
1126 8, /* "large" insn */
1127 9, /* MOVE_RATIO */
1128 4, /* cost for loading QImode using movzbl */
1129 {5, 5, 4}, /* cost of loading integer registers
1130 in QImode, HImode and SImode.
1131 Relative to reg-reg move (2). */
1132 {4, 4, 4}, /* cost of storing integer registers */
1133 2, /* cost of reg,reg fld/fst */
1134 {5, 5, 12}, /* cost of loading fp registers
1135 in SFmode, DFmode and XFmode */
1136 {4, 4, 8}, /* cost of storing fp registers
1137 in SFmode, DFmode and XFmode */
1138 2, /* cost of moving MMX register */
1139 {4, 4}, /* cost of loading MMX registers
1140 in SImode and DImode */
1141 {4, 4}, /* cost of storing MMX registers
1142 in SImode and DImode */
1143 2, /* cost of moving SSE register */
1144 {4, 4, 4}, /* cost of loading SSE registers
1145 in SImode, DImode and TImode */
1146 {4, 4, 4}, /* cost of storing SSE registers
1147 in SImode, DImode and TImode */
1148 2, /* MMX or SSE register to integer */
1149 16, /* size of l1 cache. */
1150 2048, /* size of l2 cache. */
1151 64, /* size of prefetch block */
1152   /* New AMD processors never drop prefetches; if they cannot be performed
1153      immediately, they are queued.  We set the number of simultaneous
1154      prefetches to a large constant to reflect this (it probably is not a
1155      good idea not to limit the number of prefetches at all, as their
1156      execution also takes some time).  */
1157 100, /* number of parallel prefetches */
1158 2, /* Branch cost */
1159 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1160 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1161 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1162 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1163 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1164 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1165
1166 bdver3_memcpy,
1167 bdver3_memset,
1168 6, /* scalar_stmt_cost. */
1169 4, /* scalar load_cost. */
1170 4, /* scalar_store_cost. */
1171 6, /* vec_stmt_cost. */
1172 0, /* vec_to_scalar_cost. */
1173 2, /* scalar_to_vec_cost. */
1174 4, /* vec_align_load_cost. */
1175 4, /* vec_unalign_load_cost. */
1176 4, /* vec_store_cost. */
1177 2, /* cond_taken_branch_cost. */
1178 1, /* cond_not_taken_branch_cost. */
1179 };
1180
1181 /*  BDVER4 has an optimized REP instruction for medium-sized blocks, but
1182     for very small blocks it is better to use a loop.  For large blocks, a
1183     libcall can do nontemporal accesses and beat inlined code considerably.  */
1184 static stringop_algs bdver4_memcpy[2] = {
1185 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1186 {-1, rep_prefix_4_byte, false}}},
1187 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1188 {-1, libcall, false}}}};
1189 static stringop_algs bdver4_memset[2] = {
1190 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1191 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1192 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1193 {-1, libcall, false}}}};
1194 struct processor_costs bdver4_cost = {
1195 COSTS_N_INSNS (1), /* cost of an add instruction */
1196 COSTS_N_INSNS (1), /* cost of a lea instruction */
1197 COSTS_N_INSNS (1), /* variable shift costs */
1198 COSTS_N_INSNS (1), /* constant shift costs */
1199 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1200 COSTS_N_INSNS (4), /* HI */
1201 COSTS_N_INSNS (4), /* SI */
1202 COSTS_N_INSNS (6), /* DI */
1203 COSTS_N_INSNS (6)}, /* other */
1204 0, /* cost of multiply per each bit set */
1205 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1206 COSTS_N_INSNS (35), /* HI */
1207 COSTS_N_INSNS (51), /* SI */
1208 COSTS_N_INSNS (83), /* DI */
1209 COSTS_N_INSNS (83)}, /* other */
1210 COSTS_N_INSNS (1), /* cost of movsx */
1211 COSTS_N_INSNS (1), /* cost of movzx */
1212 8, /* "large" insn */
1213 9, /* MOVE_RATIO */
1214 4, /* cost for loading QImode using movzbl */
1215 {5, 5, 4}, /* cost of loading integer registers
1216 in QImode, HImode and SImode.
1217 Relative to reg-reg move (2). */
1218 {4, 4, 4}, /* cost of storing integer registers */
1219 2, /* cost of reg,reg fld/fst */
1220 {5, 5, 12}, /* cost of loading fp registers
1221 in SFmode, DFmode and XFmode */
1222 {4, 4, 8}, /* cost of storing fp registers
1223 in SFmode, DFmode and XFmode */
1224 2, /* cost of moving MMX register */
1225 {4, 4}, /* cost of loading MMX registers
1226 in SImode and DImode */
1227 {4, 4}, /* cost of storing MMX registers
1228 in SImode and DImode */
1229 2, /* cost of moving SSE register */
1230 {4, 4, 4}, /* cost of loading SSE registers
1231 in SImode, DImode and TImode */
1232 {4, 4, 4}, /* cost of storing SSE registers
1233 in SImode, DImode and TImode */
1234 2, /* MMX or SSE register to integer */
1235 16, /* size of l1 cache. */
1236 2048, /* size of l2 cache. */
1237 64, /* size of prefetch block */
1238   /* New AMD processors never drop prefetches; if they cannot be performed
1239      immediately, they are queued.  We set the number of simultaneous
1240      prefetches to a large constant to reflect this (it probably is not a
1241      good idea not to limit the number of prefetches at all, as their
1242      execution also takes some time).  */
1243 100, /* number of parallel prefetches */
1244 2, /* Branch cost */
1245 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1246 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1247 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1248 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1249 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1250 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1251
1252 bdver4_memcpy,
1253 bdver4_memset,
1254 6, /* scalar_stmt_cost. */
1255 4, /* scalar load_cost. */
1256 4, /* scalar_store_cost. */
1257 6, /* vec_stmt_cost. */
1258 0, /* vec_to_scalar_cost. */
1259 2, /* scalar_to_vec_cost. */
1260 4, /* vec_align_load_cost. */
1261 4, /* vec_unalign_load_cost. */
1262 4, /* vec_store_cost. */
1263 2, /* cond_taken_branch_cost. */
1264 1, /* cond_not_taken_branch_cost. */
1265 };
1266
1267 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but
1268    for very small blocks it is better to use a loop.  For large blocks, a
1269    libcall can do nontemporal accesses and beat inlined code considerably.  */
1270 static stringop_algs btver1_memcpy[2] = {
1271 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1272 {-1, rep_prefix_4_byte, false}}},
1273 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1274 {-1, libcall, false}}}};
1275 static stringop_algs btver1_memset[2] = {
1276 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1277 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1278 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1279 {-1, libcall, false}}}};
1280 const struct processor_costs btver1_cost = {
1281 COSTS_N_INSNS (1), /* cost of an add instruction */
1282 COSTS_N_INSNS (2), /* cost of a lea instruction */
1283 COSTS_N_INSNS (1), /* variable shift costs */
1284 COSTS_N_INSNS (1), /* constant shift costs */
1285 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1286 COSTS_N_INSNS (4), /* HI */
1287 COSTS_N_INSNS (3), /* SI */
1288 COSTS_N_INSNS (4), /* DI */
1289 COSTS_N_INSNS (5)}, /* other */
1290 0, /* cost of multiply per each bit set */
1291 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1292 COSTS_N_INSNS (35), /* HI */
1293 COSTS_N_INSNS (51), /* SI */
1294 COSTS_N_INSNS (83), /* DI */
1295 COSTS_N_INSNS (83)}, /* other */
1296 COSTS_N_INSNS (1), /* cost of movsx */
1297 COSTS_N_INSNS (1), /* cost of movzx */
1298 8, /* "large" insn */
1299 9, /* MOVE_RATIO */
1300 4, /* cost for loading QImode using movzbl */
1301 {3, 4, 3}, /* cost of loading integer registers
1302 in QImode, HImode and SImode.
1303 Relative to reg-reg move (2). */
1304 {3, 4, 3}, /* cost of storing integer registers */
1305 4, /* cost of reg,reg fld/fst */
1306 {4, 4, 12}, /* cost of loading fp registers
1307 in SFmode, DFmode and XFmode */
1308 {6, 6, 8}, /* cost of storing fp registers
1309 in SFmode, DFmode and XFmode */
1310 2, /* cost of moving MMX register */
1311 {3, 3}, /* cost of loading MMX registers
1312 in SImode and DImode */
1313 {4, 4}, /* cost of storing MMX registers
1314 in SImode and DImode */
1315 2, /* cost of moving SSE register */
1316 {4, 4, 3}, /* cost of loading SSE registers
1317 in SImode, DImode and TImode */
1318 {4, 4, 5}, /* cost of storing SSE registers
1319 in SImode, DImode and TImode */
1320 3, /* MMX or SSE register to integer */
1321 /* On K8:
1322 MOVD reg64, xmmreg Double FSTORE 4
1323 MOVD reg32, xmmreg Double FSTORE 4
1324 On AMDFAM10:
1325 MOVD reg64, xmmreg Double FADD 3
1326 1/1 1/1
1327 MOVD reg32, xmmreg Double FADD 3
1328 1/1 1/1 */
1329 32, /* size of l1 cache. */
1330 512, /* size of l2 cache. */
1331 64, /* size of prefetch block */
1332 100, /* number of parallel prefetches */
1333 2, /* Branch cost */
1334 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1335 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1336 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1337 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1338 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1339 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1340
1341 btver1_memcpy,
1342 btver1_memset,
1343 4, /* scalar_stmt_cost. */
1344 2, /* scalar load_cost. */
1345 2, /* scalar_store_cost. */
1346 6, /* vec_stmt_cost. */
1347 0, /* vec_to_scalar_cost. */
1348 2, /* scalar_to_vec_cost. */
1349 2, /* vec_align_load_cost. */
1350 2, /* vec_unalign_load_cost. */
1351 2, /* vec_store_cost. */
1352 2, /* cond_taken_branch_cost. */
1353 1, /* cond_not_taken_branch_cost. */
1354 };
1355
1356 static stringop_algs btver2_memcpy[2] = {
1357 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1358 {-1, rep_prefix_4_byte, false}}},
1359 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1360 {-1, libcall, false}}}};
1361 static stringop_algs btver2_memset[2] = {
1362 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1363 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1364 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1365 {-1, libcall, false}}}};
1366 const struct processor_costs btver2_cost = {
1367 COSTS_N_INSNS (1), /* cost of an add instruction */
1368 COSTS_N_INSNS (2), /* cost of a lea instruction */
1369 COSTS_N_INSNS (1), /* variable shift costs */
1370 COSTS_N_INSNS (1), /* constant shift costs */
1371 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1372 COSTS_N_INSNS (4), /* HI */
1373 COSTS_N_INSNS (3), /* SI */
1374 COSTS_N_INSNS (4), /* DI */
1375 COSTS_N_INSNS (5)}, /* other */
1376 0, /* cost of multiply per each bit set */
1377 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1378 COSTS_N_INSNS (35), /* HI */
1379 COSTS_N_INSNS (51), /* SI */
1380 COSTS_N_INSNS (83), /* DI */
1381 COSTS_N_INSNS (83)}, /* other */
1382 COSTS_N_INSNS (1), /* cost of movsx */
1383 COSTS_N_INSNS (1), /* cost of movzx */
1384 8, /* "large" insn */
1385 9, /* MOVE_RATIO */
1386 4, /* cost for loading QImode using movzbl */
1387 {3, 4, 3}, /* cost of loading integer registers
1388 in QImode, HImode and SImode.
1389 Relative to reg-reg move (2). */
1390 {3, 4, 3}, /* cost of storing integer registers */
1391 4, /* cost of reg,reg fld/fst */
1392 {4, 4, 12}, /* cost of loading fp registers
1393 in SFmode, DFmode and XFmode */
1394 {6, 6, 8}, /* cost of storing fp registers
1395 in SFmode, DFmode and XFmode */
1396 2, /* cost of moving MMX register */
1397 {3, 3}, /* cost of loading MMX registers
1398 in SImode and DImode */
1399 {4, 4}, /* cost of storing MMX registers
1400 in SImode and DImode */
1401 2, /* cost of moving SSE register */
1402 {4, 4, 3}, /* cost of loading SSE registers
1403 in SImode, DImode and TImode */
1404 {4, 4, 5}, /* cost of storing SSE registers
1405 in SImode, DImode and TImode */
1406 3, /* MMX or SSE register to integer */
1407 /* On K8:
1408 MOVD reg64, xmmreg Double FSTORE 4
1409 MOVD reg32, xmmreg Double FSTORE 4
1410 On AMDFAM10:
1411 MOVD reg64, xmmreg Double FADD 3
1412 1/1 1/1
1413 MOVD reg32, xmmreg Double FADD 3
1414 1/1 1/1 */
1415 32, /* size of l1 cache. */
1416 2048, /* size of l2 cache. */
1417 64, /* size of prefetch block */
1418 100, /* number of parallel prefetches */
1419 2, /* Branch cost */
1420 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1421 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1422 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1423 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1424 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1425 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1426 btver2_memcpy,
1427 btver2_memset,
1428 4, /* scalar_stmt_cost. */
1429 2, /* scalar load_cost. */
1430 2, /* scalar_store_cost. */
1431 6, /* vec_stmt_cost. */
1432 0, /* vec_to_scalar_cost. */
1433 2, /* scalar_to_vec_cost. */
1434 2, /* vec_align_load_cost. */
1435 2, /* vec_unalign_load_cost. */
1436 2, /* vec_store_cost. */
1437 2, /* cond_taken_branch_cost. */
1438 1, /* cond_not_taken_branch_cost. */
1439 };
1440
1441 static stringop_algs pentium4_memcpy[2] = {
1442 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1443 DUMMY_STRINGOP_ALGS};
1444 static stringop_algs pentium4_memset[2] = {
1445 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1446 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1447 DUMMY_STRINGOP_ALGS};
1448
1449 static const
1450 struct processor_costs pentium4_cost = {
1451 COSTS_N_INSNS (1), /* cost of an add instruction */
1452 COSTS_N_INSNS (3), /* cost of a lea instruction */
1453 COSTS_N_INSNS (4), /* variable shift costs */
1454 COSTS_N_INSNS (4), /* constant shift costs */
1455 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1456 COSTS_N_INSNS (15), /* HI */
1457 COSTS_N_INSNS (15), /* SI */
1458 COSTS_N_INSNS (15), /* DI */
1459 COSTS_N_INSNS (15)}, /* other */
1460 0, /* cost of multiply per each bit set */
1461 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1462 COSTS_N_INSNS (56), /* HI */
1463 COSTS_N_INSNS (56), /* SI */
1464 COSTS_N_INSNS (56), /* DI */
1465 COSTS_N_INSNS (56)}, /* other */
1466 COSTS_N_INSNS (1), /* cost of movsx */
1467 COSTS_N_INSNS (1), /* cost of movzx */
1468 16, /* "large" insn */
1469 6, /* MOVE_RATIO */
1470 2, /* cost for loading QImode using movzbl */
1471 {4, 5, 4}, /* cost of loading integer registers
1472 in QImode, HImode and SImode.
1473 Relative to reg-reg move (2). */
1474 {2, 3, 2}, /* cost of storing integer registers */
1475 2, /* cost of reg,reg fld/fst */
1476 {2, 2, 6}, /* cost of loading fp registers
1477 in SFmode, DFmode and XFmode */
1478 {4, 4, 6}, /* cost of storing fp registers
1479 in SFmode, DFmode and XFmode */
1480 2, /* cost of moving MMX register */
1481 {2, 2}, /* cost of loading MMX registers
1482 in SImode and DImode */
1483 {2, 2}, /* cost of storing MMX registers
1484 in SImode and DImode */
1485 12, /* cost of moving SSE register */
1486 {12, 12, 12}, /* cost of loading SSE registers
1487 in SImode, DImode and TImode */
1488 {2, 2, 8}, /* cost of storing SSE registers
1489 in SImode, DImode and TImode */
1490 10, /* MMX or SSE register to integer */
1491 8, /* size of l1 cache. */
1492 256, /* size of l2 cache. */
1493 64, /* size of prefetch block */
1494 6, /* number of parallel prefetches */
1495 2, /* Branch cost */
1496 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1497 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1498 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1499 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1500 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1501 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1502 pentium4_memcpy,
1503 pentium4_memset,
1504 1, /* scalar_stmt_cost. */
1505 1, /* scalar load_cost. */
1506 1, /* scalar_store_cost. */
1507 1, /* vec_stmt_cost. */
1508 1, /* vec_to_scalar_cost. */
1509 1, /* scalar_to_vec_cost. */
1510 1, /* vec_align_load_cost. */
1511 2, /* vec_unalign_load_cost. */
1512 1, /* vec_store_cost. */
1513 3, /* cond_taken_branch_cost. */
1514 1, /* cond_not_taken_branch_cost. */
1515 };
1516
1517 static stringop_algs nocona_memcpy[2] = {
1518 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1519 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1520 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1521
1522 static stringop_algs nocona_memset[2] = {
1523 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1524 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1525 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1526 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1527
1528 static const
1529 struct processor_costs nocona_cost = {
1530 COSTS_N_INSNS (1), /* cost of an add instruction */
1531 COSTS_N_INSNS (1), /* cost of a lea instruction */
1532 COSTS_N_INSNS (1), /* variable shift costs */
1533 COSTS_N_INSNS (1), /* constant shift costs */
1534 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1535 COSTS_N_INSNS (10), /* HI */
1536 COSTS_N_INSNS (10), /* SI */
1537 COSTS_N_INSNS (10), /* DI */
1538 COSTS_N_INSNS (10)}, /* other */
1539 0, /* cost of multiply per each bit set */
1540 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1541 COSTS_N_INSNS (66), /* HI */
1542 COSTS_N_INSNS (66), /* SI */
1543 COSTS_N_INSNS (66), /* DI */
1544 COSTS_N_INSNS (66)}, /* other */
1545 COSTS_N_INSNS (1), /* cost of movsx */
1546 COSTS_N_INSNS (1), /* cost of movzx */
1547 16, /* "large" insn */
1548 17, /* MOVE_RATIO */
1549 4, /* cost for loading QImode using movzbl */
1550 {4, 4, 4}, /* cost of loading integer registers
1551 in QImode, HImode and SImode.
1552 Relative to reg-reg move (2). */
1553 {4, 4, 4}, /* cost of storing integer registers */
1554 3, /* cost of reg,reg fld/fst */
1555 {12, 12, 12}, /* cost of loading fp registers
1556 in SFmode, DFmode and XFmode */
1557 {4, 4, 4}, /* cost of storing fp registers
1558 in SFmode, DFmode and XFmode */
1559 6, /* cost of moving MMX register */
1560 {12, 12}, /* cost of loading MMX registers
1561 in SImode and DImode */
1562 {12, 12}, /* cost of storing MMX registers
1563 in SImode and DImode */
1564 6, /* cost of moving SSE register */
1565 {12, 12, 12}, /* cost of loading SSE registers
1566 in SImode, DImode and TImode */
1567 {12, 12, 12}, /* cost of storing SSE registers
1568 in SImode, DImode and TImode */
1569 8, /* MMX or SSE register to integer */
1570 8, /* size of l1 cache. */
1571 1024, /* size of l2 cache. */
1572 64, /* size of prefetch block */
1573 8, /* number of parallel prefetches */
1574 1, /* Branch cost */
1575 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1576 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1577 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1578 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1579 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1580 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1581 nocona_memcpy,
1582 nocona_memset,
1583 1, /* scalar_stmt_cost. */
1584 1, /* scalar load_cost. */
1585 1, /* scalar_store_cost. */
1586 1, /* vec_stmt_cost. */
1587 1, /* vec_to_scalar_cost. */
1588 1, /* scalar_to_vec_cost. */
1589 1, /* vec_align_load_cost. */
1590 2, /* vec_unalign_load_cost. */
1591 1, /* vec_store_cost. */
1592 3, /* cond_taken_branch_cost. */
1593 1, /* cond_not_taken_branch_cost. */
1594 };
1595
1596 static stringop_algs atom_memcpy[2] = {
1597 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1598 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1599 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1600 static stringop_algs atom_memset[2] = {
1601 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1602 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1603 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1604 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1605 static const
1606 struct processor_costs atom_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1609 COSTS_N_INSNS (1), /* variable shift costs */
1610 COSTS_N_INSNS (1), /* constant shift costs */
1611 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (4), /* HI */
1613 COSTS_N_INSNS (3), /* SI */
1614 COSTS_N_INSNS (4), /* DI */
1615 COSTS_N_INSNS (2)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (26), /* HI */
1619 COSTS_N_INSNS (42), /* SI */
1620 COSTS_N_INSNS (74), /* DI */
1621 COSTS_N_INSNS (74)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 8, /* "large" insn */
1625 17, /* MOVE_RATIO */
1626 4, /* cost for loading QImode using movzbl */
1627 {4, 4, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {4, 4, 4}, /* cost of storing integer registers */
1631 4, /* cost of reg,reg fld/fst */
1632 {12, 12, 12}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {6, 6, 8}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 2, /* cost of moving MMX register */
1637 {8, 8}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {8, 8}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 2, /* cost of moving SSE register */
1642 {8, 8, 8}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {8, 8, 8}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 5, /* MMX or SSE register to integer */
1647 32, /* size of l1 cache. */
1648 256, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 6, /* number of parallel prefetches */
1651 3, /* Branch cost */
1652 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1658 atom_memcpy,
1659 atom_memset,
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
1671 };
1672
1673 static stringop_algs slm_memcpy[2] = {
1674 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1676 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1677 static stringop_algs slm_memset[2] = {
1678 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1679 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1680 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1681 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1682 static const
1683 struct processor_costs slm_cost = {
1684 COSTS_N_INSNS (1), /* cost of an add instruction */
1685 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1686 COSTS_N_INSNS (1), /* variable shift costs */
1687 COSTS_N_INSNS (1), /* constant shift costs */
1688 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1689 COSTS_N_INSNS (3), /* HI */
1690 COSTS_N_INSNS (3), /* SI */
1691 COSTS_N_INSNS (4), /* DI */
1692 COSTS_N_INSNS (2)}, /* other */
1693 0, /* cost of multiply per each bit set */
1694 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1695 COSTS_N_INSNS (26), /* HI */
1696 COSTS_N_INSNS (42), /* SI */
1697 COSTS_N_INSNS (74), /* DI */
1698 COSTS_N_INSNS (74)}, /* other */
1699 COSTS_N_INSNS (1), /* cost of movsx */
1700 COSTS_N_INSNS (1), /* cost of movzx */
1701 8, /* "large" insn */
1702 17, /* MOVE_RATIO */
1703 4, /* cost for loading QImode using movzbl */
1704 {4, 4, 4}, /* cost of loading integer registers
1705 in QImode, HImode and SImode.
1706 Relative to reg-reg move (2). */
1707 {4, 4, 4}, /* cost of storing integer registers */
1708 4, /* cost of reg,reg fld/fst */
1709 {12, 12, 12}, /* cost of loading fp registers
1710 in SFmode, DFmode and XFmode */
1711 {6, 6, 8}, /* cost of storing fp registers
1712 in SFmode, DFmode and XFmode */
1713 2, /* cost of moving MMX register */
1714 {8, 8}, /* cost of loading MMX registers
1715 in SImode and DImode */
1716 {8, 8}, /* cost of storing MMX registers
1717 in SImode and DImode */
1718 2, /* cost of moving SSE register */
1719 {8, 8, 8}, /* cost of loading SSE registers
1720 in SImode, DImode and TImode */
1721 {8, 8, 8}, /* cost of storing SSE registers
1722 in SImode, DImode and TImode */
1723 5, /* MMX or SSE register to integer */
1724 32, /* size of l1 cache. */
1725 256, /* size of l2 cache. */
1726 64, /* size of prefetch block */
1727 6, /* number of parallel prefetches */
1728 3, /* Branch cost */
1729 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1730 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1731 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1732 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1733 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1734 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1735 slm_memcpy,
1736 slm_memset,
1737 1, /* scalar_stmt_cost. */
1738 1, /* scalar load_cost. */
1739 1, /* scalar_store_cost. */
1740 1, /* vec_stmt_cost. */
1741 1, /* vec_to_scalar_cost. */
1742 1, /* scalar_to_vec_cost. */
1743 1, /* vec_align_load_cost. */
1744 2, /* vec_unalign_load_cost. */
1745 1, /* vec_store_cost. */
1746 3, /* cond_taken_branch_cost. */
1747 1, /* cond_not_taken_branch_cost. */
1748 };
1749
1750 static stringop_algs intel_memcpy[2] = {
1751 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1752 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1753 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1754 static stringop_algs intel_memset[2] = {
1755 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1756 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1757 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1758 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1759 static const
1760 struct processor_costs intel_cost = {
1761 COSTS_N_INSNS (1), /* cost of an add instruction */
1762 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1763 COSTS_N_INSNS (1), /* variable shift costs */
1764 COSTS_N_INSNS (1), /* constant shift costs */
1765 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1766 COSTS_N_INSNS (3), /* HI */
1767 COSTS_N_INSNS (3), /* SI */
1768 COSTS_N_INSNS (4), /* DI */
1769 COSTS_N_INSNS (2)}, /* other */
1770 0, /* cost of multiply per each bit set */
1771 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1772 COSTS_N_INSNS (26), /* HI */
1773 COSTS_N_INSNS (42), /* SI */
1774 COSTS_N_INSNS (74), /* DI */
1775 COSTS_N_INSNS (74)}, /* other */
1776 COSTS_N_INSNS (1), /* cost of movsx */
1777 COSTS_N_INSNS (1), /* cost of movzx */
1778 8, /* "large" insn */
1779 17, /* MOVE_RATIO */
1780 4, /* cost for loading QImode using movzbl */
1781 {4, 4, 4}, /* cost of loading integer registers
1782 in QImode, HImode and SImode.
1783 Relative to reg-reg move (2). */
1784 {4, 4, 4}, /* cost of storing integer registers */
1785 4, /* cost of reg,reg fld/fst */
1786 {12, 12, 12}, /* cost of loading fp registers
1787 in SFmode, DFmode and XFmode */
1788 {6, 6, 8}, /* cost of storing fp registers
1789 in SFmode, DFmode and XFmode */
1790 2, /* cost of moving MMX register */
1791 {8, 8}, /* cost of loading MMX registers
1792 in SImode and DImode */
1793 {8, 8}, /* cost of storing MMX registers
1794 in SImode and DImode */
1795 2, /* cost of moving SSE register */
1796 {8, 8, 8}, /* cost of loading SSE registers
1797 in SImode, DImode and TImode */
1798 {8, 8, 8}, /* cost of storing SSE registers
1799 in SImode, DImode and TImode */
1800 5, /* MMX or SSE register to integer */
1801 32, /* size of l1 cache. */
1802 256, /* size of l2 cache. */
1803 64, /* size of prefetch block */
1804 6, /* number of parallel prefetches */
1805 3, /* Branch cost */
1806 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1807 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1808 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1809 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1810 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1811 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1812 intel_memcpy,
1813 intel_memset,
1814 1, /* scalar_stmt_cost. */
1815 1, /* scalar load_cost. */
1816 1, /* scalar_store_cost. */
1817 1, /* vec_stmt_cost. */
1818 1, /* vec_to_scalar_cost. */
1819 1, /* scalar_to_vec_cost. */
1820 1, /* vec_align_load_cost. */
1821 2, /* vec_unalign_load_cost. */
1822 1, /* vec_store_cost. */
1823 3, /* cond_taken_branch_cost. */
1824 1, /* cond_not_taken_branch_cost. */
1825 };
1826
1827 /* Generic should produce code tuned for Core-i7 (and newer chips)
1828 and btver1 (and newer chips). */
1829
1830 static stringop_algs generic_memcpy[2] = {
1831 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1832 {-1, libcall, false}}},
1833 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1834 {-1, libcall, false}}}};
1835 static stringop_algs generic_memset[2] = {
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1837 {-1, libcall, false}}},
1838 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1839 {-1, libcall, false}}}};
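/* A sketch of how to read the stringop_algs initializers above, assuming
   the struct layout declared in i386.h: each table has two entries, [0]
   for 32-bit and [1] for 64-bit code (cf. the TARGET_64BIT indexing in
   ix86_parse_stringop_strategy_string below).  Within an entry, the first
   field is the algorithm used when the size is not known at compile time,
   followed by {max_size, algorithm, noalign} triples tried in order for
   known sizes, terminated by max_size == -1.  So for generic_memcpy[1], a
   known copy of at most 32 bytes uses an inline loop, at most 8192 bytes
   uses rep-prefixed 8-byte moves, and anything larger calls the library.  */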
1840 static const
1841 struct processor_costs generic_cost = {
1842 COSTS_N_INSNS (1), /* cost of an add instruction */
1843 /* On all chips taken into consideration, lea is 2 cycles or more. With
1844 this cost, however, our current implementation of synth_mult results in
1845 the use of unnecessary temporary registers, causing regressions on several
1846 SPECfp benchmarks. */
1847 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1848 COSTS_N_INSNS (1), /* variable shift costs */
1849 COSTS_N_INSNS (1), /* constant shift costs */
1850 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1851 COSTS_N_INSNS (4), /* HI */
1852 COSTS_N_INSNS (3), /* SI */
1853 COSTS_N_INSNS (4), /* DI */
1854 COSTS_N_INSNS (2)}, /* other */
1855 0, /* cost of multiply per each bit set */
1856 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1857 COSTS_N_INSNS (26), /* HI */
1858 COSTS_N_INSNS (42), /* SI */
1859 COSTS_N_INSNS (74), /* DI */
1860 COSTS_N_INSNS (74)}, /* other */
1861 COSTS_N_INSNS (1), /* cost of movsx */
1862 COSTS_N_INSNS (1), /* cost of movzx */
1863 8, /* "large" insn */
1864 17, /* MOVE_RATIO */
1865 4, /* cost for loading QImode using movzbl */
1866 {4, 4, 4}, /* cost of loading integer registers
1867 in QImode, HImode and SImode.
1868 Relative to reg-reg move (2). */
1869 {4, 4, 4}, /* cost of storing integer registers */
1870 4, /* cost of reg,reg fld/fst */
1871 {12, 12, 12}, /* cost of loading fp registers
1872 in SFmode, DFmode and XFmode */
1873 {6, 6, 8}, /* cost of storing fp registers
1874 in SFmode, DFmode and XFmode */
1875 2, /* cost of moving MMX register */
1876 {8, 8}, /* cost of loading MMX registers
1877 in SImode and DImode */
1878 {8, 8}, /* cost of storing MMX registers
1879 in SImode and DImode */
1880 2, /* cost of moving SSE register */
1881 {8, 8, 8}, /* cost of loading SSE registers
1882 in SImode, DImode and TImode */
1883 {8, 8, 8}, /* cost of storing SSE registers
1884 in SImode, DImode and TImode */
1885 5, /* MMX or SSE register to integer */
1886 32, /* size of l1 cache. */
1887 512, /* size of l2 cache. */
1888 64, /* size of prefetch block */
1889 6, /* number of parallel prefetches */
1890 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1891 value is increased to the perhaps more appropriate value of 5. */
1892 3, /* Branch cost */
1893 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1894 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1895 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1896 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1897 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1898 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1899 generic_memcpy,
1900 generic_memset,
1901 1, /* scalar_stmt_cost. */
1902 1, /* scalar load_cost. */
1903 1, /* scalar_store_cost. */
1904 1, /* vec_stmt_cost. */
1905 1, /* vec_to_scalar_cost. */
1906 1, /* scalar_to_vec_cost. */
1907 1, /* vec_align_load_cost. */
1908 2, /* vec_unalign_load_cost. */
1909 1, /* vec_store_cost. */
1910 3, /* cond_taken_branch_cost. */
1911 1, /* cond_not_taken_branch_cost. */
1912 };
1913
1914 /* core_cost should produce code tuned for the Core family of CPUs. */
1915 static stringop_algs core_memcpy[2] = {
1916 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1917 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1918 {-1, libcall, false}}}};
1919 static stringop_algs core_memset[2] = {
1920 {libcall, {{6, loop_1_byte, true},
1921 {24, loop, true},
1922 {8192, rep_prefix_4_byte, true},
1923 {-1, libcall, false}}},
1924 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1925 {-1, libcall, false}}}};
1926
1927 static const
1928 struct processor_costs core_cost = {
1929 COSTS_N_INSNS (1), /* cost of an add instruction */
1930 /* On all chips taken into consideration, lea is 2 cycles or more. With
1931 this cost, however, our current implementation of synth_mult results in
1932 the use of unnecessary temporary registers, causing regressions on several
1933 SPECfp benchmarks. */
1934 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1935 COSTS_N_INSNS (1), /* variable shift costs */
1936 COSTS_N_INSNS (1), /* constant shift costs */
1937 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1938 COSTS_N_INSNS (4), /* HI */
1939 COSTS_N_INSNS (3), /* SI */
1940 COSTS_N_INSNS (4), /* DI */
1941 COSTS_N_INSNS (2)}, /* other */
1942 0, /* cost of multiply per each bit set */
1943 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1944 COSTS_N_INSNS (26), /* HI */
1945 COSTS_N_INSNS (42), /* SI */
1946 COSTS_N_INSNS (74), /* DI */
1947 COSTS_N_INSNS (74)}, /* other */
1948 COSTS_N_INSNS (1), /* cost of movsx */
1949 COSTS_N_INSNS (1), /* cost of movzx */
1950 8, /* "large" insn */
1951 17, /* MOVE_RATIO */
1952 4, /* cost for loading QImode using movzbl */
1953 {4, 4, 4}, /* cost of loading integer registers
1954 in QImode, HImode and SImode.
1955 Relative to reg-reg move (2). */
1956 {4, 4, 4}, /* cost of storing integer registers */
1957 4, /* cost of reg,reg fld/fst */
1958 {12, 12, 12}, /* cost of loading fp registers
1959 in SFmode, DFmode and XFmode */
1960 {6, 6, 8}, /* cost of storing fp registers
1961 in SFmode, DFmode and XFmode */
1962 2, /* cost of moving MMX register */
1963 {8, 8}, /* cost of loading MMX registers
1964 in SImode and DImode */
1965 {8, 8}, /* cost of storing MMX registers
1966 in SImode and DImode */
1967 2, /* cost of moving SSE register */
1968 {8, 8, 8}, /* cost of loading SSE registers
1969 in SImode, DImode and TImode */
1970 {8, 8, 8}, /* cost of storing SSE registers
1971 in SImode, DImode and TImode */
1972 5, /* MMX or SSE register to integer */
1973 64, /* size of l1 cache. */
1974 512, /* size of l2 cache. */
1975 64, /* size of prefetch block */
1976 6, /* number of parallel prefetches */
1977 /* FIXME: perhaps a more appropriate value is 5. */
1978 3, /* Branch cost */
1979 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1980 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1981 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1982 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1983 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1984 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1985 core_memcpy,
1986 core_memset,
1987 1, /* scalar_stmt_cost. */
1988 1, /* scalar load_cost. */
1989 1, /* scalar_store_cost. */
1990 1, /* vec_stmt_cost. */
1991 1, /* vec_to_scalar_cost. */
1992 1, /* scalar_to_vec_cost. */
1993 1, /* vec_align_load_cost. */
1994 2, /* vec_unalign_load_cost. */
1995 1, /* vec_store_cost. */
1996 3, /* cond_taken_branch_cost. */
1997 1, /* cond_not_taken_branch_cost. */
1998 };
1999
2000
2001 /* Set by -mtune. */
2002 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2003
2004 /* Set by -mtune or -Os. */
2005 const struct processor_costs *ix86_cost = &pentium_cost;
2006
2007 /* Processor feature/optimization bitmasks. */
2008 #define m_386 (1<<PROCESSOR_I386)
2009 #define m_486 (1<<PROCESSOR_I486)
2010 #define m_PENT (1<<PROCESSOR_PENTIUM)
2011 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2012 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2013 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2014 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2015 #define m_CORE2 (1<<PROCESSOR_CORE2)
2016 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2017 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2018 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2019 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2020 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2021 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2022 #define m_INTEL (1<<PROCESSOR_INTEL)
2023
2024 #define m_GEODE (1<<PROCESSOR_GEODE)
2025 #define m_K6 (1<<PROCESSOR_K6)
2026 #define m_K6_GEODE (m_K6 | m_GEODE)
2027 #define m_K8 (1<<PROCESSOR_K8)
2028 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2029 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2030 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2031 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2032 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2033 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2034 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2035 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2036 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2037 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2038 #define m_BTVER (m_BTVER1 | m_BTVER2)
2039 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2040
2041 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2042
2043 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2044 #undef DEF_TUNE
2045 #define DEF_TUNE(tune, name, selector) name,
2046 #include "x86-tune.def"
2047 #undef DEF_TUNE
2048 };
2049
2050 /* Feature tests against the various tunings. */
2051 unsigned char ix86_tune_features[X86_TUNE_LAST];
2052
2053 /* Feature tests against the various tunings used to create ix86_tune_features
2054 based on the processor mask. */
2055 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2056 #undef DEF_TUNE
2057 #define DEF_TUNE(tune, name, selector) selector,
2058 #include "x86-tune.def"
2059 #undef DEF_TUNE
2060 };
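/* Illustrative sketch of the DEF_TUNE expansion used by the two tables
   above (the entry shown is hypothetical, not taken from x86-tune.def):

     DEF_TUNE (X86_TUNE_EXAMPLE, "example", m_CORE_ALL | m_GENERIC)

   contributes the string "example" to ix86_tune_feature_names and the
   selector mask (m_CORE_ALL | m_GENERIC) to initial_ix86_tune_features;
   set_ix86_tune_features later tests that mask against 1u << ix86_tune.  */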
2061
2062 /* Feature tests against the various architecture variations. */
2063 unsigned char ix86_arch_features[X86_ARCH_LAST];
2064
2065 /* Feature tests against the various architecture variations, used to create
2066 ix86_arch_features based on the processor mask. */
2067 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2068 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2069 ~(m_386 | m_486 | m_PENT | m_K6),
2070
2071 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2072 ~m_386,
2073
2074 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2075 ~(m_386 | m_486),
2076
2077 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2078 ~m_386,
2079
2080 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2081 ~m_386,
2082 };
2083
2084 /* If the average insn count for a single function invocation is
2085 lower than this constant, emit fast (but longer) prologue and
2086 epilogue code. */
2087 #define FAST_PROLOGUE_INSN_COUNT 20
2088
2089 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2090 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2091 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2092 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2093
2094 /* Array of the smallest class containing reg number REGNO, indexed by
2095 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2096
2097 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2098 {
2099 /* ax, dx, cx, bx */
2100 AREG, DREG, CREG, BREG,
2101 /* si, di, bp, sp */
2102 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2103 /* FP registers */
2104 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2105 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2106 /* arg pointer */
2107 NON_Q_REGS,
2108 /* flags, fpsr, fpcr, frame */
2109 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2110 /* SSE registers */
2111 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2112 SSE_REGS, SSE_REGS,
2113 /* MMX registers */
2114 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2115 MMX_REGS, MMX_REGS,
2116 /* REX registers */
2117 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2118 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2119 /* SSE REX registers */
2120 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2121 SSE_REGS, SSE_REGS,
2122 /* AVX-512 SSE registers */
2123 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2124 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2125 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 /* Mask registers. */
2128 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2129 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2130 };
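/* For example (reading the table above), REGNO_REG_CLASS in i386.h maps
   regno 0 (ax) to AREG, the sixteen AVX-512-only SSE registers to
   EVEX_SSE_REGS, and the mask registers to MASK_REGS for the first one
   and MASK_EVEX_REGS for the remaining seven.  */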
2131
2132 /* The "default" register map used in 32bit mode. */
2133
2134 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2135 {
2136 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2137 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2138 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2139 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2140 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2141 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2142 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2143 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2145 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2146 };
2147
2148 /* The "default" register map used in 64bit mode. */
2149
2150 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2151 {
2152 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2153 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2154 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2155 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2156 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2157 8,9,10,11,12,13,14,15, /* extended integer registers */
2158 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2159 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2160 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2161 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2162 };
2163
2164 /* Define the register numbers to be used in Dwarf debugging information.
2165 The SVR4 reference port C compiler uses the following register numbers
2166 in its Dwarf output code:
2167 0 for %eax (gcc regno = 0)
2168 1 for %ecx (gcc regno = 2)
2169 2 for %edx (gcc regno = 1)
2170 3 for %ebx (gcc regno = 3)
2171 4 for %esp (gcc regno = 7)
2172 5 for %ebp (gcc regno = 6)
2173 6 for %esi (gcc regno = 4)
2174 7 for %edi (gcc regno = 5)
2175 The following three DWARF register numbers are never generated by
2176 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2177 believes these numbers have these meanings.
2178 8 for %eip (no gcc equivalent)
2179 9 for %eflags (gcc regno = 17)
2180 10 for %trapno (no gcc equivalent)
2181 It is not at all clear how we should number the FP stack registers
2182 for the x86 architecture. If the version of SDB on x86/svr4 were
2183 a bit less brain dead with respect to floating-point then we would
2184 have a precedent to follow with respect to DWARF register numbers
2185 for x86 FP registers, but the SDB on x86/svr4 is so completely
2186 broken with respect to FP registers that it is hardly worth thinking
2187 of it as something to strive for compatibility with.
2188 The version of x86/svr4 SDB I have at the moment does (partially)
2189 seem to believe that DWARF register number 11 is associated with
2190 the x86 register %st(0), but that's about all. Higher DWARF
2191 register numbers don't seem to be associated with anything in
2192 particular, and even for DWARF regno 11, SDB only seems to under-
2193 stand that it should say that a variable lives in %st(0) (when
2194 asked via an `=' command) if we said it was in DWARF regno 11,
2195 but SDB still prints garbage when asked for the value of the
2196 variable in question (via a `/' command).
2197 (Also note that the labels SDB prints for various FP stack regs
2198 when doing an `x' command are all wrong.)
2199 Note that these problems generally don't affect the native SVR4
2200 C compiler because it doesn't allow the use of -O with -g and
2201 because when it is *not* optimizing, it allocates a memory
2202 location for each floating-point variable, and the memory
2203 location is what gets described in the DWARF AT_location
2204 attribute for the variable in question.
2205 Regardless of the severe mental illness of the x86/svr4 SDB, we
2206 do something sensible here and we use the following DWARF
2207 register numbers. Note that these are all stack-top-relative
2208 numbers.
2209 11 for %st(0) (gcc regno = 8)
2210 12 for %st(1) (gcc regno = 9)
2211 13 for %st(2) (gcc regno = 10)
2212 14 for %st(3) (gcc regno = 11)
2213 15 for %st(4) (gcc regno = 12)
2214 16 for %st(5) (gcc regno = 13)
2215 17 for %st(6) (gcc regno = 14)
2216 18 for %st(7) (gcc regno = 15)
2217 */
2218 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2219 {
2220 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2221 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2222 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2223 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2224 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2225 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2226 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2227 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2229 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2230 };
2231
2232 /* Define parameter passing and return registers. */
2233
2234 static int const x86_64_int_parameter_registers[6] =
2235 {
2236 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2237 };
2238
2239 static int const x86_64_ms_abi_int_parameter_registers[4] =
2240 {
2241 CX_REG, DX_REG, R8_REG, R9_REG
2242 };
2243
2244 static int const x86_64_int_return_registers[4] =
2245 {
2246 AX_REG, DX_REG, DI_REG, SI_REG
2247 };
2248
2249 /* Additional registers that are clobbered by SYSV calls. */
2250
2251 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2252 {
2253 SI_REG, DI_REG,
2254 XMM6_REG, XMM7_REG,
2255 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2256 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2257 };
2258
2259 /* Define the structure for the machine field in struct function. */
2260
2261 struct GTY(()) stack_local_entry {
2262 unsigned short mode;
2263 unsigned short n;
2264 rtx rtl;
2265 struct stack_local_entry *next;
2266 };
2267
2268 /* Structure describing stack frame layout.
2269 Stack grows downward:
2270
2271 [arguments]
2272 <- ARG_POINTER
2273 saved pc
2274
2275 saved static chain if ix86_static_chain_on_stack
2276
2277 saved frame pointer if frame_pointer_needed
2278 <- HARD_FRAME_POINTER
2279 [saved regs]
2280 <- regs_save_offset
2281 [padding0]
2282
2283 [saved SSE regs]
2284 <- sse_regs_save_offset
2285 [padding1] |
2286 | <- FRAME_POINTER
2287 [va_arg registers] |
2288 |
2289 [frame] |
2290 |
2291 [padding2] | = to_allocate
2292 <- STACK_POINTER
2293 */
2294 struct ix86_frame
2295 {
2296 int nsseregs;
2297 int nregs;
2298 int va_arg_size;
2299 int red_zone_size;
2300 int outgoing_arguments_size;
2301
2302 /* The offsets relative to ARG_POINTER. */
2303 HOST_WIDE_INT frame_pointer_offset;
2304 HOST_WIDE_INT hard_frame_pointer_offset;
2305 HOST_WIDE_INT stack_pointer_offset;
2306 HOST_WIDE_INT hfp_save_offset;
2307 HOST_WIDE_INT reg_save_offset;
2308 HOST_WIDE_INT sse_reg_save_offset;
2309
2310 /* When save_regs_using_mov is set, emit prologue using
2311 move instead of push instructions. */
2312 bool save_regs_using_mov;
2313 };
2314
2315 /* Which cpu are we scheduling for. */
2316 enum attr_cpu ix86_schedule;
2317
2318 /* Which cpu are we optimizing for. */
2319 enum processor_type ix86_tune;
2320
2321 /* Which instruction set architecture to use. */
2322 enum processor_type ix86_arch;
2323
2324 /* True if processor has SSE prefetch instruction. */
2325 unsigned char x86_prefetch_sse;
2326
2327 /* -mstackrealign option */
2328 static const char ix86_force_align_arg_pointer_string[]
2329 = "force_align_arg_pointer";
2330
2331 static rtx (*ix86_gen_leave) (void);
2332 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2333 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2334 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2335 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2336 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2339 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2343
2344 /* Preferred alignment for stack boundary in bits. */
2345 unsigned int ix86_preferred_stack_boundary;
2346
2347 /* Alignment for incoming stack boundary in bits specified at
2348 command line. */
2349 static unsigned int ix86_user_incoming_stack_boundary;
2350
2351 /* Default alignment for incoming stack boundary in bits. */
2352 static unsigned int ix86_default_incoming_stack_boundary;
2353
2354 /* Alignment for incoming stack boundary in bits. */
2355 unsigned int ix86_incoming_stack_boundary;
2356
2357 /* Calling abi specific va_list type nodes. */
2358 static GTY(()) tree sysv_va_list_type_node;
2359 static GTY(()) tree ms_va_list_type_node;
2360
2361 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2362 char internal_label_prefix[16];
2363 int internal_label_prefix_len;
2364
2365 /* Fence to use after loop using movnt. */
2366 tree x86_mfence;
2367
2368 /* Register class used for passing a given 64-bit part of the argument.
2369 These represent classes as documented by the psABI, with the exception
2370 of the SSESF and SSEDF classes, which are basically the SSE class except
2371 that gcc uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
2372
2373 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2374 whenever possible (upper half does contain padding). */
2375 enum x86_64_reg_class
2376 {
2377 X86_64_NO_CLASS,
2378 X86_64_INTEGER_CLASS,
2379 X86_64_INTEGERSI_CLASS,
2380 X86_64_SSE_CLASS,
2381 X86_64_SSESF_CLASS,
2382 X86_64_SSEDF_CLASS,
2383 X86_64_SSEUP_CLASS,
2384 X86_64_X87_CLASS,
2385 X86_64_X87UP_CLASS,
2386 X86_64_COMPLEX_X87_CLASS,
2387 X86_64_MEMORY_CLASS
2388 };
2389
2390 #define MAX_CLASSES 8
2391
2392 /* Table of constants used by fldpi, fldln2, etc.... */
2393 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2394 static bool ext_80387_constants_init = 0;
2395
2396 \f
2397 static struct machine_function * ix86_init_machine_status (void);
2398 static rtx ix86_function_value (const_tree, const_tree, bool);
2399 static bool ix86_function_value_regno_p (const unsigned int);
2400 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2401 const_tree);
2402 static rtx ix86_static_chain (const_tree, bool);
2403 static int ix86_function_regparm (const_tree, const_tree);
2404 static void ix86_compute_frame_layout (struct ix86_frame *);
2405 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2406 rtx, rtx, int);
2407 static void ix86_add_new_builtins (HOST_WIDE_INT);
2408 static tree ix86_canonical_va_list_type (tree);
2409 static void predict_jump (int);
2410 static unsigned int split_stack_prologue_scratch_regno (void);
2411 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2412
2413 enum ix86_function_specific_strings
2414 {
2415 IX86_FUNCTION_SPECIFIC_ARCH,
2416 IX86_FUNCTION_SPECIFIC_TUNE,
2417 IX86_FUNCTION_SPECIFIC_MAX
2418 };
2419
2420 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2421 const char *, enum fpmath_unit, bool);
2422 static void ix86_function_specific_save (struct cl_target_option *,
2423 struct gcc_options *opts);
2424 static void ix86_function_specific_restore (struct gcc_options *opts,
2425 struct cl_target_option *);
2426 static void ix86_function_specific_print (FILE *, int,
2427 struct cl_target_option *);
2428 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2429 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2430 struct gcc_options *,
2431 struct gcc_options *,
2432 struct gcc_options *);
2433 static bool ix86_can_inline_p (tree, tree);
2434 static void ix86_set_current_function (tree);
2435 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2436
2437 static enum calling_abi ix86_function_abi (const_tree);
2438
2439 \f
2440 #ifndef SUBTARGET32_DEFAULT_CPU
2441 #define SUBTARGET32_DEFAULT_CPU "i386"
2442 #endif
2443
2444 /* Whether -mtune= or -march= were specified */
2445 static int ix86_tune_defaulted;
2446 static int ix86_arch_specified;
2447
2448 /* Vectorization library interface and handlers. */
2449 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2450
2451 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2452 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2453
2454 /* Processor target table, indexed by processor number */
2455 struct ptt
2456 {
2457 const char *const name; /* processor name */
2458 const struct processor_costs *cost; /* Processor costs */
2459 const int align_loop; /* Default alignments. */
2460 const int align_loop_max_skip;
2461 const int align_jump;
2462 const int align_jump_max_skip;
2463 const int align_func;
2464 };
2465
2466 /* This table must be in sync with enum processor_type in i386.h. */
2467 static const struct ptt processor_target_table[PROCESSOR_max] =
2468 {
2469 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2470 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2471 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2472 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2473 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2474 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2475 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2476 {"core2", &core_cost, 16, 10, 16, 10, 16},
2477 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2478 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2479 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2480 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2481 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2482 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2483 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2484 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2485 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2486 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2487 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2488 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2489 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2490 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2491 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2492 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2493 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2494 };
2495 \f
2496 static bool
2497 gate_insert_vzeroupper (void)
2498 {
2499 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2500 }
2501
2502 static unsigned int
2503 rest_of_handle_insert_vzeroupper (void)
2504 {
2505 int i;
2506
2507 /* vzeroupper instructions are inserted immediately after reload to
2508 account for possible spills from 256-bit registers. The pass
2509 reuses the mode switching infrastructure by re-running the mode
2510 insertion pass, so disable entities that have already been processed. */
2511 for (i = 0; i < MAX_386_ENTITIES; i++)
2512 ix86_optimize_mode_switching[i] = 0;
2513
2514 ix86_optimize_mode_switching[AVX_U128] = 1;
2515
2516 /* Call optimize_mode_switching. */
2517 g->get_passes ()->execute_pass_mode_switching ();
2518 return 0;
2519 }
2520
2521 namespace {
2522
2523 const pass_data pass_data_insert_vzeroupper =
2524 {
2525 RTL_PASS, /* type */
2526 "vzeroupper", /* name */
2527 OPTGROUP_NONE, /* optinfo_flags */
2528 true, /* has_gate */
2529 true, /* has_execute */
2530 TV_NONE, /* tv_id */
2531 0, /* properties_required */
2532 0, /* properties_provided */
2533 0, /* properties_destroyed */
2534 0, /* todo_flags_start */
2535 ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
2536 };
2537
2538 class pass_insert_vzeroupper : public rtl_opt_pass
2539 {
2540 public:
2541 pass_insert_vzeroupper(gcc::context *ctxt)
2542 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2543 {}
2544
2545 /* opt_pass methods: */
2546 bool gate () { return gate_insert_vzeroupper (); }
2547 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2548
2549 }; // class pass_insert_vzeroupper
2550
2551 } // anon namespace
2552
2553 rtl_opt_pass *
2554 make_pass_insert_vzeroupper (gcc::context *ctxt)
2555 {
2556 return new pass_insert_vzeroupper (ctxt);
2557 }
2558
2559 /* Return true if a red-zone is in use. */
2560
2561 static inline bool
2562 ix86_using_red_zone (void)
2563 {
2564 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2565 }
2566 \f
2567 /* Return a string that documents the current -m options. The caller is
2568 responsible for freeing the string. */
2569
2570 static char *
2571 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2572 const char *tune, enum fpmath_unit fpmath,
2573 bool add_nl_p)
2574 {
2575 struct ix86_target_opts
2576 {
2577 const char *option; /* option string */
2578 HOST_WIDE_INT mask; /* isa mask options */
2579 };
2580
2581 /* This table is ordered so that options like -msse4.2 that imply
2582 other options are matched before the options they imply. */
2583 static struct ix86_target_opts isa_opts[] =
2584 {
2585 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2586 { "-mfma", OPTION_MASK_ISA_FMA },
2587 { "-mxop", OPTION_MASK_ISA_XOP },
2588 { "-mlwp", OPTION_MASK_ISA_LWP },
2589 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2590 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2591 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2592 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2593 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2594 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2595 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2596 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2597 { "-msse3", OPTION_MASK_ISA_SSE3 },
2598 { "-msse2", OPTION_MASK_ISA_SSE2 },
2599 { "-msse", OPTION_MASK_ISA_SSE },
2600 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2601 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2602 { "-mmmx", OPTION_MASK_ISA_MMX },
2603 { "-mabm", OPTION_MASK_ISA_ABM },
2604 { "-mbmi", OPTION_MASK_ISA_BMI },
2605 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2606 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2607 { "-mhle", OPTION_MASK_ISA_HLE },
2608 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2609 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2610 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2611 { "-madx", OPTION_MASK_ISA_ADX },
2612 { "-mtbm", OPTION_MASK_ISA_TBM },
2613 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2614 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2615 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2616 { "-maes", OPTION_MASK_ISA_AES },
2617 { "-msha", OPTION_MASK_ISA_SHA },
2618 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2619 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2620 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2621 { "-mf16c", OPTION_MASK_ISA_F16C },
2622 { "-mrtm", OPTION_MASK_ISA_RTM },
2623 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2624 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2625 };
2626
2627 /* Flag options. */
2628 static struct ix86_target_opts flag_opts[] =
2629 {
2630 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2631 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2632 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2633 { "-m80387", MASK_80387 },
2634 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2635 { "-malign-double", MASK_ALIGN_DOUBLE },
2636 { "-mcld", MASK_CLD },
2637 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2638 { "-mieee-fp", MASK_IEEE_FP },
2639 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2640 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2641 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2642 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2643 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2644 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2645 { "-mno-red-zone", MASK_NO_RED_ZONE },
2646 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2647 { "-mrecip", MASK_RECIP },
2648 { "-mrtd", MASK_RTD },
2649 { "-msseregparm", MASK_SSEREGPARM },
2650 { "-mstack-arg-probe", MASK_STACK_PROBE },
2651 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2652 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2653 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2654 { "-mvzeroupper", MASK_VZEROUPPER },
2655 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2656 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2657 { "-mprefer-avx128", MASK_PREFER_AVX128},
2658 };
2659
2660 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2661
2662 char isa_other[40];
2663 char target_other[40];
2664 unsigned num = 0;
2665 unsigned i, j;
2666 char *ret;
2667 char *ptr;
2668 size_t len;
2669 size_t line_len;
2670 size_t sep_len;
2671 const char *abi;
2672
2673 memset (opts, '\0', sizeof (opts));
2674
2675 /* Add -march= option. */
2676 if (arch)
2677 {
2678 opts[num][0] = "-march=";
2679 opts[num++][1] = arch;
2680 }
2681
2682 /* Add -mtune= option. */
2683 if (tune)
2684 {
2685 opts[num][0] = "-mtune=";
2686 opts[num++][1] = tune;
2687 }
2688
2689 /* Add -m32/-m64/-mx32. */
2690 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2691 {
2692 if ((isa & OPTION_MASK_ABI_64) != 0)
2693 abi = "-m64";
2694 else
2695 abi = "-mx32";
2696 isa &= ~ (OPTION_MASK_ISA_64BIT
2697 | OPTION_MASK_ABI_64
2698 | OPTION_MASK_ABI_X32);
2699 }
2700 else
2701 abi = "-m32";
2702 opts[num++][0] = abi;
2703
2704 /* Pick out the options in isa options. */
2705 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2706 {
2707 if ((isa & isa_opts[i].mask) != 0)
2708 {
2709 opts[num++][0] = isa_opts[i].option;
2710 isa &= ~ isa_opts[i].mask;
2711 }
2712 }
2713
2714 if (isa && add_nl_p)
2715 {
2716 opts[num++][0] = isa_other;
2717 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2718 isa);
2719 }
2720
2721 /* Add flag options. */
2722 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2723 {
2724 if ((flags & flag_opts[i].mask) != 0)
2725 {
2726 opts[num++][0] = flag_opts[i].option;
2727 flags &= ~ flag_opts[i].mask;
2728 }
2729 }
2730
2731 if (flags && add_nl_p)
2732 {
2733 opts[num++][0] = target_other;
2734 sprintf (target_other, "(other flags: %#x)", flags);
2735 }
2736
2737 /* Add -fpmath= option. */
2738 if (fpmath)
2739 {
2740 opts[num][0] = "-mfpmath=";
2741 switch ((int) fpmath)
2742 {
2743 case FPMATH_387:
2744 opts[num++][1] = "387";
2745 break;
2746
2747 case FPMATH_SSE:
2748 opts[num++][1] = "sse";
2749 break;
2750
2751 case FPMATH_387 | FPMATH_SSE:
2752 opts[num++][1] = "sse+387";
2753 break;
2754
2755 default:
2756 gcc_unreachable ();
2757 }
2758 }
2759
2760 /* Any options? */
2761 if (num == 0)
2762 return NULL;
2763
2764 gcc_assert (num < ARRAY_SIZE (opts));
2765
2766 /* Size the string. */
2767 len = 0;
2768 sep_len = (add_nl_p) ? 3 : 1;
2769 for (i = 0; i < num; i++)
2770 {
2771 len += sep_len;
2772 for (j = 0; j < 2; j++)
2773 if (opts[i][j])
2774 len += strlen (opts[i][j]);
2775 }
2776
2777 /* Build the string. */
2778 ret = ptr = (char *) xmalloc (len);
2779 line_len = 0;
2780
2781 for (i = 0; i < num; i++)
2782 {
2783 size_t len2[2];
2784
2785 for (j = 0; j < 2; j++)
2786 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2787
2788 if (i != 0)
2789 {
2790 *ptr++ = ' ';
2791 line_len++;
2792
2793 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2794 {
2795 *ptr++ = '\\';
2796 *ptr++ = '\n';
2797 line_len = 0;
2798 }
2799 }
2800
2801 for (j = 0; j < 2; j++)
2802 if (opts[i][j])
2803 {
2804 memcpy (ptr, opts[i][j], len2[j]);
2805 ptr += len2[j];
2806 line_len += len2[j];
2807 }
2808 }
2809
2810 *ptr = '\0';
2811 gcc_assert (ret + len >= ptr);
2812
2813 return ret;
2814 }
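/* A sketch of the output format produced above (the CPU name and ISA set
   are hypothetical, chosen only for illustration): for arch "foo", a NULL
   tune string, a 64-bit ISA mask including SSE3/SSE2/SSE and fpmath set to
   FPMATH_SSE, ix86_target_string would return roughly

     "-march=foo -m64 -msse3 -msse2 -msse -mfpmath=sse"

   i.e. -march=/-mtune= first, then the ABI switch, then the ISA and flag
   options in the order of the tables above, and -mfpmath= last.  */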
2815
2816 /* Return true if profiling code should be emitted before the
2817 prologue; otherwise return false.
2818 Note: for x86 with "hotfix" this is regrettably not supported. */
2819 static bool
2820 ix86_profile_before_prologue (void)
2821 {
2822 return flag_fentry != 0;
2823 }
2824
2825 /* Function that is callable from the debugger to print the current
2826 options. */
2827 void ATTRIBUTE_UNUSED
2828 ix86_debug_options (void)
2829 {
2830 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2831 ix86_arch_string, ix86_tune_string,
2832 ix86_fpmath, true);
2833
2834 if (opts)
2835 {
2836 fprintf (stderr, "%s\n\n", opts);
2837 free (opts);
2838 }
2839 else
2840 fputs ("<no options>\n\n", stderr);
2841
2842 return;
2843 }
2844
2845 static const char *stringop_alg_names[] = {
2846 #define DEF_ENUM
2847 #define DEF_ALG(alg, name) #name,
2848 #include "stringop.def"
2849 #undef DEF_ENUM
2850 #undef DEF_ALG
2851 };
2852
2853 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2854 The string has the following form (or is a comma-separated list of such entries):
2855
2856 strategy_alg:max_size:[align|noalign]
2857
2858 where the full size range for the strategy is either [0, max_size] or
2859 [min_size, max_size], in which min_size is the max_size + 1 of the
2860 preceding range. The last size range must have max_size == -1.
2861
2862 Examples:
2863
2864 1.
2865 -mmemcpy-strategy=libcall:-1:noalign
2866
2867 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2868
2869
2870 2.
2871 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2872
2873 This is to tell the compiler to use the following strategy for memset
2874 1) when the expected size is between [1, 16], use rep_8byte strategy;
2875 2) when the size is between [17, 2048], use vector_loop;
2876 3) when the size is > 2048, use libcall. */
2877
2878 struct stringop_size_range
2879 {
2880 int max;
2881 stringop_alg alg;
2882 bool noalign;
2883 };
2884
2885 static void
2886 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2887 {
2888 const struct stringop_algs *default_algs;
2889 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2890 char *curr_range_str, *next_range_str;
2891 int i = 0, n = 0;
2892
2893 if (is_memset)
2894 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2895 else
2896 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2897
2898 curr_range_str = strategy_str;
2899
2900 do
2901 {
2902 int maxs;
2903 char alg_name[128];
2904 char align[16];
2905 next_range_str = strchr (curr_range_str, ',');
2906 if (next_range_str)
2907 *next_range_str++ = '\0';
2908
2909 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2910 alg_name, &maxs, align))
2911 {
2912 error ("wrong arg %s to option %s", curr_range_str,
2913 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2914 return;
2915 }
2916
2917 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2918 {
2919 error ("size ranges of option %s should be increasing",
2920 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2921 return;
2922 }
2923
2924 for (i = 0; i < last_alg; i++)
2925 if (!strcmp (alg_name, stringop_alg_names[i]))
2926 break;
2927
2928 if (i == last_alg)
2929 {
2930 error ("wrong stringop strategy name %s specified for option %s",
2931 alg_name,
2932 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2933 return;
2934 }
2935
2936 input_ranges[n].max = maxs;
2937 input_ranges[n].alg = (stringop_alg) i;
2938 if (!strcmp (align, "align"))
2939 input_ranges[n].noalign = false;
2940 else if (!strcmp (align, "noalign"))
2941 input_ranges[n].noalign = true;
2942 else
2943 {
2944 error ("unknown alignment %s specified for option %s",
2945 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2946 return;
2947 }
2948 n++;
2949 curr_range_str = next_range_str;
2950 }
2951 while (curr_range_str);
2952
2953 if (input_ranges[n - 1].max != -1)
2954 {
2955 error ("the max value for the last size range should be -1"
2956 " for option %s",
2957 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2958 return;
2959 }
2960
2961 if (n > MAX_STRINGOP_ALGS)
2962 {
2963 error ("too many size ranges specified in option %s",
2964 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2965 return;
2966 }
2967
2968 /* Now override the default algs array. */
2969 for (i = 0; i < n; i++)
2970 {
2971 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2972 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2973 = input_ranges[i].alg;
2974 *const_cast<int *>(&default_algs->size[i].noalign)
2975 = input_ranges[i].noalign;
2976 }
2977 }
2978
2979 \f
2980 /* Parse the -mtune-ctrl= option. When DUMP is true,
2981 print the features that are explicitly set. */
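/* The value is a comma separated list of feature names from the
   ix86_tune_feature_names table; a leading '^' clears the named feature
   instead of setting it, e.g. -mtune-ctrl=<feature>,^<other-feature>
   sets the first feature and clears the second.  */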
2982
2983 static void
2984 parse_mtune_ctrl_str (bool dump)
2985 {
2986 if (!ix86_tune_ctrl_string)
2987 return;
2988
2989 char *next_feature_string = NULL;
2990 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2991 char *orig = curr_feature_string;
2992 int i;
2993 do
2994 {
2995 bool clear = false;
2996
2997 next_feature_string = strchr (curr_feature_string, ',');
2998 if (next_feature_string)
2999 *next_feature_string++ = '\0';
3000 if (*curr_feature_string == '^')
3001 {
3002 curr_feature_string++;
3003 clear = true;
3004 }
3005 for (i = 0; i < X86_TUNE_LAST; i++)
3006 {
3007 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3008 {
3009 ix86_tune_features[i] = !clear;
3010 if (dump)
3011 fprintf (stderr, "Explicitly %s feature %s\n",
3012 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3013 break;
3014 }
3015 }
3016 if (i == X86_TUNE_LAST)
3017 error ("unknown parameter to option -mtune-ctrl: %s",
3018 clear ? curr_feature_string - 1 : curr_feature_string);
3019 curr_feature_string = next_feature_string;
3020 }
3021 while (curr_feature_string);
3022 free (orig);
3023 }
3024
3025 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3026 processor type. */
3027
3028 static void
3029 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3030 {
3031 unsigned int ix86_tune_mask = 1u << ix86_tune;
3032 int i;
3033
3034 for (i = 0; i < X86_TUNE_LAST; ++i)
3035 {
3036 if (ix86_tune_no_default)
3037 ix86_tune_features[i] = 0;
3038 else
3039 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3040 }
3041
3042 if (dump)
3043 {
3044 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3045 for (i = 0; i < X86_TUNE_LAST; i++)
3046 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3047 ix86_tune_features[i] ? "on" : "off");
3048 }
3049
3050 parse_mtune_ctrl_str (dump);
3051 }
3052
3053
3054 /* Override various settings based on options. If MAIN_ARGS_P, the
3055 options are from the command line, otherwise they are from
3056 attributes. */
3057
3058 static void
3059 ix86_option_override_internal (bool main_args_p,
3060 struct gcc_options *opts,
3061 struct gcc_options *opts_set)
3062 {
3063 int i;
3064 unsigned int ix86_arch_mask;
3065 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3066 const char *prefix;
3067 const char *suffix;
3068 const char *sw;
3069
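/* Each PTA_* bit below names an ISA feature or capability implied by an
   entry in processor_alias_table; they are tested when handling -march=
   and -mtune=.  */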
3070 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3071 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3072 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3073 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3074 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3075 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3076 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3077 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3078 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3079 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3080 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3081 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3082 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3083 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3084 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3085 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3086 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3087 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3088 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3089 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3090 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3091 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3092 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3093 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3094 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3095 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3096 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3097 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3098 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3099 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3100 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3101 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3102 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3103 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3104 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3105 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3106 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3107 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3108 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3109 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3110 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3111 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3112 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3113 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3114 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3115
3116 #define PTA_CORE2 \
3117 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3118 | PTA_CX16 | PTA_FXSR)
3119 #define PTA_NEHALEM \
3120 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3121 #define PTA_WESTMERE \
3122 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3123 #define PTA_SANDYBRIDGE \
3124 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3125 #define PTA_IVYBRIDGE \
3126 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3127 #define PTA_HASWELL \
3128 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3129 | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE)
3130 #define PTA_BROADWELL \
3131 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3132 #define PTA_BONNELL \
3133 (PTA_CORE2 | PTA_MOVBE)
3134 #define PTA_SILVERMONT \
3135 (PTA_WESTMERE | PTA_MOVBE)
3136
3137 /* If this reaches 64, we need to widen the struct pta flags field below. */
3138
3139 static struct pta
3140 {
3141 const char *const name; /* processor name or nickname. */
3142 const enum processor_type processor;
3143 const enum attr_cpu schedule;
3144 const unsigned HOST_WIDE_INT flags;
3145 }
3146 const processor_alias_table[] =
3147 {
3148 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3149 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3150 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3151 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3152 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3153 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3154 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3155 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3156 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3157 PTA_MMX | PTA_SSE | PTA_FXSR},
3158 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3159 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3160 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3161 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3162 PTA_MMX | PTA_SSE | PTA_FXSR},
3163 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3164 PTA_MMX | PTA_SSE | PTA_FXSR},
3165 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3166 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3167 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3168 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3169 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3170 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3171 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3172 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3173 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3174 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3175 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3176 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3177 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3178 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3179 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3180 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3181 PTA_SANDYBRIDGE},
3182 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3183 PTA_SANDYBRIDGE},
3184 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3185 PTA_IVYBRIDGE},
3186 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3187 PTA_IVYBRIDGE},
3188 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3189 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3190 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3191 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3192 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3193 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3194 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3195 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3196 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3197 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3198 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3199 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3200 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3201 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3202 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3203 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3204 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3205 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3206 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3207 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3208 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3209 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3210 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3211 {"x86-64", PROCESSOR_K8, CPU_K8,
3212 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3213 {"k8", PROCESSOR_K8, CPU_K8,
3214 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3215 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3216 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3217 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3218 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3219 {"opteron", PROCESSOR_K8, CPU_K8,
3220 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3221 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3222 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3223 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3224 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3225 {"athlon64", PROCESSOR_K8, CPU_K8,
3226 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3227 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3228 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3229 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3230 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3231 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3232 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3233 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3234 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3235 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3236 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3237 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3238 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3239 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3240 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3241 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3242 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3243 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3244 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3245 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3246 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3247 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3248 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3249 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3250 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3251 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3252 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3253 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3254 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3255 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3256 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3257 | PTA_XSAVEOPT | PTA_FSGSBASE},
3258 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3259 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3260 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3261 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3262 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3263 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3264 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3265 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3266 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3267 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3268 | PTA_FXSR | PTA_XSAVE},
3269 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3270 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3271 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3272 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3273 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3274 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3275
3276 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3277 PTA_64BIT
3278 | PTA_HLE /* flags are only used for -march switch. */ },
3279 };
3280
3281 /* -mrecip options. */
3282 static struct
3283 {
3284 const char *string; /* option name */
3285 unsigned int mask; /* mask bits to set */
3286 }
3287 const recip_options[] =
3288 {
3289 { "all", RECIP_MASK_ALL },
3290 { "none", RECIP_MASK_NONE },
3291 { "div", RECIP_MASK_DIV },
3292 { "sqrt", RECIP_MASK_SQRT },
3293 { "vec-div", RECIP_MASK_VEC_DIV },
3294 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3295 };
3296
3297 int const pta_size = ARRAY_SIZE (processor_alias_table);
3298
3299 /* Set up prefix/suffix so the error messages refer to either the command
3300 line argument, or the attribute(target). */
3301 if (main_args_p)
3302 {
3303 prefix = "-m";
3304 suffix = "";
3305 sw = "switch";
3306 }
3307 else
3308 {
3309 prefix = "option(\"";
3310 suffix = "\")";
3311 sw = "attribute";
3312 }
3313
3314 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3315 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3316 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3317 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3318 #ifdef TARGET_BI_ARCH
3319 else
3320 {
3321 #if TARGET_BI_ARCH == 1
3322 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3323 is on and OPTION_MASK_ABI_X32 is off. We turn off
3324 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3325 -mx32. */
3326 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3327 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3328 #else
3329 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3330 on and OPTION_MASK_ABI_64 is off. We turn off
3331 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3332 -m64. */
3333 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3334 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3335 #endif
3336 }
3337 #endif
3338
3339 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3340 {
3341 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3342 OPTION_MASK_ABI_64 for TARGET_X32. */
3343 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3344 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3345 }
3346 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3347 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3348 | OPTION_MASK_ABI_X32
3349 | OPTION_MASK_ABI_64);
3350 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3351 {
3352 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3353 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3354 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3355 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3356 }
3357
3358 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3359 SUBTARGET_OVERRIDE_OPTIONS;
3360 #endif
3361
3362 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3363 SUBSUBTARGET_OVERRIDE_OPTIONS;
3364 #endif
3365
3366 /* -fPIC is the default for x86_64. */
3367 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3368 opts->x_flag_pic = 2;
3369
3370 /* Need to check -mtune=generic first. */
3371 if (opts->x_ix86_tune_string)
3372 {
3373 /* As special support for cross compilers we read -mtune=native
3374 as -mtune=generic. With native compilers we won't see the
3375 -mtune=native, as it was changed by the driver. */
3376 if (!strcmp (opts->x_ix86_tune_string, "native"))
3377 {
3378 opts->x_ix86_tune_string = "generic";
3379 }
3380 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3381 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3382 "%stune=k8%s or %stune=generic%s instead as appropriate",
3383 prefix, suffix, prefix, suffix, prefix, suffix);
3384 }
3385 else
3386 {
3387 if (opts->x_ix86_arch_string)
3388 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3389 if (!opts->x_ix86_tune_string)
3390 {
3391 opts->x_ix86_tune_string
3392 = processor_target_table[TARGET_CPU_DEFAULT].name;
3393 ix86_tune_defaulted = 1;
3394 }
3395
3396 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3397 or defaulted. We need to use a sensible tune option. */
3398 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3399 {
3400 opts->x_ix86_tune_string = "generic";
3401 }
3402 }
3403
3404 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3405 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3406 {
3407 /* rep; movq isn't available in 32-bit code. */
3408 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3409 opts->x_ix86_stringop_alg = no_stringop;
3410 }
3411
3412 if (!opts->x_ix86_arch_string)
3413 opts->x_ix86_arch_string
3414 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3415 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3416 else
3417 ix86_arch_specified = 1;
3418
3419 if (opts_set->x_ix86_pmode)
3420 {
3421 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3422 && opts->x_ix86_pmode == PMODE_SI)
3423 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3424 && opts->x_ix86_pmode == PMODE_DI))
3425 error ("address mode %qs not supported in the %s bit mode",
3426 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3427 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3428 }
3429 else
3430 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3431 ? PMODE_DI : PMODE_SI;
3432
3433 if (!opts_set->x_ix86_abi)
3434 opts->x_ix86_abi = DEFAULT_ABI;
3435
3436 /* For targets using the MS ABI, enable ms-extensions unless it was
3437 explicitly turned off. For non-MS ABIs we turn this
3438 option off. */
3439 if (!opts_set->x_flag_ms_extensions)
3440 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3441
3442 if (opts_set->x_ix86_cmodel)
3443 {
3444 switch (opts->x_ix86_cmodel)
3445 {
3446 case CM_SMALL:
3447 case CM_SMALL_PIC:
3448 if (opts->x_flag_pic)
3449 opts->x_ix86_cmodel = CM_SMALL_PIC;
3450 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3451 error ("code model %qs not supported in the %s bit mode",
3452 "small", "32");
3453 break;
3454
3455 case CM_MEDIUM:
3456 case CM_MEDIUM_PIC:
3457 if (opts->x_flag_pic)
3458 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3459 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3460 error ("code model %qs not supported in the %s bit mode",
3461 "medium", "32");
3462 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3463 error ("code model %qs not supported in x32 mode",
3464 "medium");
3465 break;
3466
3467 case CM_LARGE:
3468 case CM_LARGE_PIC:
3469 if (opts->x_flag_pic)
3470 opts->x_ix86_cmodel = CM_LARGE_PIC;
3471 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3472 error ("code model %qs not supported in the %s bit mode",
3473 "large", "32");
3474 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3475 error ("code model %qs not supported in x32 mode",
3476 "large");
3477 break;
3478
3479 case CM_32:
3480 if (opts->x_flag_pic)
3481 error ("code model %s does not support PIC mode", "32");
3482 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3483 error ("code model %qs not supported in the %s bit mode",
3484 "32", "64");
3485 break;
3486
3487 case CM_KERNEL:
3488 if (opts->x_flag_pic)
3489 {
3490 error ("code model %s does not support PIC mode", "kernel");
3491 opts->x_ix86_cmodel = CM_32;
3492 }
3493 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3494 error ("code model %qs not supported in the %s bit mode",
3495 "kernel", "32");
3496 break;
3497
3498 default:
3499 gcc_unreachable ();
3500 }
3501 }
3502 else
3503 {
3504 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3505 use of rip-relative addressing. This eliminates fixups that
3506 would otherwise be needed if this object is to be placed in a
3507 DLL, and is essentially just as efficient as direct addressing. */
3508 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3509 && (TARGET_RDOS || TARGET_PECOFF))
3510 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3511 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3512 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3513 else
3514 opts->x_ix86_cmodel = CM_32;
3515 }
3516 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3517 {
3518 error ("-masm=intel not supported in this configuration");
3519 opts->x_ix86_asm_dialect = ASM_ATT;
3520 }
3521 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3522 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3523 sorry ("%i-bit mode not compiled in",
3524 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3525
3526 for (i = 0; i < pta_size; i++)
3527 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3528 {
3529 ix86_schedule = processor_alias_table[i].schedule;
3530 ix86_arch = processor_alias_table[i].processor;
3531 /* Default cpu tuning to the architecture. */
3532 ix86_tune = ix86_arch;
3533
3534 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3535 && !(processor_alias_table[i].flags & PTA_64BIT))
3536 error ("CPU you selected does not support x86-64 "
3537 "instruction set");
3538
3539 if (processor_alias_table[i].flags & PTA_MMX
3540 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3541 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3542 if (processor_alias_table[i].flags & PTA_3DNOW
3543 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3544 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3545 if (processor_alias_table[i].flags & PTA_3DNOW_A
3546 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3547 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3548 if (processor_alias_table[i].flags & PTA_SSE
3549 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3550 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3551 if (processor_alias_table[i].flags & PTA_SSE2
3552 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3553 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3554 if (processor_alias_table[i].flags & PTA_SSE3
3555 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3556 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3557 if (processor_alias_table[i].flags & PTA_SSSE3
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3560 if (processor_alias_table[i].flags & PTA_SSE4_1
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3563 if (processor_alias_table[i].flags & PTA_SSE4_2
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3566 if (processor_alias_table[i].flags & PTA_AVX
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3569 if (processor_alias_table[i].flags & PTA_AVX2
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3572 if (processor_alias_table[i].flags & PTA_FMA
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3575 if (processor_alias_table[i].flags & PTA_SSE4A
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3578 if (processor_alias_table[i].flags & PTA_FMA4
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3581 if (processor_alias_table[i].flags & PTA_XOP
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3584 if (processor_alias_table[i].flags & PTA_LWP
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3587 if (processor_alias_table[i].flags & PTA_ABM
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3590 if (processor_alias_table[i].flags & PTA_BMI
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3593 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3596 if (processor_alias_table[i].flags & PTA_TBM
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3599 if (processor_alias_table[i].flags & PTA_BMI2
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3602 if (processor_alias_table[i].flags & PTA_CX16
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3605 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3608 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3609 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3610 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3611 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3612 if (processor_alias_table[i].flags & PTA_MOVBE
3613 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3614 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3615 if (processor_alias_table[i].flags & PTA_AES
3616 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3617 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3618 if (processor_alias_table[i].flags & PTA_SHA
3619 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3620 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3621 if (processor_alias_table[i].flags & PTA_PCLMUL
3622 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3623 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3624 if (processor_alias_table[i].flags & PTA_FSGSBASE
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3627 if (processor_alias_table[i].flags & PTA_RDRND
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3630 if (processor_alias_table[i].flags & PTA_F16C
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3633 if (processor_alias_table[i].flags & PTA_RTM
3634 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3635 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3636 if (processor_alias_table[i].flags & PTA_HLE
3637 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3638 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3639 if (processor_alias_table[i].flags & PTA_PRFCHW
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3642 if (processor_alias_table[i].flags & PTA_RDSEED
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3645 if (processor_alias_table[i].flags & PTA_ADX
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3648 if (processor_alias_table[i].flags & PTA_FXSR
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3651 if (processor_alias_table[i].flags & PTA_XSAVE
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3654 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3657 if (processor_alias_table[i].flags & PTA_AVX512F
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3660 if (processor_alias_table[i].flags & PTA_AVX512ER
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3663 if (processor_alias_table[i].flags & PTA_AVX512PF
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3666 if (processor_alias_table[i].flags & PTA_AVX512CD
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3669 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3670 x86_prefetch_sse = true;
3671
3672 break;
3673 }
3674
3675 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3676 error ("generic CPU can be used only for %stune=%s %s",
3677 prefix, suffix, sw);
3678 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3679 error ("intel CPU can be used only for %stune=%s %s",
3680 prefix, suffix, sw);
3681 else if (i == pta_size)
3682 error ("bad value (%s) for %sarch=%s %s",
3683 opts->x_ix86_arch_string, prefix, suffix, sw);
3684
3685 ix86_arch_mask = 1u << ix86_arch;
3686 for (i = 0; i < X86_ARCH_LAST; ++i)
3687 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3688
3689 for (i = 0; i < pta_size; i++)
3690 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3691 {
3692 ix86_schedule = processor_alias_table[i].schedule;
3693 ix86_tune = processor_alias_table[i].processor;
3694 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3695 {
3696 if (!(processor_alias_table[i].flags & PTA_64BIT))
3697 {
3698 if (ix86_tune_defaulted)
3699 {
3700 opts->x_ix86_tune_string = "x86-64";
3701 for (i = 0; i < pta_size; i++)
3702 if (! strcmp (opts->x_ix86_tune_string,
3703 processor_alias_table[i].name))
3704 break;
3705 ix86_schedule = processor_alias_table[i].schedule;
3706 ix86_tune = processor_alias_table[i].processor;
3707 }
3708 else
3709 error ("CPU you selected does not support x86-64 "
3710 "instruction set");
3711 }
3712 }
3713 /* Intel CPUs have always interpreted SSE prefetch instructions as
3714 NOPs; so, we can enable SSE prefetch instructions even when
3715 -mtune (rather than -march) points us to a processor that has them.
3716 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3717 higher processors. */
3718 if (TARGET_CMOV
3719 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3720 x86_prefetch_sse = true;
3721 break;
3722 }
3723
3724 if (ix86_tune_specified && i == pta_size)
3725 error ("bad value (%s) for %stune=%s %s",
3726 opts->x_ix86_tune_string, prefix, suffix, sw);
3727
3728 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3729
3730 #ifndef USE_IX86_FRAME_POINTER
3731 #define USE_IX86_FRAME_POINTER 0
3732 #endif
3733
3734 #ifndef USE_X86_64_FRAME_POINTER
3735 #define USE_X86_64_FRAME_POINTER 0
3736 #endif
3737
3738 /* Set the default values for switches whose default depends on TARGET_64BIT
3739 in case they weren't overwritten by command line options. */
3740 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3741 {
3742 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3743 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3744 if (opts->x_flag_asynchronous_unwind_tables
3745 && !opts_set->x_flag_unwind_tables
3746 && TARGET_64BIT_MS_ABI)
3747 opts->x_flag_unwind_tables = 1;
3748 if (opts->x_flag_asynchronous_unwind_tables == 2)
3749 opts->x_flag_unwind_tables
3750 = opts->x_flag_asynchronous_unwind_tables = 1;
3751 if (opts->x_flag_pcc_struct_return == 2)
3752 opts->x_flag_pcc_struct_return = 0;
3753 }
3754 else
3755 {
3756 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3757 opts->x_flag_omit_frame_pointer
3758 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3759 if (opts->x_flag_asynchronous_unwind_tables == 2)
3760 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3761 if (opts->x_flag_pcc_struct_return == 2)
3762 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3763 }
3764
3765 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3766 if (opts->x_optimize_size)
3767 ix86_cost = &ix86_size_cost;
3768 else
3769 ix86_cost = ix86_tune_cost;
3770
3771 /* Arrange to set up i386_stack_locals for all functions. */
3772 init_machine_status = ix86_init_machine_status;
3773
3774 /* Validate -mregparm= value. */
3775 if (opts_set->x_ix86_regparm)
3776 {
3777 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3778 warning (0, "-mregparm is ignored in 64-bit mode");
3779 if (opts->x_ix86_regparm > REGPARM_MAX)
3780 {
3781 error ("-mregparm=%d is not between 0 and %d",
3782 opts->x_ix86_regparm, REGPARM_MAX);
3783 opts->x_ix86_regparm = 0;
3784 }
3785 }
3786 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3787 opts->x_ix86_regparm = REGPARM_MAX;
3788
3789 /* Default align_* from the processor table. */
3790 if (opts->x_align_loops == 0)
3791 {
3792 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3793 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3794 }
3795 if (opts->x_align_jumps == 0)
3796 {
3797 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3798 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3799 }
3800 if (opts->x_align_functions == 0)
3801 {
3802 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3803 }
3804
3805 /* Provide default for -mbranch-cost= value. */
3806 if (!opts_set->x_ix86_branch_cost)
3807 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3808
3809 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3810 {
3811 opts->x_target_flags
3812 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3813
3814 /* Enable by default the SSE and MMX builtins. Do allow the user to
3815 explicitly disable any of these. In particular, disabling SSE and
3816 MMX for kernel code is extremely useful. */
3817 if (!ix86_arch_specified)
3818 opts->x_ix86_isa_flags
3819 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3820 | TARGET_SUBTARGET64_ISA_DEFAULT)
3821 & ~opts->x_ix86_isa_flags_explicit);
3822
3823 if (TARGET_RTD_P (opts->x_target_flags))
3824 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3825 }
3826 else
3827 {
3828 opts->x_target_flags
3829 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3830
3831 if (!ix86_arch_specified)
3832 opts->x_ix86_isa_flags
3833 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3834
3835 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3836 when the programmer takes care to keep the stack from being destroyed. */
3837 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3838 opts->x_target_flags |= MASK_NO_RED_ZONE;
3839 }
3840
3841 /* Keep nonleaf frame pointers. */
3842 if (opts->x_flag_omit_frame_pointer)
3843 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3844 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3845 opts->x_flag_omit_frame_pointer = 1;
3846
3847 /* If we're doing fast math, we don't care about comparison order
3848 wrt NaNs. This lets us use a shorter comparison sequence. */
3849 if (opts->x_flag_finite_math_only)
3850 opts->x_target_flags &= ~MASK_IEEE_FP;
3851
3852 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3853 since the insns won't need emulation. */
3854 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3855 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3856
3857 /* Likewise, if the target doesn't have a 387, or we've specified
3858 software floating point, don't use 387 inline intrinsics. */
3859 if (!TARGET_80387_P (opts->x_target_flags))
3860 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3861
3862 /* Turn on MMX builtins for -msse. */
3863 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3864 opts->x_ix86_isa_flags
3865 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3866
3867 /* Enable SSE prefetch. */
3868 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3869 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3870 x86_prefetch_sse = true;
3871
3872 /* Enable prefetch{,w} instructions for -m3dnow. */
3873 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags))
3874 opts->x_ix86_isa_flags
3875 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3876
3877 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3878 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3879 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3880 opts->x_ix86_isa_flags
3881 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3882
3883 /* Enable lzcnt instruction for -mabm. */
3884 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3885 opts->x_ix86_isa_flags
3886 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3887
3888 /* Validate -mpreferred-stack-boundary= value or default it to
3889 PREFERRED_STACK_BOUNDARY_DEFAULT. */
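/* The argument is the base-2 logarithm of the boundary in bytes; for
   example, -mpreferred-stack-boundary=4 requests a 16-byte (128-bit)
   boundary.  */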
3890 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3891 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3892 {
3893 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3894 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3895 int max = (TARGET_SEH ? 4 : 12);
3896
3897 if (opts->x_ix86_preferred_stack_boundary_arg < min
3898 || opts->x_ix86_preferred_stack_boundary_arg > max)
3899 {
3900 if (min == max)
3901 error ("-mpreferred-stack-boundary is not supported "
3902 "for this target");
3903 else
3904 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3905 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3906 }
3907 else
3908 ix86_preferred_stack_boundary
3909 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3910 }
3911
3912 /* Set the default value for -mstackrealign. */
3913 if (opts->x_ix86_force_align_arg_pointer == -1)
3914 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3915
3916 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3917
3918 /* Validate -mincoming-stack-boundary= value or default it to
3919 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
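/* As above, the argument is the base-2 logarithm of the boundary in
   bytes.  */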
3920 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3921 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3922 {
3923 if (opts->x_ix86_incoming_stack_boundary_arg
3924 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3925 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3926 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3927 opts->x_ix86_incoming_stack_boundary_arg,
3928 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3929 else
3930 {
3931 ix86_user_incoming_stack_boundary
3932 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3933 ix86_incoming_stack_boundary
3934 = ix86_user_incoming_stack_boundary;
3935 }
3936 }
3937
3938 /* Accept -msseregparm only if at least SSE support is enabled. */
3939 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3940 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3941 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3942
3943 if (opts_set->x_ix86_fpmath)
3944 {
3945 if (opts->x_ix86_fpmath & FPMATH_SSE)
3946 {
3947 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3948 {
3949 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3950 opts->x_ix86_fpmath = FPMATH_387;
3951 }
3952 else if ((opts->x_ix86_fpmath & FPMATH_387)
3953 && !TARGET_80387_P (opts->x_target_flags))
3954 {
3955 warning (0, "387 instruction set disabled, using SSE arithmetics");
3956 opts->x_ix86_fpmath = FPMATH_SSE;
3957 }
3958 }
3959 }
3960 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3961 -mfpmath=387. The latter is nevertheless the default on many targets, since
3962 the extra 80-bit precision of temporaries is considered part of the ABI.
3963 Override the default at least for -ffast-math.
3964 TODO: -mfpmath=both seems to produce equally performing code with slightly
3965 smaller binaries. It is however not clear whether register allocation is
3966 ready for this setting.
3967 Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
3968 codegen. We may switch to 387 with -ffast-math for size-optimized
3969 functions. */
3970 else if (fast_math_flags_set_p (&global_options)
3971 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3972 opts->x_ix86_fpmath = FPMATH_SSE;
3973 else
3974 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
3975
3976 /* If the i387 is disabled, then do not return values in it. */
3977 if (!TARGET_80387_P (opts->x_target_flags))
3978 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
3979
3980 /* Use an external vectorized library for vectorizing intrinsics. */
3981 if (opts_set->x_ix86_veclibabi_type)
3982 switch (opts->x_ix86_veclibabi_type)
3983 {
3984 case ix86_veclibabi_type_svml:
3985 ix86_veclib_handler = ix86_veclibabi_svml;
3986 break;
3987
3988 case ix86_veclibabi_type_acml:
3989 ix86_veclib_handler = ix86_veclibabi_acml;
3990 break;
3991
3992 default:
3993 gcc_unreachable ();
3994 }
3995
3996 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
3997 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
3998 && !opts->x_optimize_size)
3999 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4000
4001 /* If stack probes are required, the space used for large function
4002 arguments on the stack must also be probed, so enable
4003 -maccumulate-outgoing-args so this happens in the prologue. */
4004 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4005 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4006 {
4007 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4008 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4009 "for correctness", prefix, suffix);
4010 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4011 }
4012
4013 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4014 {
4015 char *p;
4016 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4017 p = strchr (internal_label_prefix, 'X');
4018 internal_label_prefix_len = p - internal_label_prefix;
4019 *p = '\0';
4020 }
4021
4022 /* When the scheduling description is not available, disable the scheduler pass
4023 so it won't slow down the compilation and make x87 code slower. */
4024 if (!TARGET_SCHEDULE)
4025 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4026
4027 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4028 ix86_tune_cost->simultaneous_prefetches,
4029 opts->x_param_values,
4030 opts_set->x_param_values);
4031 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4032 ix86_tune_cost->prefetch_block,
4033 opts->x_param_values,
4034 opts_set->x_param_values);
4035 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4036 ix86_tune_cost->l1_cache_size,
4037 opts->x_param_values,
4038 opts_set->x_param_values);
4039 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4040 ix86_tune_cost->l2_cache_size,
4041 opts->x_param_values,
4042 opts_set->x_param_values);
4043
4044 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4045 if (opts->x_flag_prefetch_loop_arrays < 0
4046 && HAVE_prefetch
4047 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4048 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4049 opts->x_flag_prefetch_loop_arrays = 1;
4050
4051 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4052 can be optimized to ap = __builtin_next_arg (0). */
4053 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4054 targetm.expand_builtin_va_start = NULL;
4055
4056 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4057 {
4058 ix86_gen_leave = gen_leave_rex64;
4059 if (Pmode == DImode)
4060 {
4061 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4062 ix86_gen_tls_local_dynamic_base_64
4063 = gen_tls_local_dynamic_base_64_di;
4064 }
4065 else
4066 {
4067 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4068 ix86_gen_tls_local_dynamic_base_64
4069 = gen_tls_local_dynamic_base_64_si;
4070 }
4071 }
4072 else
4073 ix86_gen_leave = gen_leave;
4074
4075 if (Pmode == DImode)
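/* Pick the RTL generation functions that match the pointer mode (Pmode).  */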
4076 {
4077 ix86_gen_add3 = gen_adddi3;
4078 ix86_gen_sub3 = gen_subdi3;
4079 ix86_gen_sub3_carry = gen_subdi3_carry;
4080 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4081 ix86_gen_andsp = gen_anddi3;
4082 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4083 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4084 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4085 ix86_gen_monitor = gen_sse3_monitor_di;
4086 }
4087 else
4088 {
4089 ix86_gen_add3 = gen_addsi3;
4090 ix86_gen_sub3 = gen_subsi3;
4091 ix86_gen_sub3_carry = gen_subsi3_carry;
4092 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4093 ix86_gen_andsp = gen_andsi3;
4094 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4095 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4096 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4097 ix86_gen_monitor = gen_sse3_monitor_si;
4098 }
4099
4100 #ifdef USE_IX86_CLD
4101 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4102 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4103 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4104 #endif
4105
4106 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4107 {
4108 if (opts->x_flag_fentry > 0)
4109 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4110 "with -fpic");
4111 opts->x_flag_fentry = 0;
4112 }
4113 else if (TARGET_SEH)
4114 {
4115 if (opts->x_flag_fentry == 0)
4116 sorry ("-mno-fentry isn%'t compatible with SEH");
4117 opts->x_flag_fentry = 1;
4118 }
4119 else if (opts->x_flag_fentry < 0)
4120 {
4121 #if defined(PROFILE_BEFORE_PROLOGUE)
4122 opts->x_flag_fentry = 1;
4123 #else
4124 opts->x_flag_fentry = 0;
4125 #endif
4126 }
4127
4128 /* When not optimizing for size, enable vzeroupper optimization for
4129 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4130 AVX unaligned load/store. */
4131 if (!opts->x_optimize_size)
4132 {
4133 if (flag_expensive_optimizations
4134 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4135 opts->x_target_flags |= MASK_VZEROUPPER;
4136 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4137 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4138 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4139 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4140 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4141 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4142 /* Enable 128-bit AVX instruction generation
4143 for the auto-vectorizer. */
4144 if (TARGET_AVX128_OPTIMAL
4145 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4146 opts->x_target_flags |= MASK_PREFER_AVX128;
4147 }
4148
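/* Parse -mrecip=: a comma separated list of the names in recip_options
   above, each optionally prefixed with '!' to disable that set of
   estimates; e.g. -mrecip=all,!sqrt enables all reciprocal estimates
   except scalar square root.  */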
4149 if (opts->x_ix86_recip_name)
4150 {
4151 char *p = ASTRDUP (opts->x_ix86_recip_name);
4152 char *q;
4153 unsigned int mask, i;
4154 bool invert;
4155
4156 while ((q = strtok (p, ",")) != NULL)
4157 {
4158 p = NULL;
4159 if (*q == '!')
4160 {
4161 invert = true;
4162 q++;
4163 }
4164 else
4165 invert = false;
4166
4167 if (!strcmp (q, "default"))
4168 mask = RECIP_MASK_ALL;
4169 else
4170 {
4171 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4172 if (!strcmp (q, recip_options[i].string))
4173 {
4174 mask = recip_options[i].mask;
4175 break;
4176 }
4177
4178 if (i == ARRAY_SIZE (recip_options))
4179 {
4180 error ("unknown option for -mrecip=%s", q);
4181 invert = false;
4182 mask = RECIP_MASK_NONE;
4183 }
4184 }
4185
4186 opts->x_recip_mask_explicit |= mask;
4187 if (invert)
4188 opts->x_recip_mask &= ~mask;
4189 else
4190 opts->x_recip_mask |= mask;
4191 }
4192 }
4193
4194 if (TARGET_RECIP_P (opts->x_target_flags))
4195 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4196 else if (opts_set->x_target_flags & MASK_RECIP)
4197 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4198
4199 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4200 for 64-bit Bionic. */
4201 if (TARGET_HAS_BIONIC
4202 && !(opts_set->x_target_flags
4203 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4204 opts->x_target_flags |= (TARGET_64BIT
4205 ? MASK_LONG_DOUBLE_128
4206 : MASK_LONG_DOUBLE_64);
4207
4208 /* Only one of them can be active. */
4209 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4210 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4211
4212 /* Save the initial options in case the user uses function-specific
4213 options. */
4214 if (main_args_p)
4215 target_option_default_node = target_option_current_node
4216 = build_target_option_node (opts);
4217
4218 /* Handle stack protector */
4219 if (!opts_set->x_ix86_stack_protector_guard)
4220 opts->x_ix86_stack_protector_guard
4221 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4222
4223 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4224 if (opts->x_ix86_tune_memcpy_strategy)
4225 {
4226 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4227 ix86_parse_stringop_strategy_string (str, false);
4228 free (str);
4229 }
4230
4231 if (opts->x_ix86_tune_memset_strategy)
4232 {
4233 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4234 ix86_parse_stringop_strategy_string (str, true);
4235 free (str);
4236 }
4237 }
4238
4239 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4240
4241 static void
4242 ix86_option_override (void)
4243 {
4244 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4245 static struct register_pass_info insert_vzeroupper_info
4246 = { pass_insert_vzeroupper, "reload",
4247 1, PASS_POS_INSERT_AFTER
4248 };
4249
4250 ix86_option_override_internal (true, &global_options, &global_options_set);
4251
4252
4253 /* This needs to be done at start up. It's convenient to do it here. */
4254 register_pass (&insert_vzeroupper_info);
4255 }
4256
4257 /* Update register usage after having seen the compiler flags. */
4258
4259 static void
4260 ix86_conditional_register_usage (void)
4261 {
4262 int i, c_mask;
4263 unsigned int j;
4264
4265 /* The PIC register, if it exists, is fixed. */
4266 j = PIC_OFFSET_TABLE_REGNUM;
4267 if (j != INVALID_REGNUM)
4268 fixed_regs[j] = call_used_regs[j] = 1;
4269
4270 /* For 32-bit targets, squash the REX registers. */
4271 if (! TARGET_64BIT)
4272 {
4273 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4274 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4275 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4276 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4277 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4278 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4279 }
4280
4281 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4282 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4283 : TARGET_64BIT ? (1 << 2)
4284 : (1 << 1));
4285
4286 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4287
4288 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4289 {
4290 /* Set/reset conditionally defined registers from
4291 CALL_USED_REGISTERS initializer. */
4292 if (call_used_regs[i] > 1)
4293 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4294
4295 /* Calculate registers of CLOBBERED_REGS register set
4296 as call used registers from GENERAL_REGS register set. */
4297 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4298 && call_used_regs[i])
4299 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4300 }
4301
4302 /* If MMX is disabled, squash the registers. */
4303 if (! TARGET_MMX)
4304 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4305 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4306 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4307
4308 /* If SSE is disabled, squash the registers. */
4309 if (! TARGET_SSE)
4310 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4311 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4312 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4313
4314 /* If the FPU is disabled, squash the registers. */
4315 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4316 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4317 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4318 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4319
4320 /* If AVX512F is disabled, squash the registers. */
4321 if (! TARGET_AVX512F)
4322 {
4323 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4324 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4325
4326 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4327 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4328 }
4329 }
4330
4331 \f
4332 /* Save the current options */
4333
4334 static void
4335 ix86_function_specific_save (struct cl_target_option *ptr,
4336 struct gcc_options *opts)
4337 {
4338 ptr->arch = ix86_arch;
4339 ptr->schedule = ix86_schedule;
4340 ptr->tune = ix86_tune;
4341 ptr->branch_cost = ix86_branch_cost;
4342 ptr->tune_defaulted = ix86_tune_defaulted;
4343 ptr->arch_specified = ix86_arch_specified;
4344 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4345 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4346 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4347 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4348 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4349 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4350 ptr->x_ix86_abi = opts->x_ix86_abi;
4351 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4352 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4353 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4354 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4355 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4356 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4357 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4358 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4359 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4360 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4361 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4362 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4363 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4364 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4365 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4366 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4367 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4368 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4369 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4370 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4371
4372 /* The fields are char but the variables are not; make sure the
4373 values fit in the fields. */
4374 gcc_assert (ptr->arch == ix86_arch);
4375 gcc_assert (ptr->schedule == ix86_schedule);
4376 gcc_assert (ptr->tune == ix86_tune);
4377 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4378 }
4379
4380 /* Restore the current options */
4381
4382 static void
4383 ix86_function_specific_restore (struct gcc_options *opts,
4384 struct cl_target_option *ptr)
4385 {
4386 enum processor_type old_tune = ix86_tune;
4387 enum processor_type old_arch = ix86_arch;
4388 unsigned int ix86_arch_mask;
4389 int i;
4390
4391 /* We don't change -fPIC. */
4392 opts->x_flag_pic = flag_pic;
4393
4394 ix86_arch = (enum processor_type) ptr->arch;
4395 ix86_schedule = (enum attr_cpu) ptr->schedule;
4396 ix86_tune = (enum processor_type) ptr->tune;
4397 opts->x_ix86_branch_cost = ptr->branch_cost;
4398 ix86_tune_defaulted = ptr->tune_defaulted;
4399 ix86_arch_specified = ptr->arch_specified;
4400 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4401 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4402 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4403 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4404 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4405 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4406 opts->x_ix86_abi = ptr->x_ix86_abi;
4407 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4408 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4409 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4410 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4411 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4412 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4413 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4414 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4415 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4416 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4417 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4418 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4419 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4420 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4421 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4422 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4423 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4424 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4425 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4426 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4427
4428 /* Recreate the arch feature tests if the arch changed */
4429 if (old_arch != ix86_arch)
4430 {
4431 ix86_arch_mask = 1u << ix86_arch;
4432 for (i = 0; i < X86_ARCH_LAST; ++i)
4433 ix86_arch_features[i]
4434 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4435 }
4436
4437 /* Recreate the tune optimization tests */
4438 if (old_tune != ix86_tune)
4439 set_ix86_tune_features (ix86_tune, false);
4440 }
4441
4442 /* Print the current options */
4443
4444 static void
4445 ix86_function_specific_print (FILE *file, int indent,
4446 struct cl_target_option *ptr)
4447 {
4448 char *target_string
4449 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4450 NULL, NULL, ptr->x_ix86_fpmath, false);
4451
4452 gcc_assert (ptr->arch < PROCESSOR_max);
4453 fprintf (file, "%*sarch = %d (%s)\n",
4454 indent, "",
4455 ptr->arch, processor_target_table[ptr->arch].name);
4456
4457 gcc_assert (ptr->tune < PROCESSOR_max);
4458 fprintf (file, "%*stune = %d (%s)\n",
4459 indent, "",
4460 ptr->tune, processor_target_table[ptr->tune].name);
4461
4462 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4463
4464 if (target_string)
4465 {
4466 fprintf (file, "%*s%s\n", indent, "", target_string);
4467 free (target_string);
4468 }
4469 }
4470
4471 \f
4472 /* Inner function to process the attribute((target(...))), take an argument and
4473 set the current options from the argument. If we have a list, recursively go
4474 over the list. */
4475
4476 static bool
4477 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4478 struct gcc_options *opts,
4479 struct gcc_options *opts_set,
4480 struct gcc_options *enum_opts_set)
4481 {
4482 char *next_optstr;
4483 bool ret = true;
4484
4485 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4486 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4487 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4488 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4489 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4490
4491 enum ix86_opt_type
4492 {
4493 ix86_opt_unknown,
4494 ix86_opt_yes,
4495 ix86_opt_no,
4496 ix86_opt_str,
4497 ix86_opt_enum,
4498 ix86_opt_isa
4499 };
4500
4501 static const struct
4502 {
4503 const char *string;
4504 size_t len;
4505 enum ix86_opt_type type;
4506 int opt;
4507 int mask;
4508 } attrs[] = {
4509 /* isa options */
4510 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4511 IX86_ATTR_ISA ("abm", OPT_mabm),
4512 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4513 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4514 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4515 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4516 IX86_ATTR_ISA ("aes", OPT_maes),
4517 IX86_ATTR_ISA ("sha", OPT_msha),
4518 IX86_ATTR_ISA ("avx", OPT_mavx),
4519 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4520 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4521 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4522 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4523 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4524 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4525 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4526 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4527 IX86_ATTR_ISA ("sse", OPT_msse),
4528 IX86_ATTR_ISA ("sse2", OPT_msse2),
4529 IX86_ATTR_ISA ("sse3", OPT_msse3),
4530 IX86_ATTR_ISA ("sse4", OPT_msse4),
4531 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4532 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4533 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4534 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4535 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4536 IX86_ATTR_ISA ("fma", OPT_mfma),
4537 IX86_ATTR_ISA ("xop", OPT_mxop),
4538 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4539 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4540 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4541 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4542 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4543 IX86_ATTR_ISA ("hle", OPT_mhle),
4544 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4545 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4546 IX86_ATTR_ISA ("adx", OPT_madx),
4547 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4548 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4549 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4550
4551 /* enum options */
4552 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4553
4554 /* string options */
4555 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4556 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4557
4558 /* flag options */
4559 IX86_ATTR_YES ("cld",
4560 OPT_mcld,
4561 MASK_CLD),
4562
4563 IX86_ATTR_NO ("fancy-math-387",
4564 OPT_mfancy_math_387,
4565 MASK_NO_FANCY_MATH_387),
4566
4567 IX86_ATTR_YES ("ieee-fp",
4568 OPT_mieee_fp,
4569 MASK_IEEE_FP),
4570
4571 IX86_ATTR_YES ("inline-all-stringops",
4572 OPT_minline_all_stringops,
4573 MASK_INLINE_ALL_STRINGOPS),
4574
4575 IX86_ATTR_YES ("inline-stringops-dynamically",
4576 OPT_minline_stringops_dynamically,
4577 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4578
4579 IX86_ATTR_NO ("align-stringops",
4580 OPT_mno_align_stringops,
4581 MASK_NO_ALIGN_STRINGOPS),
4582
4583 IX86_ATTR_YES ("recip",
4584 OPT_mrecip,
4585 MASK_RECIP),
4586
4587 };
4588
4589 /* If this is a list, recurse to get the options. */
4590 if (TREE_CODE (args) == TREE_LIST)
4591 {
4592 bool ret = true;
4593
4594 for (; args; args = TREE_CHAIN (args))
4595 if (TREE_VALUE (args)
4596 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4597 p_strings, opts, opts_set,
4598 enum_opts_set))
4599 ret = false;
4600
4601 return ret;
4602 }
4603
4604 else if (TREE_CODE (args) != STRING_CST)
4605 {
4606 error ("attribute %<target%> argument not a string");
4607 return false;
4608 }
4609
4610 /* Handle multiple arguments separated by commas. */
4611 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4612
4613 while (next_optstr && *next_optstr != '\0')
4614 {
4615 char *p = next_optstr;
4616 char *orig_p = p;
4617 char *comma = strchr (next_optstr, ',');
4618 const char *opt_string;
4619 size_t len, opt_len;
4620 int opt;
4621 bool opt_set_p;
4622 char ch;
4623 unsigned i;
4624 enum ix86_opt_type type = ix86_opt_unknown;
4625 int mask = 0;
4626
4627 if (comma)
4628 {
4629 *comma = '\0';
4630 len = comma - next_optstr;
4631 next_optstr = comma + 1;
4632 }
4633 else
4634 {
4635 len = strlen (p);
4636 next_optstr = NULL;
4637 }
4638
4639 /* Recognize no-xxx. */
4640 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4641 {
4642 opt_set_p = false;
4643 p += 3;
4644 len -= 3;
4645 }
4646 else
4647 opt_set_p = true;
4648
4649 /* Find the option. */
4650 ch = *p;
4651 opt = N_OPTS;
4652 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4653 {
4654 type = attrs[i].type;
4655 opt_len = attrs[i].len;
4656 if (ch == attrs[i].string[0]
4657 && ((type != ix86_opt_str && type != ix86_opt_enum)
4658 ? len == opt_len
4659 : len > opt_len)
4660 && memcmp (p, attrs[i].string, opt_len) == 0)
4661 {
4662 opt = attrs[i].opt;
4663 mask = attrs[i].mask;
4664 opt_string = attrs[i].string;
4665 break;
4666 }
4667 }
4668
4669 /* Process the option. */
4670 if (opt == N_OPTS)
4671 {
4672 error ("attribute(target(\"%s\")) is unknown", orig_p);
4673 ret = false;
4674 }
4675
4676 else if (type == ix86_opt_isa)
4677 {
4678 struct cl_decoded_option decoded;
4679
4680 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4681 ix86_handle_option (opts, opts_set,
4682 &decoded, input_location);
4683 }
4684
4685 else if (type == ix86_opt_yes || type == ix86_opt_no)
4686 {
4687 if (type == ix86_opt_no)
4688 opt_set_p = !opt_set_p;
4689
4690 if (opt_set_p)
4691 opts->x_target_flags |= mask;
4692 else
4693 opts->x_target_flags &= ~mask;
4694 }
4695
4696 else if (type == ix86_opt_str)
4697 {
4698 if (p_strings[opt])
4699 {
4700 error ("option(\"%s\") was already specified", opt_string);
4701 ret = false;
4702 }
4703 else
4704 p_strings[opt] = xstrdup (p + opt_len);
4705 }
4706
4707 else if (type == ix86_opt_enum)
4708 {
4709 bool arg_ok;
4710 int value;
4711
4712 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4713 if (arg_ok)
4714 set_option (opts, enum_opts_set, opt, value,
4715 p + opt_len, DK_UNSPECIFIED, input_location,
4716 global_dc);
4717 else
4718 {
4719 error ("attribute(target(\"%s\")) is unknown", orig_p);
4720 ret = false;
4721 }
4722 }
4723
4724 else
4725 gcc_unreachable ();
4726 }
4727
4728 return ret;
4729 }
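/* Illustrative example, not part of this file: the parser above accepts a
   comma-separated attribute string made of ISA names from the attrs[] table,
   "no-" prefixed negations, the string options arch=/tune= and the enum
   option fpmath=.  A hypothetical user translation unit could thus contain

     __attribute__((target ("avx2,no-fma,arch=core-avx2,fpmath=sse")))
     int example_kernel (int x) { return x + 1; }

   where example_kernel is an invented name; each token between the commas
   is looked up exactly as in the loop above.  */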
4730
4731 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4732
4733 tree
4734 ix86_valid_target_attribute_tree (tree args,
4735 struct gcc_options *opts,
4736 struct gcc_options *opts_set)
4737 {
4738 const char *orig_arch_string = opts->x_ix86_arch_string;
4739 const char *orig_tune_string = opts->x_ix86_tune_string;
4740 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4741 int orig_tune_defaulted = ix86_tune_defaulted;
4742 int orig_arch_specified = ix86_arch_specified;
4743 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4744 tree t = NULL_TREE;
4745 int i;
4746 struct cl_target_option *def
4747 = TREE_TARGET_OPTION (target_option_default_node);
4748 struct gcc_options enum_opts_set;
4749
4750 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4751
4752 /* Process each of the options on the chain. */
4753 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4754 opts_set, &enum_opts_set))
4755 return error_mark_node;
4756
4757 /* If the changed options are different from the default, rerun
4758 ix86_option_override_internal, and then save the options away.
4759 The string options are attribute options, and will be undone
4760 when we copy the save structure. */
4761 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4762 || opts->x_target_flags != def->x_target_flags
4763 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4764 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4765 || enum_opts_set.x_ix86_fpmath)
4766 {
4767 /* If we are using the default tune= or arch=, undo the string assigned,
4768 and use the default. */
4769 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4770 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4771 else if (!orig_arch_specified)
4772 opts->x_ix86_arch_string = NULL;
4773
4774 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4775 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4776 else if (orig_tune_defaulted)
4777 opts->x_ix86_tune_string = NULL;
4778
4779 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4780 if (enum_opts_set.x_ix86_fpmath)
4781 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4782 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4783 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4784 {
4785 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4786 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4787 }
4788
4789 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4790 ix86_option_override_internal (false, opts, opts_set);
4791
4792 /* Add any builtin functions with the new isa if any. */
4793 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4794
4795 /* Save the current options unless we are validating options for
4796 #pragma. */
4797 t = build_target_option_node (opts);
4798
4799 opts->x_ix86_arch_string = orig_arch_string;
4800 opts->x_ix86_tune_string = orig_tune_string;
4801 opts_set->x_ix86_fpmath = orig_fpmath_set;
4802
4803 /* Free up memory allocated to hold the strings */
4804 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4805 free (option_strings[i]);
4806 }
4807
4808 return t;
4809 }
4810
4811 /* Hook to validate attribute((target("string"))). */
4812
4813 static bool
4814 ix86_valid_target_attribute_p (tree fndecl,
4815 tree ARG_UNUSED (name),
4816 tree args,
4817 int ARG_UNUSED (flags))
4818 {
4819 struct gcc_options func_options;
4820 tree new_target, new_optimize;
4821 bool ret = true;
4822
4823 /* attribute((target("default"))) does nothing, beyond
4824 affecting multi-versioning. */
4825 if (TREE_VALUE (args)
4826 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4827 && TREE_CHAIN (args) == NULL_TREE
4828 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4829 return true;
4830
4831 tree old_optimize = build_optimization_node (&global_options);
4832
4833 /* Get the optimization options of the current function. */
4834 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4835
4836 if (!func_optimize)
4837 func_optimize = old_optimize;
4838
4839 /* Init func_options. */
4840 memset (&func_options, 0, sizeof (func_options));
4841 init_options_struct (&func_options, NULL);
4842 lang_hooks.init_options_struct (&func_options);
4843
4844 cl_optimization_restore (&func_options,
4845 TREE_OPTIMIZATION (func_optimize));
4846
4847 /* Initialize func_options to the default before its target options can
4848 be set. */
4849 cl_target_option_restore (&func_options,
4850 TREE_TARGET_OPTION (target_option_default_node));
4851
4852 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4853 &global_options_set);
4854
4855 new_optimize = build_optimization_node (&func_options);
4856
4857 if (new_target == error_mark_node)
4858 ret = false;
4859
4860 else if (fndecl && new_target)
4861 {
4862 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4863
4864 if (old_optimize != new_optimize)
4865 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4866 }
4867
4868 return ret;
4869 }
4870
4871 \f
4872 /* Hook to determine if one function can safely inline another. */
4873
4874 static bool
4875 ix86_can_inline_p (tree caller, tree callee)
4876 {
4877 bool ret = false;
4878 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4879 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4880
4881 /* If callee has no option attributes, then it is ok to inline. */
4882 if (!callee_tree)
4883 ret = true;
4884
4885 /* If caller has no option attributes, but callee does then it is not ok to
4886 inline. */
4887 else if (!caller_tree)
4888 ret = false;
4889
4890 else
4891 {
4892 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4893 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4894
4895 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4896 function can inline an SSE2 function but an SSE2 function can't inline
4897 an SSE4 function. */
4898 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4899 != callee_opts->x_ix86_isa_flags)
4900 ret = false;
4901
4902 /* See if we have the same non-isa options. */
4903 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4904 ret = false;
4905
4906 /* See if arch, tune, etc. are the same. */
4907 else if (caller_opts->arch != callee_opts->arch)
4908 ret = false;
4909
4910 else if (caller_opts->tune != callee_opts->tune)
4911 ret = false;
4912
4913 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4914 ret = false;
4915
4916 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4917 ret = false;
4918
4919 else
4920 ret = true;
4921 }
4922
4923 return ret;
4924 }
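/* Illustrative example, hypothetical user code: under the subset rule above,
   a caller whose ISA flags include the callee's may inline it, but not the
   other way round.

     __attribute__((target ("sse2")))   static inline int callee (int x) { return x; }
     __attribute__((target ("sse4.2"))) int caller (int x) { return callee (x); }

   Here SSE4.2 implies SSE2, so the ISA check passes; swapping the two
   attributes would make ix86_can_inline_p return false.  */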
4925
4926 \f
4927 /* Remember the last target of ix86_set_current_function. */
4928 static GTY(()) tree ix86_previous_fndecl;
4929
4930 /* Invalidate ix86_previous_fndecl cache. */
4931 void
4932 ix86_reset_previous_fndecl (void)
4933 {
4934 ix86_previous_fndecl = NULL_TREE;
4935 }
4936
4937 /* Establish appropriate back-end context for processing the function
4938 FNDECL. The argument might be NULL to indicate processing at top
4939 level, outside of any function scope. */
4940 static void
4941 ix86_set_current_function (tree fndecl)
4942 {
4943 /* Only change the context if the function changes. This hook is called
4944 several times in the course of compiling a function, and we don't want to
4945 slow things down too much or call target_reinit when it isn't safe. */
4946 if (fndecl && fndecl != ix86_previous_fndecl)
4947 {
4948 tree old_tree = (ix86_previous_fndecl
4949 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4950 : NULL_TREE);
4951
4952 tree new_tree = (fndecl
4953 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4954 : NULL_TREE);
4955
4956 ix86_previous_fndecl = fndecl;
4957 if (old_tree == new_tree)
4958 ;
4959
4960 else if (new_tree)
4961 {
4962 cl_target_option_restore (&global_options,
4963 TREE_TARGET_OPTION (new_tree));
4964 if (TREE_TARGET_GLOBALS (new_tree))
4965 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4966 else
4967 TREE_TARGET_GLOBALS (new_tree)
4968 = save_target_globals_default_opts ();
4969 }
4970
4971 else if (old_tree)
4972 {
4973 new_tree = target_option_current_node;
4974 cl_target_option_restore (&global_options,
4975 TREE_TARGET_OPTION (new_tree));
4976 if (TREE_TARGET_GLOBALS (new_tree))
4977 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4978 else if (new_tree == target_option_default_node)
4979 restore_target_globals (&default_target_globals);
4980 else
4981 TREE_TARGET_GLOBALS (new_tree)
4982 = save_target_globals_default_opts ();
4983 }
4984 }
4985 }
4986
4987 \f
4988 /* Return true if this goes in large data/bss. */
4989
4990 static bool
4991 ix86_in_large_data_p (tree exp)
4992 {
4993 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4994 return false;
4995
4996 /* Functions are never large data. */
4997 if (TREE_CODE (exp) == FUNCTION_DECL)
4998 return false;
4999
5000 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5001 {
5002 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5003 if (strcmp (section, ".ldata") == 0
5004 || strcmp (section, ".lbss") == 0)
5005 return true;
5006 return false;
5007 }
5008 else
5009 {
5010 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5011
5012 /* If this is an incomplete type with size 0, then we can't put it
5013 in data because it might be too big when completed. */
5014 if (!size || size > ix86_section_threshold)
5015 return true;
5016 }
5017
5018 return false;
5019 }
5020
5021 /* Switch to the appropriate section for output of DECL.
5022 DECL is either a `VAR_DECL' node or a constant of some sort.
5023 RELOC indicates whether forming the initial value of DECL requires
5024 link-time relocations. */
5025
5026 ATTRIBUTE_UNUSED static section *
5027 x86_64_elf_select_section (tree decl, int reloc,
5028 unsigned HOST_WIDE_INT align)
5029 {
5030 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5031 && ix86_in_large_data_p (decl))
5032 {
5033 const char *sname = NULL;
5034 unsigned int flags = SECTION_WRITE;
5035 switch (categorize_decl_for_section (decl, reloc))
5036 {
5037 case SECCAT_DATA:
5038 sname = ".ldata";
5039 break;
5040 case SECCAT_DATA_REL:
5041 sname = ".ldata.rel";
5042 break;
5043 case SECCAT_DATA_REL_LOCAL:
5044 sname = ".ldata.rel.local";
5045 break;
5046 case SECCAT_DATA_REL_RO:
5047 sname = ".ldata.rel.ro";
5048 break;
5049 case SECCAT_DATA_REL_RO_LOCAL:
5050 sname = ".ldata.rel.ro.local";
5051 break;
5052 case SECCAT_BSS:
5053 sname = ".lbss";
5054 flags |= SECTION_BSS;
5055 break;
5056 case SECCAT_RODATA:
5057 case SECCAT_RODATA_MERGE_STR:
5058 case SECCAT_RODATA_MERGE_STR_INIT:
5059 case SECCAT_RODATA_MERGE_CONST:
5060 sname = ".lrodata";
5061 flags = 0;
5062 break;
5063 case SECCAT_SRODATA:
5064 case SECCAT_SDATA:
5065 case SECCAT_SBSS:
5066 gcc_unreachable ();
5067 case SECCAT_TEXT:
5068 case SECCAT_TDATA:
5069 case SECCAT_TBSS:
5070 /* We don't split these for the medium model. Place them into
5071 default sections and hope for the best. */
5072 break;
5073 }
5074 if (sname)
5075 {
5076 /* We might get called with string constants, but get_named_section
5077 doesn't like them as they are not DECLs. Also, we need to set
5078 flags in that case. */
5079 if (!DECL_P (decl))
5080 return get_section (sname, flags, NULL);
5081 return get_named_section (decl, sname, reloc);
5082 }
5083 }
5084 return default_elf_select_section (decl, reloc, align);
5085 }
5086
5087 /* Select a set of attributes for section NAME based on the properties
5088 of DECL and whether or not RELOC indicates that DECL's initializer
5089 might contain runtime relocations. */
5090
5091 static unsigned int ATTRIBUTE_UNUSED
5092 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5093 {
5094 unsigned int flags = default_section_type_flags (decl, name, reloc);
5095
5096 if (decl == NULL_TREE
5097 && (strcmp (name, ".ldata.rel.ro") == 0
5098 || strcmp (name, ".ldata.rel.ro.local") == 0))
5099 flags |= SECTION_RELRO;
5100
5101 if (strcmp (name, ".lbss") == 0
5102 || strncmp (name, ".lbss.", 5) == 0
5103 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5104 flags |= SECTION_BSS;
5105
5106 return flags;
5107 }
5108
5109 /* Build up a unique section name, expressed as a
5110 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5111 RELOC indicates whether the initial value of EXP requires
5112 link-time relocations. */
5113
5114 static void ATTRIBUTE_UNUSED
5115 x86_64_elf_unique_section (tree decl, int reloc)
5116 {
5117 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5118 && ix86_in_large_data_p (decl))
5119 {
5120 const char *prefix = NULL;
5121 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5122 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5123
5124 switch (categorize_decl_for_section (decl, reloc))
5125 {
5126 case SECCAT_DATA:
5127 case SECCAT_DATA_REL:
5128 case SECCAT_DATA_REL_LOCAL:
5129 case SECCAT_DATA_REL_RO:
5130 case SECCAT_DATA_REL_RO_LOCAL:
5131 prefix = one_only ? ".ld" : ".ldata";
5132 break;
5133 case SECCAT_BSS:
5134 prefix = one_only ? ".lb" : ".lbss";
5135 break;
5136 case SECCAT_RODATA:
5137 case SECCAT_RODATA_MERGE_STR:
5138 case SECCAT_RODATA_MERGE_STR_INIT:
5139 case SECCAT_RODATA_MERGE_CONST:
5140 prefix = one_only ? ".lr" : ".lrodata";
5141 break;
5142 case SECCAT_SRODATA:
5143 case SECCAT_SDATA:
5144 case SECCAT_SBSS:
5145 gcc_unreachable ();
5146 case SECCAT_TEXT:
5147 case SECCAT_TDATA:
5148 case SECCAT_TBSS:
5149 /* We don't split these for the medium model. Place them into
5150 default sections and hope for the best. */
5151 break;
5152 }
5153 if (prefix)
5154 {
5155 const char *name, *linkonce;
5156 char *string;
5157
5158 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5159 name = targetm.strip_name_encoding (name);
5160
5161 /* If we're using one_only, then there needs to be a .gnu.linkonce
5162 prefix to the section name. */
5163 linkonce = one_only ? ".gnu.linkonce" : "";
5164
5165 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5166
5167 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5168 return;
5169 }
5170 }
5171 default_unique_section (decl, reloc);
5172 }
5173
5174 #ifdef COMMON_ASM_OP
5175 /* This says how to output assembler code to declare an
5176 uninitialized external linkage data object.
5177
5178 For medium-model x86-64 we need to use the .largecomm directive for
5179 large objects. */
5180 void
5181 x86_elf_aligned_common (FILE *file,
5182 const char *name, unsigned HOST_WIDE_INT size,
5183 int align)
5184 {
5185 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5186 && size > (unsigned int)ix86_section_threshold)
5187 fputs (".largecomm\t", file);
5188 else
5189 fputs (COMMON_ASM_OP, file);
5190 assemble_name (file, name);
5191 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5192 size, align / BITS_PER_UNIT);
5193 }
5194 #endif
5195
5196 /* Utility function for targets to use in implementing
5197 ASM_OUTPUT_ALIGNED_BSS. */
5198
5199 void
5200 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5201 const char *name, unsigned HOST_WIDE_INT size,
5202 int align)
5203 {
5204 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5205 && size > (unsigned int)ix86_section_threshold)
5206 switch_to_section (get_named_section (decl, ".lbss", 0));
5207 else
5208 switch_to_section (bss_section);
5209 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5210 #ifdef ASM_DECLARE_OBJECT_NAME
5211 last_assemble_variable_decl = decl;
5212 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5213 #else
5214 /* Standard thing is just output label for the object. */
5215 ASM_OUTPUT_LABEL (file, name);
5216 #endif /* ASM_DECLARE_OBJECT_NAME */
5217 ASM_OUTPUT_SKIP (file, size ? size : 1);
5218 }
5219 \f
5220 /* Decide whether we must probe the stack before any space allocation
5221 on this target. It's essentially TARGET_STACK_PROBE except when
5222 -fstack-check causes the stack to be already probed differently. */
5223
5224 bool
5225 ix86_target_stack_probe (void)
5226 {
5227 /* Do not probe the stack twice if static stack checking is enabled. */
5228 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5229 return false;
5230
5231 return TARGET_STACK_PROBE;
5232 }
5233 \f
5234 /* Decide whether we can make a sibling call to a function. DECL is the
5235 declaration of the function being targeted by the call and EXP is the
5236 CALL_EXPR representing the call. */
5237
5238 static bool
5239 ix86_function_ok_for_sibcall (tree decl, tree exp)
5240 {
5241 tree type, decl_or_type;
5242 rtx a, b;
5243
5244 /* If we are generating position-independent code, we cannot sibcall
5245 optimize any indirect call, or a direct call to a global function,
5246 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5247 if (!TARGET_MACHO
5248 && !TARGET_64BIT
5249 && flag_pic
5250 && (!decl || !targetm.binds_local_p (decl)))
5251 return false;
5252
5253 /* If we need to align the outgoing stack, then sibcalling would
5254 unalign the stack, which may break the called function. */
5255 if (ix86_minimum_incoming_stack_boundary (true)
5256 < PREFERRED_STACK_BOUNDARY)
5257 return false;
5258
5259 if (decl)
5260 {
5261 decl_or_type = decl;
5262 type = TREE_TYPE (decl);
5263 }
5264 else
5265 {
5266 /* We're looking at the CALL_EXPR, we need the type of the function. */
5267 type = CALL_EXPR_FN (exp); /* pointer expression */
5268 type = TREE_TYPE (type); /* pointer type */
5269 type = TREE_TYPE (type); /* function type */
5270 decl_or_type = type;
5271 }
5272
5273 /* Check that the return value locations are the same. Like
5274 if we are returning floats on the 80387 register stack, we cannot
5275 make a sibcall from a function that doesn't return a float to a
5276 function that does or, conversely, from a function that does return
5277 a float to a function that doesn't; the necessary stack adjustment
5278 would not be executed. This is also the place we notice
5279 differences in the return value ABI. Note that it is ok for one
5280 of the functions to have void return type as long as the return
5281 value of the other is passed in a register. */
5282 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5283 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5284 cfun->decl, false);
5285 if (STACK_REG_P (a) || STACK_REG_P (b))
5286 {
5287 if (!rtx_equal_p (a, b))
5288 return false;
5289 }
5290 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5291 ;
5292 else if (!rtx_equal_p (a, b))
5293 return false;
5294
5295 if (TARGET_64BIT)
5296 {
5297 /* The SYSV ABI has more call-clobbered registers;
5298 disallow sibcalls from MS to SYSV. */
5299 if (cfun->machine->call_abi == MS_ABI
5300 && ix86_function_type_abi (type) == SYSV_ABI)
5301 return false;
5302 }
5303 else
5304 {
5305 /* If this call is indirect, we'll need to be able to use a
5306 call-clobbered register for the address of the target function.
5307 Make sure that all such registers are not used for passing
5308 parameters. Note that DLLIMPORT functions are indirect. */
5309 if (!decl
5310 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5311 {
5312 if (ix86_function_regparm (type, NULL) >= 3)
5313 {
5314 /* ??? Need to count the actual number of registers to be used,
5315 not the possible number of registers. Fix later. */
5316 return false;
5317 }
5318 }
5319 }
5320
5321 /* Otherwise okay. That also includes certain types of indirect calls. */
5322 return true;
5323 }
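/* Illustrative example, hypothetical user code: when compiling 32-bit PIC
   code, the first test above rejects a sibcall to a function that does not
   bind locally, because the PLT call sequence needs %ebx.

     extern int external_fn (int);
     int wrapper (int x) { return external_fn (x); }

   With -m32 -fpic this tail call stays a normal call; with -m64, or when
   external_fn binds locally, it can become a sibcall.  */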
5324
5325 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5326 and "sseregparm" calling convention attributes;
5327 arguments as in struct attribute_spec.handler. */
5328
5329 static tree
5330 ix86_handle_cconv_attribute (tree *node, tree name,
5331 tree args,
5332 int flags ATTRIBUTE_UNUSED,
5333 bool *no_add_attrs)
5334 {
5335 if (TREE_CODE (*node) != FUNCTION_TYPE
5336 && TREE_CODE (*node) != METHOD_TYPE
5337 && TREE_CODE (*node) != FIELD_DECL
5338 && TREE_CODE (*node) != TYPE_DECL)
5339 {
5340 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5341 name);
5342 *no_add_attrs = true;
5343 return NULL_TREE;
5344 }
5345
5346 /* Can combine regparm with all attributes but fastcall and thiscall. */
5347 if (is_attribute_p ("regparm", name))
5348 {
5349 tree cst;
5350
5351 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5352 {
5353 error ("fastcall and regparm attributes are not compatible");
5354 }
5355
5356 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5357 {
5358 error ("regparam and thiscall attributes are not compatible");
5359 }
5360
5361 cst = TREE_VALUE (args);
5362 if (TREE_CODE (cst) != INTEGER_CST)
5363 {
5364 warning (OPT_Wattributes,
5365 "%qE attribute requires an integer constant argument",
5366 name);
5367 *no_add_attrs = true;
5368 }
5369 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5370 {
5371 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5372 name, REGPARM_MAX);
5373 *no_add_attrs = true;
5374 }
5375
5376 return NULL_TREE;
5377 }
5378
5379 if (TARGET_64BIT)
5380 {
5381 /* Do not warn when emulating the MS ABI. */
5382 if ((TREE_CODE (*node) != FUNCTION_TYPE
5383 && TREE_CODE (*node) != METHOD_TYPE)
5384 || ix86_function_type_abi (*node) != MS_ABI)
5385 warning (OPT_Wattributes, "%qE attribute ignored",
5386 name);
5387 *no_add_attrs = true;
5388 return NULL_TREE;
5389 }
5390
5391 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5392 if (is_attribute_p ("fastcall", name))
5393 {
5394 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5395 {
5396 error ("fastcall and cdecl attributes are not compatible");
5397 }
5398 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5399 {
5400 error ("fastcall and stdcall attributes are not compatible");
5401 }
5402 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5403 {
5404 error ("fastcall and regparm attributes are not compatible");
5405 }
5406 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5407 {
5408 error ("fastcall and thiscall attributes are not compatible");
5409 }
5410 }
5411
5412 /* Can combine stdcall with fastcall (redundant), regparm and
5413 sseregparm. */
5414 else if (is_attribute_p ("stdcall", name))
5415 {
5416 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5417 {
5418 error ("stdcall and cdecl attributes are not compatible");
5419 }
5420 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5421 {
5422 error ("stdcall and fastcall attributes are not compatible");
5423 }
5424 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5425 {
5426 error ("stdcall and thiscall attributes are not compatible");
5427 }
5428 }
5429
5430 /* Can combine cdecl with regparm and sseregparm. */
5431 else if (is_attribute_p ("cdecl", name))
5432 {
5433 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5434 {
5435 error ("stdcall and cdecl attributes are not compatible");
5436 }
5437 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5438 {
5439 error ("fastcall and cdecl attributes are not compatible");
5440 }
5441 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5442 {
5443 error ("cdecl and thiscall attributes are not compatible");
5444 }
5445 }
5446 else if (is_attribute_p ("thiscall", name))
5447 {
5448 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5449 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5450 name);
5451 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5452 {
5453 error ("stdcall and thiscall attributes are not compatible");
5454 }
5455 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5456 {
5457 error ("fastcall and thiscall attributes are not compatible");
5458 }
5459 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5460 {
5461 error ("cdecl and thiscall attributes are not compatible");
5462 }
5463 }
5464
5465 /* Can combine sseregparm with all attributes. */
5466
5467 return NULL_TREE;
5468 }
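/* Illustrative example, hypothetical declarations: the checks above allow
   regparm to be combined with stdcall or cdecl but reject it next to
   fastcall or thiscall.

     void ok  (int, int) __attribute__((stdcall, regparm (2)));
     void bad (int, int) __attribute__((fastcall, regparm (2)));

   The first declaration is accepted; the second triggers the
   "fastcall and regparm attributes are not compatible" error emitted
   above.  */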
5469
5470 /* The transactional memory builtins are implicitly regparm or fastcall
5471 depending on the ABI. Override the generic do-nothing attribute that
5472 these builtins were declared with, and replace it with one of the two
5473 attributes that we expect elsewhere. */
5474
5475 static tree
5476 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5477 tree args ATTRIBUTE_UNUSED,
5478 int flags, bool *no_add_attrs)
5479 {
5480 tree alt;
5481
5482 /* In no case do we want to add the placeholder attribute. */
5483 *no_add_attrs = true;
5484
5485 /* The 64-bit ABI is unchanged for transactional memory. */
5486 if (TARGET_64BIT)
5487 return NULL_TREE;
5488
5489 /* ??? Is there a better way to validate 32-bit windows? We have
5490 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5491 if (CHECK_STACK_LIMIT > 0)
5492 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5493 else
5494 {
5495 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5496 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5497 }
5498 decl_attributes (node, alt, flags);
5499
5500 return NULL_TREE;
5501 }
5502
5503 /* This function determines from TYPE the calling-convention. */
5504
5505 unsigned int
5506 ix86_get_callcvt (const_tree type)
5507 {
5508 unsigned int ret = 0;
5509 bool is_stdarg;
5510 tree attrs;
5511
5512 if (TARGET_64BIT)
5513 return IX86_CALLCVT_CDECL;
5514
5515 attrs = TYPE_ATTRIBUTES (type);
5516 if (attrs != NULL_TREE)
5517 {
5518 if (lookup_attribute ("cdecl", attrs))
5519 ret |= IX86_CALLCVT_CDECL;
5520 else if (lookup_attribute ("stdcall", attrs))
5521 ret |= IX86_CALLCVT_STDCALL;
5522 else if (lookup_attribute ("fastcall", attrs))
5523 ret |= IX86_CALLCVT_FASTCALL;
5524 else if (lookup_attribute ("thiscall", attrs))
5525 ret |= IX86_CALLCVT_THISCALL;
5526
5527 /* Regparm isn't allowed for thiscall and fastcall. */
5528 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5529 {
5530 if (lookup_attribute ("regparm", attrs))
5531 ret |= IX86_CALLCVT_REGPARM;
5532 if (lookup_attribute ("sseregparm", attrs))
5533 ret |= IX86_CALLCVT_SSEREGPARM;
5534 }
5535
5536 if (IX86_BASE_CALLCVT(ret) != 0)
5537 return ret;
5538 }
5539
5540 is_stdarg = stdarg_p (type);
5541 if (TARGET_RTD && !is_stdarg)
5542 return IX86_CALLCVT_STDCALL | ret;
5543
5544 if (ret != 0
5545 || is_stdarg
5546 || TREE_CODE (type) != METHOD_TYPE
5547 || ix86_function_type_abi (type) != MS_ABI)
5548 return IX86_CALLCVT_CDECL | ret;
5549
5550 return IX86_CALLCVT_THISCALL;
5551 }
5552
5553 /* Return 0 if the attributes for two types are incompatible, 1 if they
5554 are compatible, and 2 if they are nearly compatible (which causes a
5555 warning to be generated). */
5556
5557 static int
5558 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5559 {
5560 unsigned int ccvt1, ccvt2;
5561
5562 if (TREE_CODE (type1) != FUNCTION_TYPE
5563 && TREE_CODE (type1) != METHOD_TYPE)
5564 return 1;
5565
5566 ccvt1 = ix86_get_callcvt (type1);
5567 ccvt2 = ix86_get_callcvt (type2);
5568 if (ccvt1 != ccvt2)
5569 return 0;
5570 if (ix86_function_regparm (type1, NULL)
5571 != ix86_function_regparm (type2, NULL))
5572 return 0;
5573
5574 return 1;
5575 }
5576 \f
5577 /* Return the regparm value for a function with the indicated TYPE and DECL.
5578 DECL may be NULL when calling function indirectly
5579 or considering a libcall. */
5580
5581 static int
5582 ix86_function_regparm (const_tree type, const_tree decl)
5583 {
5584 tree attr;
5585 int regparm;
5586 unsigned int ccvt;
5587
5588 if (TARGET_64BIT)
5589 return (ix86_function_type_abi (type) == SYSV_ABI
5590 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5591 ccvt = ix86_get_callcvt (type);
5592 regparm = ix86_regparm;
5593
5594 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5595 {
5596 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5597 if (attr)
5598 {
5599 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5600 return regparm;
5601 }
5602 }
5603 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5604 return 2;
5605 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5606 return 1;
5607
5608 /* Use register calling convention for local functions when possible. */
5609 if (decl
5610 && TREE_CODE (decl) == FUNCTION_DECL
5611 /* Caller and callee must agree on the calling convention, so
5612 checking just the optimize flag here would mean that with
5613 __attribute__((optimize (...))) the caller could use the regparm
5614 convention and the callee not, or vice versa. Instead look at
5615 whether the callee is optimized or not. */
5616 && opt_for_fn (decl, optimize)
5617 && !(profile_flag && !flag_fentry))
5618 {
5619 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5620 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5621 if (i && i->local && i->can_change_signature)
5622 {
5623 int local_regparm, globals = 0, regno;
5624
5625 /* Make sure no regparm register is taken by a
5626 fixed register variable. */
5627 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5628 if (fixed_regs[local_regparm])
5629 break;
5630
5631 /* We don't want to use regparm(3) for nested functions as
5632 these use a static chain pointer in the third argument. */
5633 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5634 local_regparm = 2;
5635
5636 /* In 32-bit mode save a register for the split stack. */
5637 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5638 local_regparm = 2;
5639
5640 /* Each fixed register usage increases register pressure,
5641 so fewer registers should be used for argument passing.
5642 This functionality can be overridden by an explicit
5643 regparm value. */
5644 for (regno = AX_REG; regno <= DI_REG; regno++)
5645 if (fixed_regs[regno])
5646 globals++;
5647
5648 local_regparm
5649 = globals < local_regparm ? local_regparm - globals : 0;
5650
5651 if (local_regparm > regparm)
5652 regparm = local_regparm;
5653 }
5654 }
5655
5656 return regparm;
5657 }
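/* Illustrative example, hypothetical user code: for a 32-bit declaration
   such as

     __attribute__((regparm (3))) int add3 (int a, int b, int c);

   the explicit regparm attribute makes this function return 3, so a, b and
   c are passed in %eax, %edx and %ecx rather than on the stack.  */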
5658
5659 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5660 DFmode (2) arguments in SSE registers for a function with the
5661 indicated TYPE and DECL. DECL may be NULL when calling function
5662 indirectly or considering a libcall. Otherwise return 0. */
5663
5664 static int
5665 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5666 {
5667 gcc_assert (!TARGET_64BIT);
5668
5669 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5670 by the sseregparm attribute. */
5671 if (TARGET_SSEREGPARM
5672 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5673 {
5674 if (!TARGET_SSE)
5675 {
5676 if (warn)
5677 {
5678 if (decl)
5679 error ("calling %qD with attribute sseregparm without "
5680 "SSE/SSE2 enabled", decl);
5681 else
5682 error ("calling %qT with attribute sseregparm without "
5683 "SSE/SSE2 enabled", type);
5684 }
5685 return 0;
5686 }
5687
5688 return 2;
5689 }
5690
5691 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5692 (and DFmode for SSE2) arguments in SSE registers. */
5693 if (decl && TARGET_SSE_MATH && optimize
5694 && !(profile_flag && !flag_fentry))
5695 {
5696 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5697 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5698 if (i && i->local && i->can_change_signature)
5699 return TARGET_SSE2 ? 2 : 1;
5700 }
5701
5702 return 0;
5703 }
5704
5705 /* Return true if EAX is live at the start of the function. Used by
5706 ix86_expand_prologue to determine if we need special help before
5707 calling allocate_stack_worker. */
5708
5709 static bool
5710 ix86_eax_live_at_start_p (void)
5711 {
5712 /* Cheat. Don't bother working forward from ix86_function_regparm
5713 to the function type to whether an actual argument is located in
5714 eax. Instead just look at cfg info, which is still close enough
5715 to correct at this point. This gives false positives for broken
5716 functions that might use uninitialized data that happens to be
5717 allocated in eax, but who cares? */
5718 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5719 }
5720
5721 static bool
5722 ix86_keep_aggregate_return_pointer (tree fntype)
5723 {
5724 tree attr;
5725
5726 if (!TARGET_64BIT)
5727 {
5728 attr = lookup_attribute ("callee_pop_aggregate_return",
5729 TYPE_ATTRIBUTES (fntype));
5730 if (attr)
5731 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5732
5733 /* For 32-bit MS-ABI the default is to keep aggregate
5734 return pointer. */
5735 if (ix86_function_type_abi (fntype) == MS_ABI)
5736 return true;
5737 }
5738 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5739 }
5740
5741 /* Value is the number of bytes of arguments automatically
5742 popped when returning from a subroutine call.
5743 FUNDECL is the declaration node of the function (as a tree),
5744 FUNTYPE is the data type of the function (as a tree),
5745 or for a library call it is an identifier node for the subroutine name.
5746 SIZE is the number of bytes of arguments passed on the stack.
5747
5748 On the 80386, the RTD insn may be used to pop them if the number
5749 of args is fixed, but if the number is variable then the caller
5750 must pop them all. RTD can't be used for library calls now
5751 because the library is compiled with the Unix compiler.
5752 Use of RTD is a selectable option, since it is incompatible with
5753 standard Unix calling sequences. If the option is not selected,
5754 the caller must always pop the args.
5755
5756 The attribute stdcall is equivalent to RTD on a per module basis. */
5757
5758 static int
5759 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5760 {
5761 unsigned int ccvt;
5762
5763 /* None of the 64-bit ABIs pop arguments. */
5764 if (TARGET_64BIT)
5765 return 0;
5766
5767 ccvt = ix86_get_callcvt (funtype);
5768
5769 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5770 | IX86_CALLCVT_THISCALL)) != 0
5771 && ! stdarg_p (funtype))
5772 return size;
5773
5774 /* Lose any fake structure return argument if it is passed on the stack. */
5775 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5776 && !ix86_keep_aggregate_return_pointer (funtype))
5777 {
5778 int nregs = ix86_function_regparm (funtype, fundecl);
5779 if (nregs == 0)
5780 return GET_MODE_SIZE (Pmode);
5781 }
5782
5783 return 0;
5784 }
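/* Illustrative example: for a 32-bit stdcall declaration such as

     __attribute__((stdcall)) void blit (int x, int y, int w, int h);

   the callee pops its own arguments, so this hook returns SIZE, here
   16 bytes for the four int arguments; for the default cdecl convention
   it returns 0 and the caller adjusts the stack after the call.  */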
5785
5786 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5787
5788 static bool
5789 ix86_legitimate_combined_insn (rtx insn)
5790 {
5791 /* Check operand constraints in case hard registers were propagated
5792 into insn pattern. This check prevents combine pass from
5793 generating insn patterns with invalid hard register operands.
5794 These invalid insns can eventually confuse reload to error out
5795 with a spill failure. See also PRs 46829 and 46843. */
5796 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5797 {
5798 int i;
5799
5800 extract_insn (insn);
5801 preprocess_constraints ();
5802
5803 for (i = 0; i < recog_data.n_operands; i++)
5804 {
5805 rtx op = recog_data.operand[i];
5806 enum machine_mode mode = GET_MODE (op);
5807 struct operand_alternative *op_alt;
5808 int offset = 0;
5809 bool win;
5810 int j;
5811
5812 /* For pre-AVX disallow unaligned loads/stores where the
5813 instructions don't support it. */
5814 if (!TARGET_AVX
5815 && VECTOR_MODE_P (GET_MODE (op))
5816 && misaligned_operand (op, GET_MODE (op)))
5817 {
5818 int min_align = get_attr_ssememalign (insn);
5819 if (min_align == 0)
5820 return false;
5821 }
5822
5823 /* A unary operator may be accepted by the predicate, but it
5824 is irrelevant for matching constraints. */
5825 if (UNARY_P (op))
5826 op = XEXP (op, 0);
5827
5828 if (GET_CODE (op) == SUBREG)
5829 {
5830 if (REG_P (SUBREG_REG (op))
5831 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5832 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5833 GET_MODE (SUBREG_REG (op)),
5834 SUBREG_BYTE (op),
5835 GET_MODE (op));
5836 op = SUBREG_REG (op);
5837 }
5838
5839 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5840 continue;
5841
5842 op_alt = recog_op_alt[i];
5843
5844 /* Operand has no constraints, anything is OK. */
5845 win = !recog_data.n_alternatives;
5846
5847 for (j = 0; j < recog_data.n_alternatives; j++)
5848 {
5849 if (op_alt[j].anything_ok
5850 || (op_alt[j].matches != -1
5851 && operands_match_p
5852 (recog_data.operand[i],
5853 recog_data.operand[op_alt[j].matches]))
5854 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5855 {
5856 win = true;
5857 break;
5858 }
5859 }
5860
5861 if (!win)
5862 return false;
5863 }
5864 }
5865
5866 return true;
5867 }
5868 \f
5869 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5870
5871 static unsigned HOST_WIDE_INT
5872 ix86_asan_shadow_offset (void)
5873 {
5874 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5875 : HOST_WIDE_INT_C (0x7fff8000))
5876 : (HOST_WIDE_INT_1 << 29);
5877 }
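/* Worked example of the standard ASan shadow mapping, shown only for
   illustration: libasan checks an access at address A through the shadow
   byte at (A >> 3) + offset.  With the LP64 Linux offset above, an access
   at 0x601040 uses shadow byte
   (0x601040 >> 3) + 0x7fff8000 = 0xc0208 + 0x7fff8000 = 0x800b8208.  */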
5878 \f
5879 /* Argument support functions. */
5880
5881 /* Return true when register may be used to pass function parameters. */
5882 bool
5883 ix86_function_arg_regno_p (int regno)
5884 {
5885 int i;
5886 const int *parm_regs;
5887
5888 if (!TARGET_64BIT)
5889 {
5890 if (TARGET_MACHO)
5891 return (regno < REGPARM_MAX
5892 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5893 else
5894 return (regno < REGPARM_MAX
5895 || (TARGET_MMX && MMX_REGNO_P (regno)
5896 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5897 || (TARGET_SSE && SSE_REGNO_P (regno)
5898 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5899 }
5900
5901 if (TARGET_SSE && SSE_REGNO_P (regno)
5902 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5903 return true;
5904
5905 /* TODO: The function should depend on current function ABI but
5906 builtins.c would need updating then. Therefore we use the
5907 default ABI. */
5908
5909 /* RAX is used as hidden argument to va_arg functions. */
5910 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5911 return true;
5912
5913 if (ix86_abi == MS_ABI)
5914 parm_regs = x86_64_ms_abi_int_parameter_registers;
5915 else
5916 parm_regs = x86_64_int_parameter_registers;
5917 for (i = 0; i < (ix86_abi == MS_ABI
5918 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5919 if (regno == parm_regs[i])
5920 return true;
5921 return false;
5922 }
5923
5924 /* Return true if we do not know how to pass TYPE solely in registers. */
5925
5926 static bool
5927 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5928 {
5929 if (must_pass_in_stack_var_size_or_pad (mode, type))
5930 return true;
5931
5932 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5933 The layout_type routine is crafty and tries to trick us into passing
5934 currently unsupported vector types on the stack by using TImode. */
5935 return (!TARGET_64BIT && mode == TImode
5936 && type && TREE_CODE (type) != VECTOR_TYPE);
5937 }
5938
5939 /* Return the size, in bytes, of the area reserved for arguments passed
5940 in registers for the function represented by FNDECL, depending on the
5941 ABI used. */
5942 int
5943 ix86_reg_parm_stack_space (const_tree fndecl)
5944 {
5945 enum calling_abi call_abi = SYSV_ABI;
5946 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5947 call_abi = ix86_function_abi (fndecl);
5948 else
5949 call_abi = ix86_function_type_abi (fndecl);
5950 if (TARGET_64BIT && call_abi == MS_ABI)
5951 return 32;
5952 return 0;
5953 }
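/* Illustrative note: the 32 bytes returned above for the 64-bit MS ABI are
   the shadow ("home") area the caller must reserve for the four register
   arguments.  For a hypothetical call such as

     long f (long a, long b, long c, long d);

   a..d travel in RCX, RDX, R8 and R9, yet the caller still allocates
   32 bytes of stack below the return address for them.  */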
5954
5955 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5956 call abi used. */
5957 enum calling_abi
5958 ix86_function_type_abi (const_tree fntype)
5959 {
5960 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5961 {
5962 enum calling_abi abi = ix86_abi;
5963 if (abi == SYSV_ABI)
5964 {
5965 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5966 abi = MS_ABI;
5967 }
5968 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5969 abi = SYSV_ABI;
5970 return abi;
5971 }
5972 return ix86_abi;
5973 }
5974
5975 /* We add this as a workaround in order to use libc_has_function
5976 hook in i386.md. */
5977 bool
5978 ix86_libc_has_function (enum function_class fn_class)
5979 {
5980 return targetm.libc_has_function (fn_class);
5981 }
5982
5983 static bool
5984 ix86_function_ms_hook_prologue (const_tree fn)
5985 {
5986 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5987 {
5988 if (decl_function_context (fn) != NULL_TREE)
5989 error_at (DECL_SOURCE_LOCATION (fn),
5990 "ms_hook_prologue is not compatible with nested function");
5991 else
5992 return true;
5993 }
5994 return false;
5995 }
5996
5997 static enum calling_abi
5998 ix86_function_abi (const_tree fndecl)
5999 {
6000 if (! fndecl)
6001 return ix86_abi;
6002 return ix86_function_type_abi (TREE_TYPE (fndecl));
6003 }
6004
6005 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6006 call abi used. */
6007 enum calling_abi
6008 ix86_cfun_abi (void)
6009 {
6010 if (! cfun)
6011 return ix86_abi;
6012 return cfun->machine->call_abi;
6013 }
6014
6015 /* Write the extra assembler code needed to declare a function properly. */
6016
6017 void
6018 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6019 tree decl)
6020 {
6021 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6022
6023 if (is_ms_hook)
6024 {
6025 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6026 unsigned int filler_cc = 0xcccccccc;
6027
6028 for (i = 0; i < filler_count; i += 4)
6029 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6030 }
6031
6032 #ifdef SUBTARGET_ASM_UNWIND_INIT
6033 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6034 #endif
6035
6036 ASM_OUTPUT_LABEL (asm_out_file, fname);
6037
6038 /* Output magic byte marker, if hot-patch attribute is set. */
6039 if (is_ms_hook)
6040 {
6041 if (TARGET_64BIT)
6042 {
6043 /* leaq [%rsp + 0], %rsp */
6044 asm_fprintf (asm_out_file, ASM_BYTE
6045 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6046 }
6047 else
6048 {
6049 /* movl.s %edi, %edi
6050 push %ebp
6051 movl.s %esp, %ebp */
6052 asm_fprintf (asm_out_file, ASM_BYTE
6053 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6054 }
6055 }
6056 }
6057
6058 /* regclass.c */
6059 extern void init_regs (void);
6060
6061 /* Implementation of the call ABI switching target hook. The call
6062 register sets specific to FNDECL are selected. See also
6063 ix86_conditional_register_usage for more details. */
6064 void
6065 ix86_call_abi_override (const_tree fndecl)
6066 {
6067 if (fndecl == NULL_TREE)
6068 cfun->machine->call_abi = ix86_abi;
6069 else
6070 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6071 }
6072
6073 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6074 Avoid expensive re-initialization of init_regs each time we switch
6075 function context, since this is needed only during RTL expansion. */
6076 static void
6077 ix86_maybe_switch_abi (void)
6078 {
6079 if (TARGET_64BIT &&
6080 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6081 reinit_regs ();
6082 }
6083
6084 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6085 for a call to a function whose data type is FNTYPE.
6086 For a library call, FNTYPE is 0. */
6087
6088 void
6089 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6090 tree fntype, /* tree ptr for function decl */
6091 rtx libname, /* SYMBOL_REF of library name or 0 */
6092 tree fndecl,
6093 int caller)
6094 {
6095 struct cgraph_local_info *i;
6096
6097 memset (cum, 0, sizeof (*cum));
6098
6099 if (fndecl)
6100 {
6101 i = cgraph_local_info (fndecl);
6102 cum->call_abi = ix86_function_abi (fndecl);
6103 }
6104 else
6105 {
6106 i = NULL;
6107 cum->call_abi = ix86_function_type_abi (fntype);
6108 }
6109
6110 cum->caller = caller;
6111
6112 /* Set up the number of registers to use for passing arguments. */
6113 cum->nregs = ix86_regparm;
6114 if (TARGET_64BIT)
6115 {
6116 cum->nregs = (cum->call_abi == SYSV_ABI
6117 ? X86_64_REGPARM_MAX
6118 : X86_64_MS_REGPARM_MAX);
6119 }
6120 if (TARGET_SSE)
6121 {
6122 cum->sse_nregs = SSE_REGPARM_MAX;
6123 if (TARGET_64BIT)
6124 {
6125 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6126 ? X86_64_SSE_REGPARM_MAX
6127 : X86_64_MS_SSE_REGPARM_MAX);
6128 }
6129 }
6130 if (TARGET_MMX)
6131 cum->mmx_nregs = MMX_REGPARM_MAX;
6132 cum->warn_avx512f = true;
6133 cum->warn_avx = true;
6134 cum->warn_sse = true;
6135 cum->warn_mmx = true;
6136
6137 /* Because the type might mismatch between caller and callee, we need to
6138 use the actual type of the function for local calls.
6139 FIXME: cgraph_analyze can be told to actually record if a function uses
6140 va_start, so for local functions maybe_vaarg can be made aggressive
6141 enough to help K&R code.
6142 FIXME: once the type system is fixed, we won't need this code anymore. */
6143 if (i && i->local && i->can_change_signature)
6144 fntype = TREE_TYPE (fndecl);
6145 cum->maybe_vaarg = (fntype
6146 ? (!prototype_p (fntype) || stdarg_p (fntype))
6147 : !libname);
6148
6149 if (!TARGET_64BIT)
6150 {
6151 /* If there are variable arguments, then we won't pass anything
6152 in registers in 32-bit mode. */
6153 if (stdarg_p (fntype))
6154 {
6155 cum->nregs = 0;
6156 cum->sse_nregs = 0;
6157 cum->mmx_nregs = 0;
6158 cum->warn_avx512f = 0;
6159 cum->warn_avx = 0;
6160 cum->warn_sse = 0;
6161 cum->warn_mmx = 0;
6162 return;
6163 }
6164
6165 /* Use ecx and edx registers if function has fastcall attribute,
6166 else look for regparm information. */
6167 if (fntype)
6168 {
6169 unsigned int ccvt = ix86_get_callcvt (fntype);
6170 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6171 {
6172 cum->nregs = 1;
6173 cum->fastcall = 1; /* Same first register as in fastcall. */
6174 }
6175 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6176 {
6177 cum->nregs = 2;
6178 cum->fastcall = 1;
6179 }
6180 else
6181 cum->nregs = ix86_function_regparm (fntype, fndecl);
6182 }
6183
6184 /* Set up the number of SSE registers used for passing SFmode
6185 and DFmode arguments. Warn for mismatching ABI. */
6186 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6187 }
6188 }
6189
6190 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6191 But in the case of vector types, it is some vector mode.
6192
6193 When we have only some of our vector isa extensions enabled, then there
6194 are some modes for which vector_mode_supported_p is false. For these
6195 modes, the generic vector support in gcc will choose some non-vector mode
6196 in order to implement the type. By computing the natural mode, we'll
6197 select the proper ABI location for the operand and not depend on whatever
6198 the middle-end decides to do with these vector types.
6199
6200 The middle-end can't deal with vector types wider than 16 bytes. In
6201 this case, we return the original mode and warn about the ABI change
6202 if CUM isn't NULL.
6203
6204 If INT_RETURN is true, warn ABI change if the vector mode isn't
6205 available for function return value. */
6206
6207 static enum machine_mode
6208 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6209 bool in_return)
6210 {
6211 enum machine_mode mode = TYPE_MODE (type);
6212
6213 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6214 {
6215 HOST_WIDE_INT size = int_size_in_bytes (type);
6216 if ((size == 8 || size == 16 || size == 32 || size == 64)
6217 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6218 && TYPE_VECTOR_SUBPARTS (type) > 1)
6219 {
6220 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6221
6222 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6223 mode = MIN_MODE_VECTOR_FLOAT;
6224 else
6225 mode = MIN_MODE_VECTOR_INT;
6226
6227 /* Get the mode which has this inner mode and number of units. */
6228 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6229 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6230 && GET_MODE_INNER (mode) == innermode)
6231 {
6232 if (size == 64 && !TARGET_AVX512F)
6233 {
6234 static bool warnedavx512f;
6235 static bool warnedavx512f_ret;
6236
6237 if (cum
6238 && !warnedavx512f
6239 && cum->warn_avx512f)
6240 {
6241 warnedavx512f = true;
6242 warning (0, "AVX512F vector argument without AVX512F "
6243 "enabled changes the ABI");
6244 }
6245 else if (in_return && !warnedavx512f_ret)
6246 {
6247 warnedavx512f_ret = true;
6248 warning (0, "AVX512F vector return without AVX512F "
6249 "enabled changes the ABI");
6250 }
6251
6252 return TYPE_MODE (type);
6253 }
6254 else if (size == 32 && !TARGET_AVX)
6255 {
6256 static bool warnedavx;
6257 static bool warnedavx_ret;
6258
6259 if (cum
6260 && !warnedavx
6261 && cum->warn_avx)
6262 {
6263 warnedavx = true;
6264 warning (0, "AVX vector argument without AVX "
6265 "enabled changes the ABI");
6266 }
6267 else if (in_return && !warnedavx_ret)
6268 {
6269 warnedavx_ret = true;
6270 warning (0, "AVX vector return without AVX "
6271 "enabled changes the ABI");
6272 }
6273
6274 return TYPE_MODE (type);
6275 }
6276 else if (((size == 8 && TARGET_64BIT) || size == 16)
6277 && !TARGET_SSE)
6278 {
6279 static bool warnedsse;
6280 static bool warnedsse_ret;
6281
6282 if (cum
6283 && !warnedsse
6284 && cum->warn_sse)
6285 {
6286 warnedsse = true;
6287 warning (0, "SSE vector argument without SSE "
6288 "enabled changes the ABI");
6289 }
6290 else if (!TARGET_64BIT
6291 && in_return
6292 && !warnedsse_ret)
6293 {
6294 warnedsse_ret = true;
6295 warning (0, "SSE vector return without SSE "
6296 "enabled changes the ABI");
6297 }
6298 }
6299 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6300 {
6301 static bool warnedmmx;
6302 static bool warnedmmx_ret;
6303
6304 if (cum
6305 && !warnedmmx
6306 && cum->warn_mmx)
6307 {
6308 warnedmmx = true;
6309 warning (0, "MMX vector argument without MMX "
6310 "enabled changes the ABI");
6311 }
6312 else if (in_return && !warnedmmx_ret)
6313 {
6314 warnedmmx_ret = true;
6315 warning (0, "MMX vector return without MMX "
6316 "enabled changes the ABI");
6317 }
6318 }
6319 return mode;
6320 }
6321
6322 gcc_unreachable ();
6323 }
6324 }
6325
6326 return mode;
6327 }
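
/* An assumed example of the warning paths above: compiling with -msse2
   but without -mavx, an argument of type
     typedef float v8sf __attribute__ ((vector_size (32)));
   is a 32-byte vector type without a supported vector mode.  The loop
   still finds V8SFmode, but since TARGET_AVX is false we keep
   TYPE_MODE (type) and warn once that the ABI changes; 64-byte vectors
   without TARGET_AVX512F take the analogous AVX512F path.  */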
6328
6329 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6330 this may not agree with the mode that the type system has chosen for the
6331 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6332 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6333
6334 static rtx
6335 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6336 unsigned int regno)
6337 {
6338 rtx tmp;
6339
6340 if (orig_mode != BLKmode)
6341 tmp = gen_rtx_REG (orig_mode, regno);
6342 else
6343 {
6344 tmp = gen_rtx_REG (mode, regno);
6345 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6346 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6347 }
6348
6349 return tmp;
6350 }
6351
6352 /* x86-64 register passing implementation. See the x86-64 psABI for details.
6353 The goal of this code is to classify each eightbyte of the incoming argument
6354 by register class and assign registers accordingly. */
6355
6356 /* Return the union class of CLASS1 and CLASS2.
6357 See the x86-64 PS ABI for details. */
6358
6359 static enum x86_64_reg_class
6360 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6361 {
6362 /* Rule #1: If both classes are equal, this is the resulting class. */
6363 if (class1 == class2)
6364 return class1;
6365
6366 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6367 the other class. */
6368 if (class1 == X86_64_NO_CLASS)
6369 return class2;
6370 if (class2 == X86_64_NO_CLASS)
6371 return class1;
6372
6373 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6374 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6375 return X86_64_MEMORY_CLASS;
6376
6377 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6378 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6379 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6380 return X86_64_INTEGERSI_CLASS;
6381 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6382 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6383 return X86_64_INTEGER_CLASS;
6384
6385 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6386 MEMORY is used. */
6387 if (class1 == X86_64_X87_CLASS
6388 || class1 == X86_64_X87UP_CLASS
6389 || class1 == X86_64_COMPLEX_X87_CLASS
6390 || class2 == X86_64_X87_CLASS
6391 || class2 == X86_64_X87UP_CLASS
6392 || class2 == X86_64_COMPLEX_X87_CLASS)
6393 return X86_64_MEMORY_CLASS;
6394
6395 /* Rule #6: Otherwise class SSE is used. */
6396 return X86_64_SSE_CLASS;
6397 }
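
/* Two assumed examples of the rules above: merging X86_64_SSEDF_CLASS
   with X86_64_INTEGERSI_CLASS hits rule #4 and yields
   X86_64_INTEGER_CLASS, while merging X86_64_SSE_CLASS with
   X86_64_X87_CLASS hits rule #5 and yields X86_64_MEMORY_CLASS.  */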
6398
6399 /* Classify the argument of type TYPE and mode MODE.
6400 CLASSES will be filled by the register class used to pass each word
6401 of the operand. The number of words is returned. In case the parameter
6402 should be passed in memory, 0 is returned. As a special case for zero
6403 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6404
6405 BIT_OFFSET is used internally for handling records and specifies the
6406 offset in bits, modulo 512, to avoid overflow cases.
6407
6408 See the x86-64 PS ABI for details.
6409 */
6410
6411 static int
6412 classify_argument (enum machine_mode mode, const_tree type,
6413 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6414 {
6415 HOST_WIDE_INT bytes =
6416 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6417 int words
6418 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6419
6420 /* Variable sized entities are always passed/returned in memory. */
6421 if (bytes < 0)
6422 return 0;
6423
6424 if (mode != VOIDmode
6425 && targetm.calls.must_pass_in_stack (mode, type))
6426 return 0;
6427
6428 if (type && AGGREGATE_TYPE_P (type))
6429 {
6430 int i;
6431 tree field;
6432 enum x86_64_reg_class subclasses[MAX_CLASSES];
6433
6434 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6435 if (bytes > 32)
6436 return 0;
6437
6438 for (i = 0; i < words; i++)
6439 classes[i] = X86_64_NO_CLASS;
6440
6441 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6442 signal the memory class, so handle it as a special case. */
6443 if (!words)
6444 {
6445 classes[0] = X86_64_NO_CLASS;
6446 return 1;
6447 }
6448
6449 /* Classify each field of record and merge classes. */
6450 switch (TREE_CODE (type))
6451 {
6452 case RECORD_TYPE:
6453 /* And now merge the fields of structure. */
6454 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6455 {
6456 if (TREE_CODE (field) == FIELD_DECL)
6457 {
6458 int num;
6459
6460 if (TREE_TYPE (field) == error_mark_node)
6461 continue;
6462
6463 /* Bitfields are always classified as integer. Handle them
6464 early, since later code would consider them to be
6465 misaligned integers. */
6466 if (DECL_BIT_FIELD (field))
6467 {
6468 for (i = (int_bit_position (field)
6469 + (bit_offset % 64)) / 8 / 8;
6470 i < ((int_bit_position (field) + (bit_offset % 64))
6471 + tree_to_shwi (DECL_SIZE (field))
6472 + 63) / 8 / 8; i++)
6473 classes[i] =
6474 merge_classes (X86_64_INTEGER_CLASS,
6475 classes[i]);
6476 }
6477 else
6478 {
6479 int pos;
6480
6481 type = TREE_TYPE (field);
6482
6483 /* Flexible array member is ignored. */
6484 if (TYPE_MODE (type) == BLKmode
6485 && TREE_CODE (type) == ARRAY_TYPE
6486 && TYPE_SIZE (type) == NULL_TREE
6487 && TYPE_DOMAIN (type) != NULL_TREE
6488 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6489 == NULL_TREE))
6490 {
6491 static bool warned;
6492
6493 if (!warned && warn_psabi)
6494 {
6495 warned = true;
6496 inform (input_location,
6497 "the ABI of passing struct with"
6498 " a flexible array member has"
6499 " changed in GCC 4.4");
6500 }
6501 continue;
6502 }
6503 num = classify_argument (TYPE_MODE (type), type,
6504 subclasses,
6505 (int_bit_position (field)
6506 + bit_offset) % 512);
6507 if (!num)
6508 return 0;
6509 pos = (int_bit_position (field)
6510 + (bit_offset % 64)) / 8 / 8;
6511 for (i = 0; i < num && (i + pos) < words; i++)
6512 classes[i + pos] =
6513 merge_classes (subclasses[i], classes[i + pos]);
6514 }
6515 }
6516 }
6517 break;
6518
6519 case ARRAY_TYPE:
6520 /* Arrays are handled as small records. */
6521 {
6522 int num;
6523 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6524 TREE_TYPE (type), subclasses, bit_offset);
6525 if (!num)
6526 return 0;
6527
6528 /* The partial classes are now full classes. */
6529 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6530 subclasses[0] = X86_64_SSE_CLASS;
6531 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6532 && !((bit_offset % 64) == 0 && bytes == 4))
6533 subclasses[0] = X86_64_INTEGER_CLASS;
6534
6535 for (i = 0; i < words; i++)
6536 classes[i] = subclasses[i % num];
6537
6538 break;
6539 }
6540 case UNION_TYPE:
6541 case QUAL_UNION_TYPE:
6542 /* Unions are similar to RECORD_TYPE, except that the offset is
6543 always 0. */
6544 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6545 {
6546 if (TREE_CODE (field) == FIELD_DECL)
6547 {
6548 int num;
6549
6550 if (TREE_TYPE (field) == error_mark_node)
6551 continue;
6552
6553 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6554 TREE_TYPE (field), subclasses,
6555 bit_offset);
6556 if (!num)
6557 return 0;
6558 for (i = 0; i < num; i++)
6559 classes[i] = merge_classes (subclasses[i], classes[i]);
6560 }
6561 }
6562 break;
6563
6564 default:
6565 gcc_unreachable ();
6566 }
6567
6568 if (words > 2)
6569 {
6570 /* When the size exceeds 16 bytes, if the first eightbyte isn't
6571 X86_64_SSE_CLASS or any of the remaining ones isn't
6572 X86_64_SSEUP_CLASS, everything should be passed in
6573 memory. */
6574 if (classes[0] != X86_64_SSE_CLASS)
6575 return 0;
6576
6577 for (i = 1; i < words; i++)
6578 if (classes[i] != X86_64_SSEUP_CLASS)
6579 return 0;
6580 }
6581
6582 /* Final merger cleanup. */
6583 for (i = 0; i < words; i++)
6584 {
6585 /* If one class is MEMORY, everything should be passed in
6586 memory. */
6587 if (classes[i] == X86_64_MEMORY_CLASS)
6588 return 0;
6589
6590 /* The X86_64_SSEUP_CLASS should be always preceded by
6591 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6592 if (classes[i] == X86_64_SSEUP_CLASS
6593 && classes[i - 1] != X86_64_SSE_CLASS
6594 && classes[i - 1] != X86_64_SSEUP_CLASS)
6595 {
6596 /* The first one should never be X86_64_SSEUP_CLASS. */
6597 gcc_assert (i != 0);
6598 classes[i] = X86_64_SSE_CLASS;
6599 }
6600
6601 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6602 everything should be passed in memory. */
6603 if (classes[i] == X86_64_X87UP_CLASS
6604 && (classes[i - 1] != X86_64_X87_CLASS))
6605 {
6606 static bool warned;
6607
6608 /* The first one should never be X86_64_X87UP_CLASS. */
6609 gcc_assert (i != 0);
6610 if (!warned && warn_psabi)
6611 {
6612 warned = true;
6613 inform (input_location,
6614 "the ABI of passing union with long double"
6615 " has changed in GCC 4.4");
6616 }
6617 return 0;
6618 }
6619 }
6620 return words;
6621 }
6622
6623 /* Compute the alignment needed. We align all types to their natural
6624 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6625 if (mode != VOIDmode && mode != BLKmode)
6626 {
6627 int mode_alignment = GET_MODE_BITSIZE (mode);
6628
6629 if (mode == XFmode)
6630 mode_alignment = 128;
6631 else if (mode == XCmode)
6632 mode_alignment = 256;
6633 if (COMPLEX_MODE_P (mode))
6634 mode_alignment /= 2;
6635 /* Misaligned fields are always returned in memory. */
6636 if (bit_offset % mode_alignment)
6637 return 0;
6638 }
6639
6640 /* For V1xx modes, just use the base mode. */
6641 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6642 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6643 mode = GET_MODE_INNER (mode);
6644
6645 /* Classification of atomic types. */
6646 switch (mode)
6647 {
6648 case SDmode:
6649 case DDmode:
6650 classes[0] = X86_64_SSE_CLASS;
6651 return 1;
6652 case TDmode:
6653 classes[0] = X86_64_SSE_CLASS;
6654 classes[1] = X86_64_SSEUP_CLASS;
6655 return 2;
6656 case DImode:
6657 case SImode:
6658 case HImode:
6659 case QImode:
6660 case CSImode:
6661 case CHImode:
6662 case CQImode:
6663 {
6664 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6665
6666 /* Analyze last 128 bits only. */
6667 size = (size - 1) & 0x7f;
6668
6669 if (size < 32)
6670 {
6671 classes[0] = X86_64_INTEGERSI_CLASS;
6672 return 1;
6673 }
6674 else if (size < 64)
6675 {
6676 classes[0] = X86_64_INTEGER_CLASS;
6677 return 1;
6678 }
6679 else if (size < 64+32)
6680 {
6681 classes[0] = X86_64_INTEGER_CLASS;
6682 classes[1] = X86_64_INTEGERSI_CLASS;
6683 return 2;
6684 }
6685 else if (size < 64+64)
6686 {
6687 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6688 return 2;
6689 }
6690 else
6691 gcc_unreachable ();
6692 }
6693 case CDImode:
6694 case TImode:
6695 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6696 return 2;
6697 case COImode:
6698 case OImode:
6699 /* OImode shouldn't be used directly. */
6700 gcc_unreachable ();
6701 case CTImode:
6702 return 0;
6703 case SFmode:
6704 if (!(bit_offset % 64))
6705 classes[0] = X86_64_SSESF_CLASS;
6706 else
6707 classes[0] = X86_64_SSE_CLASS;
6708 return 1;
6709 case DFmode:
6710 classes[0] = X86_64_SSEDF_CLASS;
6711 return 1;
6712 case XFmode:
6713 classes[0] = X86_64_X87_CLASS;
6714 classes[1] = X86_64_X87UP_CLASS;
6715 return 2;
6716 case TFmode:
6717 classes[0] = X86_64_SSE_CLASS;
6718 classes[1] = X86_64_SSEUP_CLASS;
6719 return 2;
6720 case SCmode:
6721 classes[0] = X86_64_SSE_CLASS;
6722 if (!(bit_offset % 64))
6723 return 1;
6724 else
6725 {
6726 static bool warned;
6727
6728 if (!warned && warn_psabi)
6729 {
6730 warned = true;
6731 inform (input_location,
6732 "the ABI of passing structure with complex float"
6733 " member has changed in GCC 4.4");
6734 }
6735 classes[1] = X86_64_SSESF_CLASS;
6736 return 2;
6737 }
6738 case DCmode:
6739 classes[0] = X86_64_SSEDF_CLASS;
6740 classes[1] = X86_64_SSEDF_CLASS;
6741 return 2;
6742 case XCmode:
6743 classes[0] = X86_64_COMPLEX_X87_CLASS;
6744 return 1;
6745 case TCmode:
6746 /* This mode is larger than 16 bytes. */
6747 return 0;
6748 case V8SFmode:
6749 case V8SImode:
6750 case V32QImode:
6751 case V16HImode:
6752 case V4DFmode:
6753 case V4DImode:
6754 classes[0] = X86_64_SSE_CLASS;
6755 classes[1] = X86_64_SSEUP_CLASS;
6756 classes[2] = X86_64_SSEUP_CLASS;
6757 classes[3] = X86_64_SSEUP_CLASS;
6758 return 4;
6759 case V8DFmode:
6760 case V16SFmode:
6761 case V8DImode:
6762 case V16SImode:
6763 case V32HImode:
6764 case V64QImode:
6765 classes[0] = X86_64_SSE_CLASS;
6766 classes[1] = X86_64_SSEUP_CLASS;
6767 classes[2] = X86_64_SSEUP_CLASS;
6768 classes[3] = X86_64_SSEUP_CLASS;
6769 classes[4] = X86_64_SSEUP_CLASS;
6770 classes[5] = X86_64_SSEUP_CLASS;
6771 classes[6] = X86_64_SSEUP_CLASS;
6772 classes[7] = X86_64_SSEUP_CLASS;
6773 return 8;
6774 case V4SFmode:
6775 case V4SImode:
6776 case V16QImode:
6777 case V8HImode:
6778 case V2DFmode:
6779 case V2DImode:
6780 classes[0] = X86_64_SSE_CLASS;
6781 classes[1] = X86_64_SSEUP_CLASS;
6782 return 2;
6783 case V1TImode:
6784 case V1DImode:
6785 case V2SFmode:
6786 case V2SImode:
6787 case V4HImode:
6788 case V8QImode:
6789 classes[0] = X86_64_SSE_CLASS;
6790 return 1;
6791 case BLKmode:
6792 case VOIDmode:
6793 return 0;
6794 default:
6795 gcc_assert (VECTOR_MODE_P (mode));
6796
6797 if (bytes > 16)
6798 return 0;
6799
6800 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6801
6802 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6803 classes[0] = X86_64_INTEGERSI_CLASS;
6804 else
6805 classes[0] = X86_64_INTEGER_CLASS;
6806 classes[1] = X86_64_INTEGER_CLASS;
6807 return 1 + (bytes > 8);
6808 }
6809 }
6810
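/* A worked example (illustrative): for
     struct s { double d; int i; };
   the first eightbyte classifies as X86_64_SSEDF_CLASS and the second as
   X86_64_INTEGER_CLASS, so examine_argument below reports one SSE and one
   integer register, and construct_container builds a two-entry PARALLEL:
   an SSE register at byte offset 0 and a general register at offset 8.  */
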
6811 /* Examine the argument and set the number of registers required in each
6812 class. Return 0 iff the parameter should be passed in memory. */
6813 static int
6814 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6815 int *int_nregs, int *sse_nregs)
6816 {
6817 enum x86_64_reg_class regclass[MAX_CLASSES];
6818 int n = classify_argument (mode, type, regclass, 0);
6819
6820 *int_nregs = 0;
6821 *sse_nregs = 0;
6822 if (!n)
6823 return 0;
6824 for (n--; n >= 0; n--)
6825 switch (regclass[n])
6826 {
6827 case X86_64_INTEGER_CLASS:
6828 case X86_64_INTEGERSI_CLASS:
6829 (*int_nregs)++;
6830 break;
6831 case X86_64_SSE_CLASS:
6832 case X86_64_SSESF_CLASS:
6833 case X86_64_SSEDF_CLASS:
6834 (*sse_nregs)++;
6835 break;
6836 case X86_64_NO_CLASS:
6837 case X86_64_SSEUP_CLASS:
6838 break;
6839 case X86_64_X87_CLASS:
6840 case X86_64_X87UP_CLASS:
6841 if (!in_return)
6842 return 0;
6843 break;
6844 case X86_64_COMPLEX_X87_CLASS:
6845 return in_return ? 2 : 0;
6846 case X86_64_MEMORY_CLASS:
6847 gcc_unreachable ();
6848 }
6849 return 1;
6850 }
6851
6852 /* Construct container for the argument used by GCC interface. See
6853 FUNCTION_ARG for the detailed description. */
6854
6855 static rtx
6856 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6857 const_tree type, int in_return, int nintregs, int nsseregs,
6858 const int *intreg, int sse_regno)
6859 {
6860 /* The following variables hold the static issued_error state. */
6861 static bool issued_sse_arg_error;
6862 static bool issued_sse_ret_error;
6863 static bool issued_x87_ret_error;
6864
6865 enum machine_mode tmpmode;
6866 int bytes =
6867 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6868 enum x86_64_reg_class regclass[MAX_CLASSES];
6869 int n;
6870 int i;
6871 int nexps = 0;
6872 int needed_sseregs, needed_intregs;
6873 rtx exp[MAX_CLASSES];
6874 rtx ret;
6875
6876 n = classify_argument (mode, type, regclass, 0);
6877 if (!n)
6878 return NULL;
6879 if (!examine_argument (mode, type, in_return, &needed_intregs,
6880 &needed_sseregs))
6881 return NULL;
6882 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6883 return NULL;
6884
6885 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6886 some less clueful developer tries to use floating-point anyway. */
6887 if (needed_sseregs && !TARGET_SSE)
6888 {
6889 if (in_return)
6890 {
6891 if (!issued_sse_ret_error)
6892 {
6893 error ("SSE register return with SSE disabled");
6894 issued_sse_ret_error = true;
6895 }
6896 }
6897 else if (!issued_sse_arg_error)
6898 {
6899 error ("SSE register argument with SSE disabled");
6900 issued_sse_arg_error = true;
6901 }
6902 return NULL;
6903 }
6904
6905 /* Likewise, error if the ABI requires us to return values in the
6906 x87 registers and the user specified -mno-80387. */
6907 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6908 for (i = 0; i < n; i++)
6909 if (regclass[i] == X86_64_X87_CLASS
6910 || regclass[i] == X86_64_X87UP_CLASS
6911 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6912 {
6913 if (!issued_x87_ret_error)
6914 {
6915 error ("x87 register return with x87 disabled");
6916 issued_x87_ret_error = true;
6917 }
6918 return NULL;
6919 }
6920
6921 /* First construct simple cases. Avoid SCmode, since we want to use a
6922 single register to pass this type. */
6923 if (n == 1 && mode != SCmode)
6924 switch (regclass[0])
6925 {
6926 case X86_64_INTEGER_CLASS:
6927 case X86_64_INTEGERSI_CLASS:
6928 return gen_rtx_REG (mode, intreg[0]);
6929 case X86_64_SSE_CLASS:
6930 case X86_64_SSESF_CLASS:
6931 case X86_64_SSEDF_CLASS:
6932 if (mode != BLKmode)
6933 return gen_reg_or_parallel (mode, orig_mode,
6934 SSE_REGNO (sse_regno));
6935 break;
6936 case X86_64_X87_CLASS:
6937 case X86_64_COMPLEX_X87_CLASS:
6938 return gen_rtx_REG (mode, FIRST_STACK_REG);
6939 case X86_64_NO_CLASS:
6940 /* Zero sized array, struct or class. */
6941 return NULL;
6942 default:
6943 gcc_unreachable ();
6944 }
6945 if (n == 2
6946 && regclass[0] == X86_64_SSE_CLASS
6947 && regclass[1] == X86_64_SSEUP_CLASS
6948 && mode != BLKmode)
6949 return gen_reg_or_parallel (mode, orig_mode,
6950 SSE_REGNO (sse_regno));
6951 if (n == 4
6952 && regclass[0] == X86_64_SSE_CLASS
6953 && regclass[1] == X86_64_SSEUP_CLASS
6954 && regclass[2] == X86_64_SSEUP_CLASS
6955 && regclass[3] == X86_64_SSEUP_CLASS
6956 && mode != BLKmode)
6957 return gen_reg_or_parallel (mode, orig_mode,
6958 SSE_REGNO (sse_regno));
6959 if (n == 8
6960 && regclass[0] == X86_64_SSE_CLASS
6961 && regclass[1] == X86_64_SSEUP_CLASS
6962 && regclass[2] == X86_64_SSEUP_CLASS
6963 && regclass[3] == X86_64_SSEUP_CLASS
6964 && regclass[4] == X86_64_SSEUP_CLASS
6965 && regclass[5] == X86_64_SSEUP_CLASS
6966 && regclass[6] == X86_64_SSEUP_CLASS
6967 && regclass[7] == X86_64_SSEUP_CLASS
6968 && mode != BLKmode)
6969 return gen_reg_or_parallel (mode, orig_mode,
6970 SSE_REGNO (sse_regno));
6971 if (n == 2
6972 && regclass[0] == X86_64_X87_CLASS
6973 && regclass[1] == X86_64_X87UP_CLASS)
6974 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6975
6976 if (n == 2
6977 && regclass[0] == X86_64_INTEGER_CLASS
6978 && regclass[1] == X86_64_INTEGER_CLASS
6979 && (mode == CDImode || mode == TImode || mode == TFmode)
6980 && intreg[0] + 1 == intreg[1])
6981 return gen_rtx_REG (mode, intreg[0]);
6982
6983 /* Otherwise figure out the entries of the PARALLEL. */
6984 for (i = 0; i < n; i++)
6985 {
6986 int pos;
6987
6988 switch (regclass[i])
6989 {
6990 case X86_64_NO_CLASS:
6991 break;
6992 case X86_64_INTEGER_CLASS:
6993 case X86_64_INTEGERSI_CLASS:
6994 /* Merge TImodes on aligned occasions here too. */
6995 if (i * 8 + 8 > bytes)
6996 tmpmode
6997 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6998 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6999 tmpmode = SImode;
7000 else
7001 tmpmode = DImode;
7002 /* We've requested 24 bytes for which we
7003 don't have a mode. Use DImode. */
7004 if (tmpmode == BLKmode)
7005 tmpmode = DImode;
7006 exp [nexps++]
7007 = gen_rtx_EXPR_LIST (VOIDmode,
7008 gen_rtx_REG (tmpmode, *intreg),
7009 GEN_INT (i*8));
7010 intreg++;
7011 break;
7012 case X86_64_SSESF_CLASS:
7013 exp [nexps++]
7014 = gen_rtx_EXPR_LIST (VOIDmode,
7015 gen_rtx_REG (SFmode,
7016 SSE_REGNO (sse_regno)),
7017 GEN_INT (i*8));
7018 sse_regno++;
7019 break;
7020 case X86_64_SSEDF_CLASS:
7021 exp [nexps++]
7022 = gen_rtx_EXPR_LIST (VOIDmode,
7023 gen_rtx_REG (DFmode,
7024 SSE_REGNO (sse_regno)),
7025 GEN_INT (i*8));
7026 sse_regno++;
7027 break;
7028 case X86_64_SSE_CLASS:
7029 pos = i;
7030 switch (n)
7031 {
7032 case 1:
7033 tmpmode = DImode;
7034 break;
7035 case 2:
7036 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7037 {
7038 tmpmode = TImode;
7039 i++;
7040 }
7041 else
7042 tmpmode = DImode;
7043 break;
7044 case 4:
7045 gcc_assert (i == 0
7046 && regclass[1] == X86_64_SSEUP_CLASS
7047 && regclass[2] == X86_64_SSEUP_CLASS
7048 && regclass[3] == X86_64_SSEUP_CLASS);
7049 tmpmode = OImode;
7050 i += 3;
7051 break;
7052 case 8:
7053 gcc_assert (i == 0
7054 && regclass[1] == X86_64_SSEUP_CLASS
7055 && regclass[2] == X86_64_SSEUP_CLASS
7056 && regclass[3] == X86_64_SSEUP_CLASS
7057 && regclass[4] == X86_64_SSEUP_CLASS
7058 && regclass[5] == X86_64_SSEUP_CLASS
7059 && regclass[6] == X86_64_SSEUP_CLASS
7060 && regclass[7] == X86_64_SSEUP_CLASS);
7061 tmpmode = XImode;
7062 i += 7;
7063 break;
7064 default:
7065 gcc_unreachable ();
7066 }
7067 exp [nexps++]
7068 = gen_rtx_EXPR_LIST (VOIDmode,
7069 gen_rtx_REG (tmpmode,
7070 SSE_REGNO (sse_regno)),
7071 GEN_INT (pos*8));
7072 sse_regno++;
7073 break;
7074 default:
7075 gcc_unreachable ();
7076 }
7077 }
7078
7079 /* Empty aligned struct, union or class. */
7080 if (nexps == 0)
7081 return NULL;
7082
7083 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7084 for (i = 0; i < nexps; i++)
7085 XVECEXP (ret, 0, i) = exp [i];
7086 return ret;
7087 }
7088
7089 /* Update the data in CUM to advance over an argument of mode MODE
7090 and data type TYPE. (TYPE is null for libcalls where that information
7091 may not be available.) */
7092
7093 static void
7094 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7095 const_tree type, HOST_WIDE_INT bytes,
7096 HOST_WIDE_INT words)
7097 {
7098 switch (mode)
7099 {
7100 default:
7101 break;
7102
7103 case BLKmode:
7104 if (bytes < 0)
7105 break;
7106 /* FALLTHRU */
7107
7108 case DImode:
7109 case SImode:
7110 case HImode:
7111 case QImode:
7112 cum->words += words;
7113 cum->nregs -= words;
7114 cum->regno += words;
7115
7116 if (cum->nregs <= 0)
7117 {
7118 cum->nregs = 0;
7119 cum->regno = 0;
7120 }
7121 break;
7122
7123 case OImode:
7124 /* OImode shouldn't be used directly. */
7125 gcc_unreachable ();
7126
7127 case DFmode:
7128 if (cum->float_in_sse < 2)
7129 break;
7130 case SFmode:
7131 if (cum->float_in_sse < 1)
7132 break;
7133 /* FALLTHRU */
7134
7135 case V8SFmode:
7136 case V8SImode:
7137 case V64QImode:
7138 case V32HImode:
7139 case V16SImode:
7140 case V8DImode:
7141 case V16SFmode:
7142 case V8DFmode:
7143 case V32QImode:
7144 case V16HImode:
7145 case V4DFmode:
7146 case V4DImode:
7147 case TImode:
7148 case V16QImode:
7149 case V8HImode:
7150 case V4SImode:
7151 case V2DImode:
7152 case V4SFmode:
7153 case V2DFmode:
7154 if (!type || !AGGREGATE_TYPE_P (type))
7155 {
7156 cum->sse_words += words;
7157 cum->sse_nregs -= 1;
7158 cum->sse_regno += 1;
7159 if (cum->sse_nregs <= 0)
7160 {
7161 cum->sse_nregs = 0;
7162 cum->sse_regno = 0;
7163 }
7164 }
7165 break;
7166
7167 case V8QImode:
7168 case V4HImode:
7169 case V2SImode:
7170 case V2SFmode:
7171 case V1TImode:
7172 case V1DImode:
7173 if (!type || !AGGREGATE_TYPE_P (type))
7174 {
7175 cum->mmx_words += words;
7176 cum->mmx_nregs -= 1;
7177 cum->mmx_regno += 1;
7178 if (cum->mmx_nregs <= 0)
7179 {
7180 cum->mmx_nregs = 0;
7181 cum->mmx_regno = 0;
7182 }
7183 }
7184 break;
7185 }
7186 }
7187
7188 static void
7189 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7190 const_tree type, HOST_WIDE_INT words, bool named)
7191 {
7192 int int_nregs, sse_nregs;
7193
7194 /* Unnamed 512bit and 256bit vector mode parameters are passed on the stack. */
7195 if (!named && (VALID_AVX512F_REG_MODE (mode)
7196 || VALID_AVX256_REG_MODE (mode)))
7197 return;
7198
7199 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7200 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7201 {
7202 cum->nregs -= int_nregs;
7203 cum->sse_nregs -= sse_nregs;
7204 cum->regno += int_nregs;
7205 cum->sse_regno += sse_nregs;
7206 }
7207 else
7208 {
7209 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7210 cum->words = (cum->words + align - 1) & ~(align - 1);
7211 cum->words += words;
7212 }
7213 }
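
/* A small worked example of the rounding above (illustrative): for an
   argument with a 128-bit boundary on x86-64, ALIGN is 2 words, so a
   cum->words value of 3 is rounded up to 4 before WORDS is added.  */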
7214
7215 static void
7216 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7217 HOST_WIDE_INT words)
7218 {
7219 /* Otherwise, this should be passed indirectly. */
7220 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7221
7222 cum->words += words;
7223 if (cum->nregs > 0)
7224 {
7225 cum->nregs -= 1;
7226 cum->regno += 1;
7227 }
7228 }
7229
7230 /* Update the data in CUM to advance over an argument of mode MODE and
7231 data type TYPE. (TYPE is null for libcalls where that information
7232 may not be available.) */
7233
7234 static void
7235 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7236 const_tree type, bool named)
7237 {
7238 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7239 HOST_WIDE_INT bytes, words;
7240
7241 if (mode == BLKmode)
7242 bytes = int_size_in_bytes (type);
7243 else
7244 bytes = GET_MODE_SIZE (mode);
7245 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7246
7247 if (type)
7248 mode = type_natural_mode (type, NULL, false);
7249
7250 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7251 function_arg_advance_ms_64 (cum, bytes, words);
7252 else if (TARGET_64BIT)
7253 function_arg_advance_64 (cum, mode, type, words, named);
7254 else
7255 function_arg_advance_32 (cum, mode, type, bytes, words);
7256 }
7257
7258 /* Define where to put the arguments to a function.
7259 Value is zero to push the argument on the stack,
7260 or a hard register in which to store the argument.
7261
7262 MODE is the argument's machine mode.
7263 TYPE is the data type of the argument (as a tree).
7264 This is null for libcalls where that information may
7265 not be available.
7266 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7267 the preceding args and about the function being called.
7268 NAMED is nonzero if this argument is a named parameter
7269 (otherwise it is an extra parameter matching an ellipsis). */
7270
7271 static rtx
7272 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7273 enum machine_mode orig_mode, const_tree type,
7274 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7275 {
7276 /* Avoid the AL settings for the Unix64 ABI. */
7277 if (mode == VOIDmode)
7278 return constm1_rtx;
7279
7280 switch (mode)
7281 {
7282 default:
7283 break;
7284
7285 case BLKmode:
7286 if (bytes < 0)
7287 break;
7288 /* FALLTHRU */
7289 case DImode:
7290 case SImode:
7291 case HImode:
7292 case QImode:
7293 if (words <= cum->nregs)
7294 {
7295 int regno = cum->regno;
7296
7297 /* Fastcall allocates the first two DWORD (SImode) or
7298 smaller arguments to ECX and EDX if the argument isn't
7299 an aggregate type. */
7300 if (cum->fastcall)
7301 {
7302 if (mode == BLKmode
7303 || mode == DImode
7304 || (type && AGGREGATE_TYPE_P (type)))
7305 break;
7306
7307 /* ECX not EAX is the first allocated register. */
7308 if (regno == AX_REG)
7309 regno = CX_REG;
7310 }
7311 return gen_rtx_REG (mode, regno);
7312 }
7313 break;
7314
7315 case DFmode:
7316 if (cum->float_in_sse < 2)
7317 break;
7318 case SFmode:
7319 if (cum->float_in_sse < 1)
7320 break;
7321 /* FALLTHRU */
7322 case TImode:
7323 /* In 32bit, we pass TImode in xmm registers. */
7324 case V16QImode:
7325 case V8HImode:
7326 case V4SImode:
7327 case V2DImode:
7328 case V4SFmode:
7329 case V2DFmode:
7330 if (!type || !AGGREGATE_TYPE_P (type))
7331 {
7332 if (cum->sse_nregs)
7333 return gen_reg_or_parallel (mode, orig_mode,
7334 cum->sse_regno + FIRST_SSE_REG);
7335 }
7336 break;
7337
7338 case OImode:
7339 case XImode:
7340 /* OImode and XImode shouldn't be used directly. */
7341 gcc_unreachable ();
7342
7343 case V64QImode:
7344 case V32HImode:
7345 case V16SImode:
7346 case V8DImode:
7347 case V16SFmode:
7348 case V8DFmode:
7349 case V8SFmode:
7350 case V8SImode:
7351 case V32QImode:
7352 case V16HImode:
7353 case V4DFmode:
7354 case V4DImode:
7355 if (!type || !AGGREGATE_TYPE_P (type))
7356 {
7357 if (cum->sse_nregs)
7358 return gen_reg_or_parallel (mode, orig_mode,
7359 cum->sse_regno + FIRST_SSE_REG);
7360 }
7361 break;
7362
7363 case V8QImode:
7364 case V4HImode:
7365 case V2SImode:
7366 case V2SFmode:
7367 case V1TImode:
7368 case V1DImode:
7369 if (!type || !AGGREGATE_TYPE_P (type))
7370 {
7371 if (cum->mmx_nregs)
7372 return gen_reg_or_parallel (mode, orig_mode,
7373 cum->mmx_regno + FIRST_MMX_REG);
7374 }
7375 break;
7376 }
7377
7378 return NULL_RTX;
7379 }
7380
7381 static rtx
7382 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7383 enum machine_mode orig_mode, const_tree type, bool named)
7384 {
7385 /* Handle a hidden AL argument containing number of registers
7386 for varargs x86-64 functions. */
7387 if (mode == VOIDmode)
7388 return GEN_INT (cum->maybe_vaarg
7389 ? (cum->sse_nregs < 0
7390 ? X86_64_SSE_REGPARM_MAX
7391 : cum->sse_regno)
7392 : -1);
7393
7394 switch (mode)
7395 {
7396 default:
7397 break;
7398
7399 case V8SFmode:
7400 case V8SImode:
7401 case V32QImode:
7402 case V16HImode:
7403 case V4DFmode:
7404 case V4DImode:
7405 case V16SFmode:
7406 case V16SImode:
7407 case V64QImode:
7408 case V32HImode:
7409 case V8DFmode:
7410 case V8DImode:
7411 /* Unnamed 256bit and 512bit vector mode parameters are passed on the stack. */
7412 if (!named)
7413 return NULL;
7414 break;
7415 }
7416
7417 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7418 cum->sse_nregs,
7419 &x86_64_int_parameter_registers [cum->regno],
7420 cum->sse_regno);
7421 }
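
/* Illustrative note on the VOIDmode case above: for a SysV varargs call
   such as printf ("%f", 1.0), the double travels in %xmm0, so the hidden
   value returned here is 1; the call expander loads it into %al so the
   callee knows how many vector registers were used.  */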
7422
7423 static rtx
7424 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7425 enum machine_mode orig_mode, bool named,
7426 HOST_WIDE_INT bytes)
7427 {
7428 unsigned int regno;
7429
7430 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7431 We use the value -2 to specify that the current function call is MSABI. */
7432 if (mode == VOIDmode)
7433 return GEN_INT (-2);
7434
7435 /* If we've run out of registers, it goes on the stack. */
7436 if (cum->nregs == 0)
7437 return NULL_RTX;
7438
7439 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7440
7441 /* Only floating point modes are passed in anything but integer regs. */
7442 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7443 {
7444 if (named)
7445 regno = cum->regno + FIRST_SSE_REG;
7446 else
7447 {
7448 rtx t1, t2;
7449
7450 /* Unnamed floating parameters are passed in both the
7451 SSE and integer registers. */
7452 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7453 t2 = gen_rtx_REG (mode, regno);
7454 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7455 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7456 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7457 }
7458 }
7459 /* Handle aggregate types passed in registers. */
7460 if (orig_mode == BLKmode)
7461 {
7462 if (bytes > 0 && bytes <= 8)
7463 mode = (bytes > 4 ? DImode : SImode);
7464 if (mode == BLKmode)
7465 mode = DImode;
7466 }
7467
7468 return gen_reg_or_parallel (mode, orig_mode, regno);
7469 }
7470
7471 /* Return where to put the arguments to a function.
7472 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7473
7474 MODE is the argument's machine mode. TYPE is the data type of the
7475 argument. It is null for libcalls where that information may not be
7476 available. CUM gives information about the preceding args and about
7477 the function being called. NAMED is nonzero if this argument is a
7478 named parameter (otherwise it is an extra parameter matching an
7479 ellipsis). */
7480
7481 static rtx
7482 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7483 const_tree type, bool named)
7484 {
7485 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7486 enum machine_mode mode = omode;
7487 HOST_WIDE_INT bytes, words;
7488 rtx arg;
7489
7490 if (mode == BLKmode)
7491 bytes = int_size_in_bytes (type);
7492 else
7493 bytes = GET_MODE_SIZE (mode);
7494 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7495
7496 /* To simplify the code below, represent vector types with a vector mode
7497 even if MMX/SSE are not active. */
7498 if (type && TREE_CODE (type) == VECTOR_TYPE)
7499 mode = type_natural_mode (type, cum, false);
7500
7501 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7502 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7503 else if (TARGET_64BIT)
7504 arg = function_arg_64 (cum, mode, omode, type, named);
7505 else
7506 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7507
7508 return arg;
7509 }
7510
7511 /* A C expression that indicates when an argument must be passed by
7512 reference. If nonzero for an argument, a copy of that argument is
7513 made in memory and a pointer to the argument is passed instead of
7514 the argument itself. The pointer is passed in whatever way is
7515 appropriate for passing a pointer to that type. */
7516
7517 static bool
7518 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7519 const_tree type, bool named ATTRIBUTE_UNUSED)
7520 {
7521 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7522
7523 /* See Windows x64 Software Convention. */
7524 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7525 {
7526 int msize = (int) GET_MODE_SIZE (mode);
7527 if (type)
7528 {
7529 /* Arrays are passed by reference. */
7530 if (TREE_CODE (type) == ARRAY_TYPE)
7531 return true;
7532
7533 if (AGGREGATE_TYPE_P (type))
7534 {
7535 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7536 are passed by reference. */
7537 msize = int_size_in_bytes (type);
7538 }
7539 }
7540
7541 /* __m128 is passed by reference. */
7542 switch (msize) {
7543 case 1: case 2: case 4: case 8:
7544 break;
7545 default:
7546 return true;
7547 }
7548 }
7549 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7550 return 1;
7551
7552 return 0;
7553 }
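
/* Illustrative consequences of the MS ABI check above: an __m128 argument
   (16 bytes) or a 12-byte struct is passed by reference, while a struct
   of exactly 1, 2, 4 or 8 bytes is passed by value.  */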
7554
7555 /* Return true when TYPE should be 128bit aligned for 32bit argument
7556 passing ABI. XXX: This function is obsolete and is only used for
7557 checking psABI compatibility with previous versions of GCC. */
7558
7559 static bool
7560 ix86_compat_aligned_value_p (const_tree type)
7561 {
7562 enum machine_mode mode = TYPE_MODE (type);
7563 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7564 || mode == TDmode
7565 || mode == TFmode
7566 || mode == TCmode)
7567 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7568 return true;
7569 if (TYPE_ALIGN (type) < 128)
7570 return false;
7571
7572 if (AGGREGATE_TYPE_P (type))
7573 {
7574 /* Walk the aggregates recursively. */
7575 switch (TREE_CODE (type))
7576 {
7577 case RECORD_TYPE:
7578 case UNION_TYPE:
7579 case QUAL_UNION_TYPE:
7580 {
7581 tree field;
7582
7583 /* Walk all the structure fields. */
7584 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7585 {
7586 if (TREE_CODE (field) == FIELD_DECL
7587 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7588 return true;
7589 }
7590 break;
7591 }
7592
7593 case ARRAY_TYPE:
7594 /* Just for use if some languages pass arrays by value. */
7595 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7596 return true;
7597 break;
7598
7599 default:
7600 gcc_unreachable ();
7601 }
7602 }
7603 return false;
7604 }
7605
7606 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7607 XXX: This function is obsolete and is only used for checking psABI
7608 compatibility with previous versions of GCC. */
7609
7610 static unsigned int
7611 ix86_compat_function_arg_boundary (enum machine_mode mode,
7612 const_tree type, unsigned int align)
7613 {
7614 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7615 natural boundaries. */
7616 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7617 {
7618 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7619 make an exception for SSE modes since these require 128bit
7620 alignment.
7621
7622 The handling here differs from field_alignment. ICC aligns MMX
7623 arguments to 4 byte boundaries, while structure fields are aligned
7624 to 8 byte boundaries. */
7625 if (!type)
7626 {
7627 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7628 align = PARM_BOUNDARY;
7629 }
7630 else
7631 {
7632 if (!ix86_compat_aligned_value_p (type))
7633 align = PARM_BOUNDARY;
7634 }
7635 }
7636 if (align > BIGGEST_ALIGNMENT)
7637 align = BIGGEST_ALIGNMENT;
7638 return align;
7639 }
7640
7641 /* Return true when TYPE should be 128bit aligned for 32bit argument
7642 passing ABI. */
7643
7644 static bool
7645 ix86_contains_aligned_value_p (const_tree type)
7646 {
7647 enum machine_mode mode = TYPE_MODE (type);
7648
7649 if (mode == XFmode || mode == XCmode)
7650 return false;
7651
7652 if (TYPE_ALIGN (type) < 128)
7653 return false;
7654
7655 if (AGGREGATE_TYPE_P (type))
7656 {
7657 /* Walk the aggregates recursively. */
7658 switch (TREE_CODE (type))
7659 {
7660 case RECORD_TYPE:
7661 case UNION_TYPE:
7662 case QUAL_UNION_TYPE:
7663 {
7664 tree field;
7665
7666 /* Walk all the structure fields. */
7667 for (field = TYPE_FIELDS (type);
7668 field;
7669 field = DECL_CHAIN (field))
7670 {
7671 if (TREE_CODE (field) == FIELD_DECL
7672 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7673 return true;
7674 }
7675 break;
7676 }
7677
7678 case ARRAY_TYPE:
7679 /* Just for use if some languages pass arrays by value. */
7680 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7681 return true;
7682 break;
7683
7684 default:
7685 gcc_unreachable ();
7686 }
7687 }
7688 else
7689 return TYPE_ALIGN (type) >= 128;
7690
7691 return false;
7692 }
7693
7694 /* Gives the alignment boundary, in bits, of an argument with the
7695 specified mode and type. */
7696
7697 static unsigned int
7698 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7699 {
7700 unsigned int align;
7701 if (type)
7702 {
7703 /* Since the main variant type is used for the call, convert the
7704 type to its main variant. */
7705 type = TYPE_MAIN_VARIANT (type);
7706 align = TYPE_ALIGN (type);
7707 }
7708 else
7709 align = GET_MODE_ALIGNMENT (mode);
7710 if (align < PARM_BOUNDARY)
7711 align = PARM_BOUNDARY;
7712 else
7713 {
7714 static bool warned;
7715 unsigned int saved_align = align;
7716
7717 if (!TARGET_64BIT)
7718 {
7719 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7720 if (!type)
7721 {
7722 if (mode == XFmode || mode == XCmode)
7723 align = PARM_BOUNDARY;
7724 }
7725 else if (!ix86_contains_aligned_value_p (type))
7726 align = PARM_BOUNDARY;
7727
7728 if (align < 128)
7729 align = PARM_BOUNDARY;
7730 }
7731
7732 if (warn_psabi
7733 && !warned
7734 && align != ix86_compat_function_arg_boundary (mode, type,
7735 saved_align))
7736 {
7737 warned = true;
7738 inform (input_location,
7739 "The ABI for passing parameters with %d-byte"
7740 " alignment has changed in GCC 4.6",
7741 align / BITS_PER_UNIT);
7742 }
7743 }
7744
7745 return align;
7746 }
7747
7748 /* Return true if N is a possible register number of function value. */
7749
7750 static bool
7751 ix86_function_value_regno_p (const unsigned int regno)
7752 {
7753 switch (regno)
7754 {
7755 case AX_REG:
7756 case DX_REG:
7757 return true;
7758 case DI_REG:
7759 case SI_REG:
7760 return TARGET_64BIT && ix86_abi != MS_ABI;
7761
7762 /* Complex values are returned in %st(0)/%st(1) pair. */
7763 case ST0_REG:
7764 case ST1_REG:
7765 /* TODO: The function should depend on current function ABI but
7766 builtins.c would need updating then. Therefore we use the
7767 default ABI. */
7768 if (TARGET_64BIT && ix86_abi == MS_ABI)
7769 return false;
7770 return TARGET_FLOAT_RETURNS_IN_80387;
7771
7772 /* Complex values are returned in %xmm0/%xmm1 pair. */
7773 case XMM0_REG:
7774 case XMM1_REG:
7775 return TARGET_SSE;
7776
7777 case MM0_REG:
7778 if (TARGET_MACHO || TARGET_64BIT)
7779 return false;
7780 return TARGET_MMX;
7781 }
7782
7783 return false;
7784 }
7785
7786 /* Define how to find the value returned by a function.
7787 VALTYPE is the data type of the value (as a tree).
7788 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7789 otherwise, FUNC is 0. */
7790
7791 static rtx
7792 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7793 const_tree fntype, const_tree fn)
7794 {
7795 unsigned int regno;
7796
7797 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7798 we normally prevent this case when mmx is not available. However
7799 some ABIs may require the result to be returned like DImode. */
7800 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7801 regno = FIRST_MMX_REG;
7802
7803 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7804 we prevent this case when sse is not available. However some ABIs
7805 may require the result to be returned like integer TImode. */
7806 else if (mode == TImode
7807 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7808 regno = FIRST_SSE_REG;
7809
7810 /* 32-byte vector modes in %ymm0. */
7811 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7812 regno = FIRST_SSE_REG;
7813
7814 /* 64-byte vector modes in %zmm0. */
7815 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7816 regno = FIRST_SSE_REG;
7817
7818 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7819 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7820 regno = FIRST_FLOAT_REG;
7821 else
7822 /* Most things go in %eax. */
7823 regno = AX_REG;
7824
7825 /* Override FP return register with %xmm0 for local functions when
7826 SSE math is enabled or for functions with sseregparm attribute. */
7827 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7828 {
7829 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7830 if ((sse_level >= 1 && mode == SFmode)
7831 || (sse_level == 2 && mode == DFmode))
7832 regno = FIRST_SSE_REG;
7833 }
7834
7835 /* OImode shouldn't be used directly. */
7836 gcc_assert (mode != OImode);
7837
7838 return gen_rtx_REG (orig_mode, regno);
7839 }
7840
7841 static rtx
7842 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7843 const_tree valtype)
7844 {
7845 rtx ret;
7846
7847 /* Handle libcalls, which don't provide a type node. */
7848 if (valtype == NULL)
7849 {
7850 unsigned int regno;
7851
7852 switch (mode)
7853 {
7854 case SFmode:
7855 case SCmode:
7856 case DFmode:
7857 case DCmode:
7858 case TFmode:
7859 case SDmode:
7860 case DDmode:
7861 case TDmode:
7862 regno = FIRST_SSE_REG;
7863 break;
7864 case XFmode:
7865 case XCmode:
7866 regno = FIRST_FLOAT_REG;
7867 break;
7868 case TCmode:
7869 return NULL;
7870 default:
7871 regno = AX_REG;
7872 }
7873
7874 return gen_rtx_REG (mode, regno);
7875 }
7876 else if (POINTER_TYPE_P (valtype))
7877 {
7878 /* Pointers are always returned in word_mode. */
7879 mode = word_mode;
7880 }
7881
7882 ret = construct_container (mode, orig_mode, valtype, 1,
7883 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7884 x86_64_int_return_registers, 0);
7885
7886 /* For zero-sized structures, construct_container returns NULL, but we
7887 need to keep the rest of the compiler happy by returning a meaningful value. */
7888 if (!ret)
7889 ret = gen_rtx_REG (orig_mode, AX_REG);
7890
7891 return ret;
7892 }
7893
7894 static rtx
7895 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7896 const_tree valtype)
7897 {
7898 unsigned int regno = AX_REG;
7899
7900 if (TARGET_SSE)
7901 {
7902 switch (GET_MODE_SIZE (mode))
7903 {
7904 case 16:
7905 if (valtype != NULL_TREE
7906 && !VECTOR_INTEGER_TYPE_P (valtype)
7908 && !INTEGRAL_TYPE_P (valtype)
7909 && !VECTOR_FLOAT_TYPE_P (valtype))
7910 break;
7911 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7912 && !COMPLEX_MODE_P (mode))
7913 regno = FIRST_SSE_REG;
7914 break;
7915 case 8:
7916 case 4:
7917 if (mode == SFmode || mode == DFmode)
7918 regno = FIRST_SSE_REG;
7919 break;
7920 default:
7921 break;
7922 }
7923 }
7924 return gen_rtx_REG (orig_mode, regno);
7925 }
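
/* E.g. (illustrative) under the MS ABI a __m128i return value comes back
   in %xmm0 through the size-16 case above and a plain double through the
   size-8 case, while everything else defaults to %rax.  */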
7926
7927 static rtx
7928 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7929 enum machine_mode orig_mode, enum machine_mode mode)
7930 {
7931 const_tree fn, fntype;
7932
7933 fn = NULL_TREE;
7934 if (fntype_or_decl && DECL_P (fntype_or_decl))
7935 fn = fntype_or_decl;
7936 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7937
7938 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7939 return function_value_ms_64 (orig_mode, mode, valtype);
7940 else if (TARGET_64BIT)
7941 return function_value_64 (orig_mode, mode, valtype);
7942 else
7943 return function_value_32 (orig_mode, mode, fntype, fn);
7944 }
7945
7946 static rtx
7947 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7948 bool outgoing ATTRIBUTE_UNUSED)
7949 {
7950 enum machine_mode mode, orig_mode;
7951
7952 orig_mode = TYPE_MODE (valtype);
7953 mode = type_natural_mode (valtype, NULL, true);
7954 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7955 }
7956
7957 /* Pointer function arguments and return values are promoted to
7958 word_mode. */
7959
7960 static enum machine_mode
7961 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7962 int *punsignedp, const_tree fntype,
7963 int for_return)
7964 {
7965 if (type != NULL_TREE && POINTER_TYPE_P (type))
7966 {
7967 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7968 return word_mode;
7969 }
7970 return default_promote_function_mode (type, mode, punsignedp, fntype,
7971 for_return);
7972 }
7973
7974 /* Return true if a structure, union or array with MODE containing FIELD
7975 should be accessed using BLKmode. */
7976
7977 static bool
7978 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7979 {
7980 /* Union with XFmode must be in BLKmode. */
7981 return (mode == XFmode
7982 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
7983 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
7984 }
7985
7986 rtx
7987 ix86_libcall_value (enum machine_mode mode)
7988 {
7989 return ix86_function_value_1 (NULL, NULL, mode, mode);
7990 }
7991
7992 /* Return true iff type is returned in memory. */
7993
7994 static bool ATTRIBUTE_UNUSED
7995 return_in_memory_32 (const_tree type, enum machine_mode mode)
7996 {
7997 HOST_WIDE_INT size;
7998
7999 if (mode == BLKmode)
8000 return true;
8001
8002 size = int_size_in_bytes (type);
8003
8004 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8005 return false;
8006
8007 if (VECTOR_MODE_P (mode) || mode == TImode)
8008 {
8009 /* User-created vectors small enough to fit in EAX. */
8010 if (size < 8)
8011 return false;
8012
8013 /* MMX/3dNow values are returned in MM0,
8014 except when it doesn't exist or the ABI prescribes otherwise. */
8015 if (size == 8)
8016 return !TARGET_MMX || TARGET_VECT8_RETURNS;
8017
8018 /* SSE values are returned in XMM0, except when it doesn't exist. */
8019 if (size == 16)
8020 return !TARGET_SSE;
8021
8022 /* AVX values are returned in YMM0, except when it doesn't exist. */
8023 if (size == 32)
8024 return !TARGET_AVX;
8025
8026 /* AVX512F values are returned in ZMM0, except when it doesn't exist. */
8027 if (size == 64)
8028 return !TARGET_AVX512F;
8029 }
8030
8031 if (mode == XFmode)
8032 return false;
8033
8034 if (size > 12)
8035 return true;
8036
8037 /* OImode shouldn't be used directly. */
8038 gcc_assert (mode != OImode);
8039
8040 return false;
8041 }
8042
8043 static bool ATTRIBUTE_UNUSED
8044 return_in_memory_64 (const_tree type, enum machine_mode mode)
8045 {
8046 int needed_intregs, needed_sseregs;
8047 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
8048 }
8049
8050 static bool ATTRIBUTE_UNUSED
8051 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
8052 {
8053 HOST_WIDE_INT size = int_size_in_bytes (type);
8054
8055 /* __m128 is returned in xmm0. */
8056 if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
8057 || VECTOR_FLOAT_TYPE_P (type))
8058 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8059 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
8060 return false;
8061
8062 /* Otherwise, the size must be exactly in [1248]. */
8063 return size != 1 && size != 2 && size != 4 && size != 8;
8064 }
8065
8066 static bool
8067 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8068 {
8069 #ifdef SUBTARGET_RETURN_IN_MEMORY
8070 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8071 #else
8072 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8073
8074 if (TARGET_64BIT)
8075 {
8076 if (ix86_function_type_abi (fntype) == MS_ABI)
8077 return return_in_memory_ms_64 (type, mode);
8078 else
8079 return return_in_memory_64 (type, mode);
8080 }
8081 else
8082 return return_in_memory_32 (type, mode);
8083 #endif
8084 }
8085
8086 \f
8087 /* Create the va_list data type. */
8088
8089 /* Returns the calling convention specific va_list data type.
8090 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8091
8092 static tree
8093 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8094 {
8095 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8096
8097 /* For i386 we use plain pointer to argument area. */
8098 if (!TARGET_64BIT || abi == MS_ABI)
8099 return build_pointer_type (char_type_node);
8100
8101 record = lang_hooks.types.make_type (RECORD_TYPE);
8102 type_decl = build_decl (BUILTINS_LOCATION,
8103 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8104
8105 f_gpr = build_decl (BUILTINS_LOCATION,
8106 FIELD_DECL, get_identifier ("gp_offset"),
8107 unsigned_type_node);
8108 f_fpr = build_decl (BUILTINS_LOCATION,
8109 FIELD_DECL, get_identifier ("fp_offset"),
8110 unsigned_type_node);
8111 f_ovf = build_decl (BUILTINS_LOCATION,
8112 FIELD_DECL, get_identifier ("overflow_arg_area"),
8113 ptr_type_node);
8114 f_sav = build_decl (BUILTINS_LOCATION,
8115 FIELD_DECL, get_identifier ("reg_save_area"),
8116 ptr_type_node);
8117
8118 va_list_gpr_counter_field = f_gpr;
8119 va_list_fpr_counter_field = f_fpr;
8120
8121 DECL_FIELD_CONTEXT (f_gpr) = record;
8122 DECL_FIELD_CONTEXT (f_fpr) = record;
8123 DECL_FIELD_CONTEXT (f_ovf) = record;
8124 DECL_FIELD_CONTEXT (f_sav) = record;
8125
8126 TYPE_STUB_DECL (record) = type_decl;
8127 TYPE_NAME (record) = type_decl;
8128 TYPE_FIELDS (record) = f_gpr;
8129 DECL_CHAIN (f_gpr) = f_fpr;
8130 DECL_CHAIN (f_fpr) = f_ovf;
8131 DECL_CHAIN (f_ovf) = f_sav;
8132
8133 layout_type (record);
8134
8135 /* The correct type is an array type of one element. */
8136 return build_array_type (record, build_index_type (size_zero_node));
8137 }
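
/* For reference, the record built above matches the SysV x86-64 va_list
   layout (a sketch only; the real type is constructed with trees):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */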
8138
8139 /* Setup the builtin va_list data type and for 64-bit the additional
8140 calling convention specific va_list data types. */
8141
8142 static tree
8143 ix86_build_builtin_va_list (void)
8144 {
8145 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8146
8147 /* Initialize abi specific va_list builtin types. */
8148 if (TARGET_64BIT)
8149 {
8150 tree t;
8151 if (ix86_abi == MS_ABI)
8152 {
8153 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8154 if (TREE_CODE (t) != RECORD_TYPE)
8155 t = build_variant_type_copy (t);
8156 sysv_va_list_type_node = t;
8157 }
8158 else
8159 {
8160 t = ret;
8161 if (TREE_CODE (t) != RECORD_TYPE)
8162 t = build_variant_type_copy (t);
8163 sysv_va_list_type_node = t;
8164 }
8165 if (ix86_abi != MS_ABI)
8166 {
8167 t = ix86_build_builtin_va_list_abi (MS_ABI);
8168 if (TREE_CODE (t) != RECORD_TYPE)
8169 t = build_variant_type_copy (t);
8170 ms_va_list_type_node = t;
8171 }
8172 else
8173 {
8174 t = ret;
8175 if (TREE_CODE (t) != RECORD_TYPE)
8176 t = build_variant_type_copy (t);
8177 ms_va_list_type_node = t;
8178 }
8179 }
8180
8181 return ret;
8182 }
8183
8184 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8185
8186 static void
8187 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8188 {
8189 rtx save_area, mem;
8190 alias_set_type set;
8191 int i, max;
8192
8193 /* GPR size of varargs save area. */
8194 if (cfun->va_list_gpr_size)
8195 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8196 else
8197 ix86_varargs_gpr_size = 0;
8198
8199 /* FPR size of varargs save area. We don't need it if we don't pass
8200 anything in SSE registers. */
8201 if (TARGET_SSE && cfun->va_list_fpr_size)
8202 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8203 else
8204 ix86_varargs_fpr_size = 0;
8205
8206 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8207 return;
8208
8209 save_area = frame_pointer_rtx;
8210 set = get_varargs_alias_set ();
8211
8212 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8213 if (max > X86_64_REGPARM_MAX)
8214 max = X86_64_REGPARM_MAX;
8215
8216 for (i = cum->regno; i < max; i++)
8217 {
8218 mem = gen_rtx_MEM (word_mode,
8219 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8220 MEM_NOTRAP_P (mem) = 1;
8221 set_mem_alias_set (mem, set);
8222 emit_move_insn (mem,
8223 gen_rtx_REG (word_mode,
8224 x86_64_int_parameter_registers[i]));
8225 }
8226
8227 if (ix86_varargs_fpr_size)
8228 {
8229 enum machine_mode smode;
8230 rtx label, test;
8231
8232 /* Now emit code to save SSE registers. The AX parameter contains number
8233 of SSE parameter registers used to call this function, though all we
8234 actually check here is the zero/non-zero status. */
8235
8236 label = gen_label_rtx ();
8237 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8238 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8239 label));
8240
8241 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8242 we used movdqa (i.e. TImode) instead? Perhaps even better would
8243 be if we could determine the real mode of the data, via a hook
8244 into pass_stdarg. Ignore all that for now. */
8245 smode = V4SFmode;
8246 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8247 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8248
8249 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8250 if (max > X86_64_SSE_REGPARM_MAX)
8251 max = X86_64_SSE_REGPARM_MAX;
8252
8253 for (i = cum->sse_regno; i < max; ++i)
8254 {
8255 mem = plus_constant (Pmode, save_area,
8256 i * 16 + ix86_varargs_gpr_size);
8257 mem = gen_rtx_MEM (smode, mem);
8258 MEM_NOTRAP_P (mem) = 1;
8259 set_mem_alias_set (mem, set);
8260 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8261
8262 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8263 }
8264
8265 emit_label (label);
8266 }
8267 }
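
/* An illustrative picture of the register save area filled in above, assuming
   the whole area is needed (sizes follow from X86_64_REGPARM_MAX == 6 and
   X86_64_SSE_REGPARM_MAX == 8):

       save_area +   0 .. +  47   rdi, rsi, rdx, rcx, r8, r9   (8 bytes each)
       save_area +  48 .. + 175   xmm0 .. xmm7                 (16 bytes each)

   Only registers from cum->regno / cum->sse_regno upward are stored, and the
   SSE block is skipped at run time when %al is zero.  */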
8268
8269 static void
8270 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8271 {
8272 alias_set_type set = get_varargs_alias_set ();
8273 int i;
8274
8275   /* Reset to zero, as a SysV va_arg may have been used
8276      before.  */
8277 ix86_varargs_gpr_size = 0;
8278 ix86_varargs_fpr_size = 0;
8279
8280 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8281 {
8282 rtx reg, mem;
8283
8284 mem = gen_rtx_MEM (Pmode,
8285 plus_constant (Pmode, virtual_incoming_args_rtx,
8286 i * UNITS_PER_WORD));
8287 MEM_NOTRAP_P (mem) = 1;
8288 set_mem_alias_set (mem, set);
8289
8290 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8291 emit_move_insn (mem, reg);
8292 }
8293 }
8294
8295 static void
8296 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8297 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8298 int no_rtl)
8299 {
8300 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8301 CUMULATIVE_ARGS next_cum;
8302 tree fntype;
8303
8304   /* This argument doesn't appear to be used anymore, which is good,
8305      because the old code here didn't suppress rtl generation.  */
8306 gcc_assert (!no_rtl);
8307
8308 if (!TARGET_64BIT)
8309 return;
8310
8311 fntype = TREE_TYPE (current_function_decl);
8312
8313 /* For varargs, we do not want to skip the dummy va_dcl argument.
8314 For stdargs, we do want to skip the last named argument. */
8315 next_cum = *cum;
8316 if (stdarg_p (fntype))
8317 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8318 true);
8319
8320 if (cum->call_abi == MS_ABI)
8321 setup_incoming_varargs_ms_64 (&next_cum);
8322 else
8323 setup_incoming_varargs_64 (&next_cum);
8324 }
8325
8326 /* Check whether TYPE is a va_list of kind char *.  */
8327
8328 static bool
8329 is_va_list_char_pointer (tree type)
8330 {
8331 tree canonic;
8332
8333 /* For 32-bit it is always true. */
8334 if (!TARGET_64BIT)
8335 return true;
8336 canonic = ix86_canonical_va_list_type (type);
8337 return (canonic == ms_va_list_type_node
8338 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8339 }
8340
8341 /* Implement va_start. */
8342
8343 static void
8344 ix86_va_start (tree valist, rtx nextarg)
8345 {
8346 HOST_WIDE_INT words, n_gpr, n_fpr;
8347 tree f_gpr, f_fpr, f_ovf, f_sav;
8348 tree gpr, fpr, ovf, sav, t;
8349 tree type;
8350 rtx ovf_rtx;
8351
8352 if (flag_split_stack
8353 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8354 {
8355 unsigned int scratch_regno;
8356
8357 /* When we are splitting the stack, we can't refer to the stack
8358 arguments using internal_arg_pointer, because they may be on
8359 the old stack. The split stack prologue will arrange to
8360 leave a pointer to the old stack arguments in a scratch
8361 register, which we here copy to a pseudo-register. The split
8362 stack prologue can't set the pseudo-register directly because
8363 it (the prologue) runs before any registers have been saved. */
8364
8365 scratch_regno = split_stack_prologue_scratch_regno ();
8366 if (scratch_regno != INVALID_REGNUM)
8367 {
8368 rtx reg, seq;
8369
8370 reg = gen_reg_rtx (Pmode);
8371 cfun->machine->split_stack_varargs_pointer = reg;
8372
8373 start_sequence ();
8374 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8375 seq = get_insns ();
8376 end_sequence ();
8377
8378 push_topmost_sequence ();
8379 emit_insn_after (seq, entry_of_function ());
8380 pop_topmost_sequence ();
8381 }
8382 }
8383
8384   /* Only 64-bit targets need something special.  */
8385 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8386 {
8387 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8388 std_expand_builtin_va_start (valist, nextarg);
8389 else
8390 {
8391 rtx va_r, next;
8392
8393 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8394 next = expand_binop (ptr_mode, add_optab,
8395 cfun->machine->split_stack_varargs_pointer,
8396 crtl->args.arg_offset_rtx,
8397 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8398 convert_move (va_r, next, 0);
8399 }
8400 return;
8401 }
8402
8403 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8404 f_fpr = DECL_CHAIN (f_gpr);
8405 f_ovf = DECL_CHAIN (f_fpr);
8406 f_sav = DECL_CHAIN (f_ovf);
8407
8408 valist = build_simple_mem_ref (valist);
8409 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8410 /* The following should be folded into the MEM_REF offset. */
8411 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8412 f_gpr, NULL_TREE);
8413 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8414 f_fpr, NULL_TREE);
8415 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8416 f_ovf, NULL_TREE);
8417 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8418 f_sav, NULL_TREE);
8419
8420 /* Count number of gp and fp argument registers used. */
8421 words = crtl->args.info.words;
8422 n_gpr = crtl->args.info.regno;
8423 n_fpr = crtl->args.info.sse_regno;
8424
8425 if (cfun->va_list_gpr_size)
8426 {
8427 type = TREE_TYPE (gpr);
8428 t = build2 (MODIFY_EXPR, type,
8429 gpr, build_int_cst (type, n_gpr * 8));
8430 TREE_SIDE_EFFECTS (t) = 1;
8431 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8432 }
8433
8434 if (TARGET_SSE && cfun->va_list_fpr_size)
8435 {
8436 type = TREE_TYPE (fpr);
8437 t = build2 (MODIFY_EXPR, type, fpr,
8438 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8439 TREE_SIDE_EFFECTS (t) = 1;
8440 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8441 }
8442
8443 /* Find the overflow area. */
8444 type = TREE_TYPE (ovf);
8445 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8446 ovf_rtx = crtl->args.internal_arg_pointer;
8447 else
8448 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8449 t = make_tree (type, ovf_rtx);
8450 if (words != 0)
8451 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8452 t = build2 (MODIFY_EXPR, type, ovf, t);
8453 TREE_SIDE_EFFECTS (t) = 1;
8454 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8455
8456 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8457 {
8458       /* Find the register save area.
8459	  The function prologue saves it right above the stack frame.  */
8460 type = TREE_TYPE (sav);
8461 t = make_tree (type, frame_pointer_rtx);
8462 if (!ix86_varargs_gpr_size)
8463 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8464 t = build2 (MODIFY_EXPR, type, sav, t);
8465 TREE_SIDE_EFFECTS (t) = 1;
8466 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8467 }
8468 }
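
/* A worked example of the initialization above (illustrative only): for
   "void f (int a, ...)" under the SysV ABI, one GPR and no SSE registers are
   consumed by the named argument, so va_start roughly sets

       gp_offset         = 1 * 8  = 8
       fp_offset         = 0 * 16 + 8 * X86_64_REGPARM_MAX = 48
       overflow_arg_area = incoming argument pointer  (words == 0)
       reg_save_area     = frame pointer, biased by -8 * X86_64_REGPARM_MAX
                           only when no GPRs are saved at all.  */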
8469
8470 /* Implement va_arg. */
8471
8472 static tree
8473 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8474 gimple_seq *post_p)
8475 {
8476 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8477 tree f_gpr, f_fpr, f_ovf, f_sav;
8478 tree gpr, fpr, ovf, sav, t;
8479 int size, rsize;
8480 tree lab_false, lab_over = NULL_TREE;
8481 tree addr, t2;
8482 rtx container;
8483 int indirect_p = 0;
8484 tree ptrtype;
8485 enum machine_mode nat_mode;
8486 unsigned int arg_boundary;
8487
8488   /* Only 64-bit targets need something special.  */
8489 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8490 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8491
8492 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8493 f_fpr = DECL_CHAIN (f_gpr);
8494 f_ovf = DECL_CHAIN (f_fpr);
8495 f_sav = DECL_CHAIN (f_ovf);
8496
8497 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8498 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8499 valist = build_va_arg_indirect_ref (valist);
8500 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8501 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8502 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8503
8504 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8505 if (indirect_p)
8506 type = build_pointer_type (type);
8507 size = int_size_in_bytes (type);
8508 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8509
8510 nat_mode = type_natural_mode (type, NULL, false);
8511 switch (nat_mode)
8512 {
8513 case V8SFmode:
8514 case V8SImode:
8515 case V32QImode:
8516 case V16HImode:
8517 case V4DFmode:
8518 case V4DImode:
8519 case V16SFmode:
8520 case V16SImode:
8521 case V64QImode:
8522 case V32HImode:
8523 case V8DFmode:
8524 case V8DImode:
8525       /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack.  */
8526 if (!TARGET_64BIT_MS_ABI)
8527 {
8528 container = NULL;
8529 break;
8530 }
8531
8532 default:
8533 container = construct_container (nat_mode, TYPE_MODE (type),
8534 type, 0, X86_64_REGPARM_MAX,
8535 X86_64_SSE_REGPARM_MAX, intreg,
8536 0);
8537 break;
8538 }
8539
8540 /* Pull the value out of the saved registers. */
8541
8542 addr = create_tmp_var (ptr_type_node, "addr");
8543
8544 if (container)
8545 {
8546 int needed_intregs, needed_sseregs;
8547 bool need_temp;
8548 tree int_addr, sse_addr;
8549
8550 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8551 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8552
8553 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8554
8555 need_temp = (!REG_P (container)
8556 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8557 || TYPE_ALIGN (type) > 128));
8558
8559       /* In case we are passing a structure, verify that it occupies a consecutive
8560	  block in the register save area.  If not, we need to do moves.  */
8561 if (!need_temp && !REG_P (container))
8562 {
8563	  /* Verify that all registers are strictly consecutive.  */
8564 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8565 {
8566 int i;
8567
8568 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8569 {
8570 rtx slot = XVECEXP (container, 0, i);
8571 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8572 || INTVAL (XEXP (slot, 1)) != i * 16)
8573 need_temp = 1;
8574 }
8575 }
8576 else
8577 {
8578 int i;
8579
8580 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8581 {
8582 rtx slot = XVECEXP (container, 0, i);
8583 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8584 || INTVAL (XEXP (slot, 1)) != i * 8)
8585 need_temp = 1;
8586 }
8587 }
8588 }
8589 if (!need_temp)
8590 {
8591 int_addr = addr;
8592 sse_addr = addr;
8593 }
8594 else
8595 {
8596 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8597 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8598 }
8599
8600 /* First ensure that we fit completely in registers. */
8601 if (needed_intregs)
8602 {
8603 t = build_int_cst (TREE_TYPE (gpr),
8604 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8605 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8606 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8607 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8608 gimplify_and_add (t, pre_p);
8609 }
8610 if (needed_sseregs)
8611 {
8612 t = build_int_cst (TREE_TYPE (fpr),
8613 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8614 + X86_64_REGPARM_MAX * 8);
8615 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8616 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8617 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8618 gimplify_and_add (t, pre_p);
8619 }
8620
8621 /* Compute index to start of area used for integer regs. */
8622 if (needed_intregs)
8623 {
8624 /* int_addr = gpr + sav; */
8625 t = fold_build_pointer_plus (sav, gpr);
8626 gimplify_assign (int_addr, t, pre_p);
8627 }
8628 if (needed_sseregs)
8629 {
8630 /* sse_addr = fpr + sav; */
8631 t = fold_build_pointer_plus (sav, fpr);
8632 gimplify_assign (sse_addr, t, pre_p);
8633 }
8634 if (need_temp)
8635 {
8636 int i, prev_size = 0;
8637 tree temp = create_tmp_var (type, "va_arg_tmp");
8638
8639 /* addr = &temp; */
8640 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8641 gimplify_assign (addr, t, pre_p);
8642
8643 for (i = 0; i < XVECLEN (container, 0); i++)
8644 {
8645 rtx slot = XVECEXP (container, 0, i);
8646 rtx reg = XEXP (slot, 0);
8647 enum machine_mode mode = GET_MODE (reg);
8648 tree piece_type;
8649 tree addr_type;
8650 tree daddr_type;
8651 tree src_addr, src;
8652 int src_offset;
8653 tree dest_addr, dest;
8654 int cur_size = GET_MODE_SIZE (mode);
8655
8656 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8657 prev_size = INTVAL (XEXP (slot, 1));
8658 if (prev_size + cur_size > size)
8659 {
8660 cur_size = size - prev_size;
8661 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8662 if (mode == BLKmode)
8663 mode = QImode;
8664 }
8665 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8666 if (mode == GET_MODE (reg))
8667 addr_type = build_pointer_type (piece_type);
8668 else
8669 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8670 true);
8671 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8672 true);
8673
8674 if (SSE_REGNO_P (REGNO (reg)))
8675 {
8676 src_addr = sse_addr;
8677 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8678 }
8679 else
8680 {
8681 src_addr = int_addr;
8682 src_offset = REGNO (reg) * 8;
8683 }
8684 src_addr = fold_convert (addr_type, src_addr);
8685 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8686
8687 dest_addr = fold_convert (daddr_type, addr);
8688 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8689 if (cur_size == GET_MODE_SIZE (mode))
8690 {
8691 src = build_va_arg_indirect_ref (src_addr);
8692 dest = build_va_arg_indirect_ref (dest_addr);
8693
8694 gimplify_assign (dest, src, pre_p);
8695 }
8696 else
8697 {
8698 tree copy
8699 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8700 3, dest_addr, src_addr,
8701 size_int (cur_size));
8702 gimplify_and_add (copy, pre_p);
8703 }
8704 prev_size += cur_size;
8705 }
8706 }
8707
8708 if (needed_intregs)
8709 {
8710 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8711 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8712 gimplify_assign (gpr, t, pre_p);
8713 }
8714
8715 if (needed_sseregs)
8716 {
8717 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8718 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8719 gimplify_assign (fpr, t, pre_p);
8720 }
8721
8722 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8723
8724 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8725 }
8726
8727 /* ... otherwise out of the overflow area. */
8728
8729   /* When the caller aligns a parameter on the stack, a parameter alignment
8730      beyond MAX_SUPPORTED_STACK_ALIGNMENT is capped at
8731      MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee here with the
8732      caller.  */
8733 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8734 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8735 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8736
8737 /* Care for on-stack alignment if needed. */
8738 if (arg_boundary <= 64 || size == 0)
8739 t = ovf;
8740 else
8741 {
8742 HOST_WIDE_INT align = arg_boundary / 8;
8743 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8744 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8745 build_int_cst (TREE_TYPE (t), -align));
8746 }
8747
8748 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8749 gimplify_assign (addr, t, pre_p);
8750
8751 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8752 gimplify_assign (unshare_expr (ovf), t, pre_p);
8753
8754 if (container)
8755 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8756
8757 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8758 addr = fold_convert (ptrtype, addr);
8759
8760 if (indirect_p)
8761 addr = build_va_arg_indirect_ref (addr);
8762 return build_va_arg_indirect_ref (addr);
8763 }
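
/* Roughly, the gimple built above for a register-eligible argument follows the
   classic SysV va_arg algorithm (a sketch of the control flow, not the exact
   trees that are emitted):

       if (gp_offset > 48 - needed_intregs * 8
           || fp_offset > 176 - needed_sseregs * 16)
         goto overflow;
       addr = reg_save_area + offset;
       gp_offset += needed_intregs * 8;
       fp_offset += needed_sseregs * 16;
       goto done;
     overflow:
       addr = align (overflow_arg_area, arg_boundary);
       overflow_arg_area = addr + rsize * UNITS_PER_WORD;
     done:
       result = *(type *) addr;  */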
8764 \f
8765 /* Return true if OPNUM's MEM should be matched
8766 in movabs* patterns. */
8767
8768 bool
8769 ix86_check_movabs (rtx insn, int opnum)
8770 {
8771 rtx set, mem;
8772
8773 set = PATTERN (insn);
8774 if (GET_CODE (set) == PARALLEL)
8775 set = XVECEXP (set, 0, 0);
8776 gcc_assert (GET_CODE (set) == SET);
8777 mem = XEXP (set, opnum);
8778 while (GET_CODE (mem) == SUBREG)
8779 mem = SUBREG_REG (mem);
8780 gcc_assert (MEM_P (mem));
8781 return volatile_ok || !MEM_VOLATILE_P (mem);
8782 }
8783 \f
8784 /* Initialize the table of extra 80387 mathematical constants. */
8785
8786 static void
8787 init_ext_80387_constants (void)
8788 {
8789 static const char * cst[5] =
8790 {
8791 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8792 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8793 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8794 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8795 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8796 };
8797 int i;
8798
8799 for (i = 0; i < 5; i++)
8800 {
8801 real_from_string (&ext_80387_constants_table[i], cst[i]);
8802 /* Ensure each constant is rounded to XFmode precision. */
8803 real_convert (&ext_80387_constants_table[i],
8804 XFmode, &ext_80387_constants_table[i]);
8805 }
8806
8807 ext_80387_constants_init = 1;
8808 }
8809
8810 /* Return non-zero if the constant is something that
8811 can be loaded with a special instruction. */
8812
8813 int
8814 standard_80387_constant_p (rtx x)
8815 {
8816 enum machine_mode mode = GET_MODE (x);
8817
8818 REAL_VALUE_TYPE r;
8819
8820 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8821 return -1;
8822
8823 if (x == CONST0_RTX (mode))
8824 return 1;
8825 if (x == CONST1_RTX (mode))
8826 return 2;
8827
8828 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8829
8830 /* For XFmode constants, try to find a special 80387 instruction when
8831 optimizing for size or on those CPUs that benefit from them. */
8832 if (mode == XFmode
8833 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8834 {
8835 int i;
8836
8837 if (! ext_80387_constants_init)
8838 init_ext_80387_constants ();
8839
8840 for (i = 0; i < 5; i++)
8841 if (real_identical (&r, &ext_80387_constants_table[i]))
8842 return i + 3;
8843 }
8844
8845   /* A load of the constant -0.0 or -1.0 will be split into an
8846      fldz;fchs or fld1;fchs sequence.  */
8847 if (real_isnegzero (&r))
8848 return 8;
8849 if (real_identical (&r, &dconstm1))
8850 return 9;
8851
8852 return 0;
8853 }
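
/* Example (illustrative): when the extended constant table is enabled
   (optimizing for size or TARGET_EXT_80387_CONSTANTS), an XFmode
   CONST_DOUBLE equal to pi returns 7, which standard_80387_constant_opcode
   below maps to "fldpi"; a return value of 0 means no special load exists.  */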
8854
8855 /* Return the opcode of the special instruction to be used to load
8856 the constant X. */
8857
8858 const char *
8859 standard_80387_constant_opcode (rtx x)
8860 {
8861 switch (standard_80387_constant_p (x))
8862 {
8863 case 1:
8864 return "fldz";
8865 case 2:
8866 return "fld1";
8867 case 3:
8868 return "fldlg2";
8869 case 4:
8870 return "fldln2";
8871 case 5:
8872 return "fldl2e";
8873 case 6:
8874 return "fldl2t";
8875 case 7:
8876 return "fldpi";
8877 case 8:
8878 case 9:
8879 return "#";
8880 default:
8881 gcc_unreachable ();
8882 }
8883 }
8884
8885 /* Return the CONST_DOUBLE representing the 80387 constant that is
8886 loaded by the specified special instruction. The argument IDX
8887 matches the return value from standard_80387_constant_p. */
8888
8889 rtx
8890 standard_80387_constant_rtx (int idx)
8891 {
8892 int i;
8893
8894 if (! ext_80387_constants_init)
8895 init_ext_80387_constants ();
8896
8897 switch (idx)
8898 {
8899 case 3:
8900 case 4:
8901 case 5:
8902 case 6:
8903 case 7:
8904 i = idx - 3;
8905 break;
8906
8907 default:
8908 gcc_unreachable ();
8909 }
8910
8911 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8912 XFmode);
8913 }
8914
8915 /* Return 1 if X is all 0s and 2 if X is all 1s
8916    in a supported SSE/AVX vector mode.  */
8917
8918 int
8919 standard_sse_constant_p (rtx x)
8920 {
8921 enum machine_mode mode = GET_MODE (x);
8922
8923 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8924 return 1;
8925 if (vector_all_ones_operand (x, mode))
8926 switch (mode)
8927 {
8928 case V16QImode:
8929 case V8HImode:
8930 case V4SImode:
8931 case V2DImode:
8932 if (TARGET_SSE2)
8933 return 2;
8934 case V32QImode:
8935 case V16HImode:
8936 case V8SImode:
8937 case V4DImode:
8938 if (TARGET_AVX2)
8939 return 2;
8940 case V64QImode:
8941 case V32HImode:
8942 case V16SImode:
8943 case V8DImode:
8944 if (TARGET_AVX512F)
8945 return 2;
8946 default:
8947 break;
8948 }
8949
8950 return 0;
8951 }
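
/* Examples (illustrative): CONST0_RTX (V4SFmode) yields 1, i.e. loadable with
   a single xorps/pxor; an all-ones V4SImode vector yields 2 on TARGET_SSE2;
   wider all-ones vectors additionally require AVX2 (256-bit) or AVX-512F
   (512-bit).  */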
8952
8953 /* Return the opcode of the special instruction to be used to load
8954 the constant X. */
8955
8956 const char *
8957 standard_sse_constant_opcode (rtx insn, rtx x)
8958 {
8959 switch (standard_sse_constant_p (x))
8960 {
8961 case 1:
8962 switch (get_attr_mode (insn))
8963 {
8964 case MODE_XI:
8965 case MODE_V16SF:
8966 return "vpxord\t%g0, %g0, %g0";
8967 case MODE_V8DF:
8968 return "vpxorq\t%g0, %g0, %g0";
8969 case MODE_TI:
8970 return "%vpxor\t%0, %d0";
8971 case MODE_V2DF:
8972 return "%vxorpd\t%0, %d0";
8973 case MODE_V4SF:
8974 return "%vxorps\t%0, %d0";
8975
8976 case MODE_OI:
8977 return "vpxor\t%x0, %x0, %x0";
8978 case MODE_V4DF:
8979 return "vxorpd\t%x0, %x0, %x0";
8980 case MODE_V8SF:
8981 return "vxorps\t%x0, %x0, %x0";
8982
8983 default:
8984 break;
8985 }
8986
8987 case 2:
8988 if (get_attr_mode (insn) == MODE_XI
8989 || get_attr_mode (insn) == MODE_V8DF
8990 || get_attr_mode (insn) == MODE_V16SF)
8991 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8992 if (TARGET_AVX)
8993 return "vpcmpeqd\t%0, %0, %0";
8994 else
8995 return "pcmpeqd\t%0, %0";
8996
8997 default:
8998 break;
8999 }
9000 gcc_unreachable ();
9001 }
9002
9003 /* Return true if OP contains a symbol reference.  */
9004
9005 bool
9006 symbolic_reference_mentioned_p (rtx op)
9007 {
9008 const char *fmt;
9009 int i;
9010
9011 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9012 return true;
9013
9014 fmt = GET_RTX_FORMAT (GET_CODE (op));
9015 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9016 {
9017 if (fmt[i] == 'E')
9018 {
9019 int j;
9020
9021 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9022 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9023 return true;
9024 }
9025
9026 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9027 return true;
9028 }
9029
9030 return false;
9031 }
9032
9033 /* Return true if it is appropriate to emit `ret' instructions in the
9034 body of a function. Do this only if the epilogue is simple, needing a
9035 couple of insns. Prior to reloading, we can't tell how many registers
9036 must be saved, so return false then. Return false if there is no frame
9037 marker to de-allocate. */
9038
9039 bool
9040 ix86_can_use_return_insn_p (void)
9041 {
9042 struct ix86_frame frame;
9043
9044 if (! reload_completed || frame_pointer_needed)
9045 return 0;
9046
9047 /* Don't allow more than 32k pop, since that's all we can do
9048 with one instruction. */
9049 if (crtl->args.pops_args && crtl->args.size >= 32768)
9050 return 0;
9051
9052 ix86_compute_frame_layout (&frame);
9053 return (frame.stack_pointer_offset == UNITS_PER_WORD
9054 && (frame.nregs + frame.nsseregs) == 0);
9055 }
9056 \f
9057 /* Value should be nonzero if functions must have frame pointers.
9058 Zero means the frame pointer need not be set up (and parms may
9059 be accessed via the stack pointer) in functions that seem suitable. */
9060
9061 static bool
9062 ix86_frame_pointer_required (void)
9063 {
9064 /* If we accessed previous frames, then the generated code expects
9065 to be able to access the saved ebp value in our frame. */
9066 if (cfun->machine->accesses_prev_frame)
9067 return true;
9068
9069   /* Several x86 OSes need a frame pointer for other reasons,
9070      usually pertaining to setjmp.  */
9071 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9072 return true;
9073
9074   /* For older 32-bit runtimes setjmp requires a valid frame pointer.  */
9075 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9076 return true;
9077
9078   /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9079      stack allocation is 4GB.  */
9080 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9081 return true;
9082
9083 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9084 turns off the frame pointer by default. Turn it back on now if
9085 we've not got a leaf function. */
9086 if (TARGET_OMIT_LEAF_FRAME_POINTER
9087 && (!crtl->is_leaf
9088 || ix86_current_function_calls_tls_descriptor))
9089 return true;
9090
9091 if (crtl->profile && !flag_fentry)
9092 return true;
9093
9094 return false;
9095 }
9096
9097 /* Record that the current function accesses previous call frames. */
9098
9099 void
9100 ix86_setup_frame_addresses (void)
9101 {
9102 cfun->machine->accesses_prev_frame = 1;
9103 }
9104 \f
9105 #ifndef USE_HIDDEN_LINKONCE
9106 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9107 # define USE_HIDDEN_LINKONCE 1
9108 # else
9109 # define USE_HIDDEN_LINKONCE 0
9110 # endif
9111 #endif
9112
9113 static int pic_labels_used;
9114
9115 /* Fills in the label name that should be used for a pc thunk for
9116 the given register. */
9117
9118 static void
9119 get_pc_thunk_name (char name[32], unsigned int regno)
9120 {
9121 gcc_assert (!TARGET_64BIT);
9122
9123 if (USE_HIDDEN_LINKONCE)
9124 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9125 else
9126 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9127 }
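
/* For example (assuming the usual reg_names spelling), regno == BX_REG with
   USE_HIDDEN_LINKONCE yields the thunk name "__x86.get_pc_thunk.bx".  */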
9128
9129
9130 /* This function generates code for -fpic that loads %ebx with
9131 the return address of the caller and then returns. */
9132
9133 static void
9134 ix86_code_end (void)
9135 {
9136 rtx xops[2];
9137 int regno;
9138
9139 for (regno = AX_REG; regno <= SP_REG; regno++)
9140 {
9141 char name[32];
9142 tree decl;
9143
9144 if (!(pic_labels_used & (1 << regno)))
9145 continue;
9146
9147 get_pc_thunk_name (name, regno);
9148
9149 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9150 get_identifier (name),
9151 build_function_type_list (void_type_node, NULL_TREE));
9152 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9153 NULL_TREE, void_type_node);
9154 TREE_PUBLIC (decl) = 1;
9155 TREE_STATIC (decl) = 1;
9156 DECL_IGNORED_P (decl) = 1;
9157
9158 #if TARGET_MACHO
9159 if (TARGET_MACHO)
9160 {
9161 switch_to_section (darwin_sections[text_coal_section]);
9162 fputs ("\t.weak_definition\t", asm_out_file);
9163 assemble_name (asm_out_file, name);
9164 fputs ("\n\t.private_extern\t", asm_out_file);
9165 assemble_name (asm_out_file, name);
9166 putc ('\n', asm_out_file);
9167 ASM_OUTPUT_LABEL (asm_out_file, name);
9168 DECL_WEAK (decl) = 1;
9169 }
9170 else
9171 #endif
9172 if (USE_HIDDEN_LINKONCE)
9173 {
9174 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9175
9176 targetm.asm_out.unique_section (decl, 0);
9177 switch_to_section (get_named_section (decl, NULL, 0));
9178
9179 targetm.asm_out.globalize_label (asm_out_file, name);
9180 fputs ("\t.hidden\t", asm_out_file);
9181 assemble_name (asm_out_file, name);
9182 putc ('\n', asm_out_file);
9183 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9184 }
9185 else
9186 {
9187 switch_to_section (text_section);
9188 ASM_OUTPUT_LABEL (asm_out_file, name);
9189 }
9190
9191 DECL_INITIAL (decl) = make_node (BLOCK);
9192 current_function_decl = decl;
9193 init_function_start (decl);
9194 first_function_block_is_cold = false;
9195 /* Make sure unwind info is emitted for the thunk if needed. */
9196 final_start_function (emit_barrier (), asm_out_file, 1);
9197
9198 /* Pad stack IP move with 4 instructions (two NOPs count
9199 as one instruction). */
9200 if (TARGET_PAD_SHORT_FUNCTION)
9201 {
9202 int i = 8;
9203
9204 while (i--)
9205 fputs ("\tnop\n", asm_out_file);
9206 }
9207
9208 xops[0] = gen_rtx_REG (Pmode, regno);
9209 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9210 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9211 fputs ("\tret\n", asm_out_file);
9212 final_end_function ();
9213 init_insn_lengths ();
9214 free_after_compilation (cfun);
9215 set_cfun (NULL);
9216 current_function_decl = NULL;
9217 }
9218
9219 if (flag_split_stack)
9220 file_end_indicate_split_stack ();
9221 }
9222
9223 /* Emit code for the SET_GOT patterns. */
9224
9225 const char *
9226 output_set_got (rtx dest, rtx label)
9227 {
9228 rtx xops[3];
9229
9230 xops[0] = dest;
9231
9232 if (TARGET_VXWORKS_RTP && flag_pic)
9233 {
9234 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9235 xops[2] = gen_rtx_MEM (Pmode,
9236 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9237 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9238
9239 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9240 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9241 an unadorned address. */
9242 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9243 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9244 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9245 return "";
9246 }
9247
9248 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9249
9250 if (!flag_pic)
9251 {
9252 if (TARGET_MACHO)
9253 /* We don't need a pic base, we're not producing pic. */
9254 gcc_unreachable ();
9255
9256 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9257 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9258 targetm.asm_out.internal_label (asm_out_file, "L",
9259 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9260 }
9261 else
9262 {
9263 char name[32];
9264 get_pc_thunk_name (name, REGNO (dest));
9265 pic_labels_used |= 1 << REGNO (dest);
9266
9267 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9268 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9269 output_asm_insn ("call\t%X2", xops);
9270
9271 #if TARGET_MACHO
9272 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9273 This is what will be referenced by the Mach-O PIC subsystem. */
9274 if (machopic_should_output_picbase_label () || !label)
9275 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9276
9277 /* When we are restoring the pic base at the site of a nonlocal label,
9278 and we decided to emit the pic base above, we will still output a
9279 local label used for calculating the correction offset (even though
9280 the offset will be 0 in that case). */
9281 if (label)
9282 targetm.asm_out.internal_label (asm_out_file, "L",
9283 CODE_LABEL_NUMBER (label));
9284 #endif
9285 }
9286
9287 if (!TARGET_MACHO)
9288 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9289
9290 return "";
9291 }
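
/* In the common ELF -fpic case the sequence emitted above boils down to
   something like (illustrative AT&T syntax, with %ebx as DEST):

       call	__x86.get_pc_thunk.bx
       addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   The thunk emitted by ix86_code_end loads the return address into %ebx, so
   after the addl DEST holds the GOT base.  */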
9292
9293 /* Generate a "push" pattern for input ARG.  */
9294
9295 static rtx
9296 gen_push (rtx arg)
9297 {
9298 struct machine_function *m = cfun->machine;
9299
9300 if (m->fs.cfa_reg == stack_pointer_rtx)
9301 m->fs.cfa_offset += UNITS_PER_WORD;
9302 m->fs.sp_offset += UNITS_PER_WORD;
9303
9304 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9305 arg = gen_rtx_REG (word_mode, REGNO (arg));
9306
9307 return gen_rtx_SET (VOIDmode,
9308 gen_rtx_MEM (word_mode,
9309 gen_rtx_PRE_DEC (Pmode,
9310 stack_pointer_rtx)),
9311 arg);
9312 }
9313
9314 /* Generate a "pop" pattern for input ARG.  */
9315
9316 static rtx
9317 gen_pop (rtx arg)
9318 {
9319 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9320 arg = gen_rtx_REG (word_mode, REGNO (arg));
9321
9322 return gen_rtx_SET (VOIDmode,
9323 arg,
9324 gen_rtx_MEM (word_mode,
9325 gen_rtx_POST_INC (Pmode,
9326 stack_pointer_rtx)));
9327 }
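
/* For reference, gen_push and gen_pop build plain sets through auto-modified
   stack addresses, roughly (DImode shown only as an example; word_mode is
   SImode on 32-bit targets):

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))      push
       (set (reg:DI arg) (mem:DI (post_inc:DI (reg:DI sp))))     pop  */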
9328
9329 /* Return >= 0 if there is an unused call-clobbered register available
9330 for the entire function. */
9331
9332 static unsigned int
9333 ix86_select_alt_pic_regnum (void)
9334 {
9335 if (crtl->is_leaf
9336 && !crtl->profile
9337 && !ix86_current_function_calls_tls_descriptor)
9338 {
9339 int i, drap;
9340 /* Can't use the same register for both PIC and DRAP. */
9341 if (crtl->drap_reg)
9342 drap = REGNO (crtl->drap_reg);
9343 else
9344 drap = -1;
9345 for (i = 2; i >= 0; --i)
9346 if (i != drap && !df_regs_ever_live_p (i))
9347 return i;
9348 }
9349
9350 return INVALID_REGNUM;
9351 }
9352
9353 /* Return TRUE if we need to save REGNO. */
9354
9355 static bool
9356 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9357 {
9358 if (pic_offset_table_rtx
9359 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9360 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9361 || crtl->profile
9362 || crtl->calls_eh_return
9363 || crtl->uses_const_pool
9364 || cfun->has_nonlocal_label))
9365 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9366
9367 if (crtl->calls_eh_return && maybe_eh_return)
9368 {
9369 unsigned i;
9370 for (i = 0; ; i++)
9371 {
9372 unsigned test = EH_RETURN_DATA_REGNO (i);
9373 if (test == INVALID_REGNUM)
9374 break;
9375 if (test == regno)
9376 return true;
9377 }
9378 }
9379
9380 if (crtl->drap_reg
9381 && regno == REGNO (crtl->drap_reg)
9382 && !cfun->machine->no_drap_save_restore)
9383 return true;
9384
9385 return (df_regs_ever_live_p (regno)
9386 && !call_used_regs[regno]
9387 && !fixed_regs[regno]
9388 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9389 }
9390
9391 /* Return the number of saved general purpose registers.  */
9392
9393 static int
9394 ix86_nsaved_regs (void)
9395 {
9396 int nregs = 0;
9397 int regno;
9398
9399 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9400 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9401 nregs ++;
9402 return nregs;
9403 }
9404
9405 /* Return the number of saved SSE registers.  */
9406
9407 static int
9408 ix86_nsaved_sseregs (void)
9409 {
9410 int nregs = 0;
9411 int regno;
9412
9413 if (!TARGET_64BIT_MS_ABI)
9414 return 0;
9415 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9416 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9417 nregs ++;
9418 return nregs;
9419 }
9420
9421 /* Given FROM and TO register numbers, say whether this elimination is
9422 allowed. If stack alignment is needed, we can only replace argument
9423 pointer with hard frame pointer, or replace frame pointer with stack
9424 pointer. Otherwise, frame pointer elimination is automatically
9425 handled and all other eliminations are valid. */
9426
9427 static bool
9428 ix86_can_eliminate (const int from, const int to)
9429 {
9430 if (stack_realign_fp)
9431 return ((from == ARG_POINTER_REGNUM
9432 && to == HARD_FRAME_POINTER_REGNUM)
9433 || (from == FRAME_POINTER_REGNUM
9434 && to == STACK_POINTER_REGNUM));
9435 else
9436 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9437 }
9438
9439 /* Return the offset between two registers, one to be eliminated, and the other
9440 its replacement, at the start of a routine. */
9441
9442 HOST_WIDE_INT
9443 ix86_initial_elimination_offset (int from, int to)
9444 {
9445 struct ix86_frame frame;
9446 ix86_compute_frame_layout (&frame);
9447
9448 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9449 return frame.hard_frame_pointer_offset;
9450 else if (from == FRAME_POINTER_REGNUM
9451 && to == HARD_FRAME_POINTER_REGNUM)
9452 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9453 else
9454 {
9455 gcc_assert (to == STACK_POINTER_REGNUM);
9456
9457 if (from == ARG_POINTER_REGNUM)
9458 return frame.stack_pointer_offset;
9459
9460 gcc_assert (from == FRAME_POINTER_REGNUM);
9461 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9462 }
9463 }
9464
9465 /* In a dynamically-aligned function, we can't know the offset from
9466 stack pointer to frame pointer, so we must ensure that setjmp
9467 eliminates fp against the hard fp (%ebp) rather than trying to
9468 index from %esp up to the top of the frame across a gap that is
9469 of unknown (at compile-time) size. */
9470 static rtx
9471 ix86_builtin_setjmp_frame_value (void)
9472 {
9473 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9474 }
9475
9476 /* When using -fsplit-stack, the allocation routines set a field in
9477 the TCB to the bottom of the stack plus this much space, measured
9478 in bytes. */
9479
9480 #define SPLIT_STACK_AVAILABLE 256
9481
9482 /* Fill in the ix86_frame structure FRAME for the currently compiled function.  */
9483
9484 static void
9485 ix86_compute_frame_layout (struct ix86_frame *frame)
9486 {
9487 unsigned HOST_WIDE_INT stack_alignment_needed;
9488 HOST_WIDE_INT offset;
9489 unsigned HOST_WIDE_INT preferred_alignment;
9490 HOST_WIDE_INT size = get_frame_size ();
9491 HOST_WIDE_INT to_allocate;
9492
9493 frame->nregs = ix86_nsaved_regs ();
9494 frame->nsseregs = ix86_nsaved_sseregs ();
9495
9496 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9497 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9498
9499   /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9500      except for function prologues and leaf functions.  */
9501 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9502 && (!crtl->is_leaf || cfun->calls_alloca != 0
9503 || ix86_current_function_calls_tls_descriptor))
9504 {
9505 preferred_alignment = 16;
9506 stack_alignment_needed = 16;
9507 crtl->preferred_stack_boundary = 128;
9508 crtl->stack_alignment_needed = 128;
9509 }
9510
9511 gcc_assert (!size || stack_alignment_needed);
9512 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9513 gcc_assert (preferred_alignment <= stack_alignment_needed);
9514
9515 /* For SEH we have to limit the amount of code movement into the prologue.
9516 At present we do this via a BLOCKAGE, at which point there's very little
9517 scheduling that can be done, which means that there's very little point
9518 in doing anything except PUSHs. */
9519 if (TARGET_SEH)
9520 cfun->machine->use_fast_prologue_epilogue = false;
9521
9522   /* During reload iterations the number of registers saved can change.
9523      Recompute the value as needed.  Do not recompute when the number of
9524      registers didn't change, as reload makes multiple calls to this function
9525      and does not expect the decision to change within a single iteration.  */
9526 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9527 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9528 {
9529 int count = frame->nregs;
9530 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9531
9532 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9533
9534       /* The fast prologue uses move instead of push to save registers.  This
9535          is significantly longer, but it also executes faster, as modern hardware
9536          can execute the moves in parallel but cannot do so for push/pop.
9537
9538          Be careful about choosing which prologue to emit: when the function
9539          takes many instructions to execute, we may use the slow version, as we
9540          may when the function is known to be outside a hot spot (this is known
9541          with feedback only).  Weight the size of the function by the number of
9542          registers to save, as it is cheap to use one or two push instructions
9543          but very slow to use many of them.  */
9544 if (count)
9545 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9546 if (node->frequency < NODE_FREQUENCY_NORMAL
9547 || (flag_branch_probabilities
9548 && node->frequency < NODE_FREQUENCY_HOT))
9549 cfun->machine->use_fast_prologue_epilogue = false;
9550 else
9551 cfun->machine->use_fast_prologue_epilogue
9552 = !expensive_function_p (count);
9553 }
9554
9555 frame->save_regs_using_mov
9556 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9557 /* If static stack checking is enabled and done with probes,
9558 the registers need to be saved before allocating the frame. */
9559 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9560
9561 /* Skip return address. */
9562 offset = UNITS_PER_WORD;
9563
9564 /* Skip pushed static chain. */
9565 if (ix86_static_chain_on_stack)
9566 offset += UNITS_PER_WORD;
9567
9568 /* Skip saved base pointer. */
9569 if (frame_pointer_needed)
9570 offset += UNITS_PER_WORD;
9571 frame->hfp_save_offset = offset;
9572
9573 /* The traditional frame pointer location is at the top of the frame. */
9574 frame->hard_frame_pointer_offset = offset;
9575
9576 /* Register save area */
9577 offset += frame->nregs * UNITS_PER_WORD;
9578 frame->reg_save_offset = offset;
9579
9580 /* On SEH target, registers are pushed just before the frame pointer
9581 location. */
9582 if (TARGET_SEH)
9583 frame->hard_frame_pointer_offset = offset;
9584
9585 /* Align and set SSE register save area. */
9586 if (frame->nsseregs)
9587 {
9588 /* The only ABI that has saved SSE registers (Win64) also has a
9589 16-byte aligned default stack, and thus we don't need to be
9590 within the re-aligned local stack frame to save them. */
9591 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9592 offset = (offset + 16 - 1) & -16;
9593 offset += frame->nsseregs * 16;
9594 }
9595 frame->sse_reg_save_offset = offset;
9596
9597 /* The re-aligned stack starts here. Values before this point are not
9598 directly comparable with values below this point. In order to make
9599 sure that no value happens to be the same before and after, force
9600 the alignment computation below to add a non-zero value. */
9601 if (stack_realign_fp)
9602 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9603
9604 /* Va-arg area */
9605 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9606 offset += frame->va_arg_size;
9607
9608 /* Align start of frame for local function. */
9609 if (stack_realign_fp
9610 || offset != frame->sse_reg_save_offset
9611 || size != 0
9612 || !crtl->is_leaf
9613 || cfun->calls_alloca
9614 || ix86_current_function_calls_tls_descriptor)
9615 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9616
9617 /* Frame pointer points here. */
9618 frame->frame_pointer_offset = offset;
9619
9620 offset += size;
9621
9622   /* Add the outgoing arguments area.  It can be skipped if we eliminated
9623      all the function calls as dead code.
9624      Skipping is, however, impossible when the function calls alloca: the
9625      alloca expander assumes that the last crtl->outgoing_args_size bytes
9626      of the stack frame are unused.  */
9627 if (ACCUMULATE_OUTGOING_ARGS
9628 && (!crtl->is_leaf || cfun->calls_alloca
9629 || ix86_current_function_calls_tls_descriptor))
9630 {
9631 offset += crtl->outgoing_args_size;
9632 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9633 }
9634 else
9635 frame->outgoing_arguments_size = 0;
9636
9637 /* Align stack boundary. Only needed if we're calling another function
9638 or using alloca. */
9639 if (!crtl->is_leaf || cfun->calls_alloca
9640 || ix86_current_function_calls_tls_descriptor)
9641 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9642
9643 /* We've reached end of stack frame. */
9644 frame->stack_pointer_offset = offset;
9645
9646   /* The size the prologue needs to allocate.  */
9647 to_allocate = offset - frame->sse_reg_save_offset;
9648
9649 if ((!to_allocate && frame->nregs <= 1)
9650 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9651 frame->save_regs_using_mov = false;
9652
9653 if (ix86_using_red_zone ()
9654 && crtl->sp_is_unchanging
9655 && crtl->is_leaf
9656 && !ix86_current_function_calls_tls_descriptor)
9657 {
9658 frame->red_zone_size = to_allocate;
9659 if (frame->save_regs_using_mov)
9660 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9661 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9662 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9663 }
9664 else
9665 frame->red_zone_size = 0;
9666 frame->stack_pointer_offset -= frame->red_zone_size;
9667
9668 /* The SEH frame pointer location is near the bottom of the frame.
9669 This is enforced by the fact that the difference between the
9670 stack pointer and the frame pointer is limited to 240 bytes in
9671 the unwind data structure. */
9672 if (TARGET_SEH)
9673 {
9674 HOST_WIDE_INT diff;
9675
9676       /* If we can leave the frame pointer where it is, do so; this also
9677          returns the establisher frame for __builtin_frame_address (0).  */
9678 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9679 if (diff <= SEH_MAX_FRAME_SIZE
9680 && (diff > 240 || (diff & 15) != 0)
9681 && !crtl->accesses_prior_frames)
9682 {
9683 /* Ideally we'd determine what portion of the local stack frame
9684 (within the constraint of the lowest 240) is most heavily used.
9685 But without that complication, simply bias the frame pointer
9686 by 128 bytes so as to maximize the amount of the local stack
9687 frame that is addressable with 8-bit offsets. */
9688 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9689 }
9690 }
9691 }
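
/* An illustrative summary of the frame laid out above, from higher to lower
   addresses (zero-sized regions simply collapse; alignment padding omitted):

       return address
       pushed static chain, if ix86_static_chain_on_stack
       saved frame pointer, if frame_pointer_needed     -> hfp_save_offset
       GP register save area                            -> reg_save_offset
       16-byte aligned SSE register save area           -> sse_reg_save_offset
       va_arg register save area
       local variables                                  -> frame_pointer_offset
       outgoing argument area
       end of frame                                     -> stack_pointer_offset  */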
9692
9693 /* This is semi-inlined memory_address_length, but simplified
9694 since we know that we're always dealing with reg+offset, and
9695 to avoid having to create and discard all that rtl. */
9696
9697 static inline int
9698 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9699 {
9700 int len = 4;
9701
9702 if (offset == 0)
9703 {
9704 /* EBP and R13 cannot be encoded without an offset. */
9705 len = (regno == BP_REG || regno == R13_REG);
9706 }
9707 else if (IN_RANGE (offset, -128, 127))
9708 len = 1;
9709
9710 /* ESP and R12 must be encoded with a SIB byte. */
9711 if (regno == SP_REG || regno == R12_REG)
9712 len++;
9713
9714 return len;
9715 }
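
/* Illustrative return values, counting the extra address bytes beyond the
   ModRM byte: (%rbp) -> 1 (a disp8 is forced), (%rsp) -> 1 (SIB byte),
   16(%rsp) -> 2 (disp8 + SIB), 256(%rbx) -> 4 (disp32).  */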
9716
9717 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9718 The valid base registers are taken from CFUN->MACHINE->FS. */
9719
9720 static rtx
9721 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9722 {
9723 const struct machine_function *m = cfun->machine;
9724 rtx base_reg = NULL;
9725 HOST_WIDE_INT base_offset = 0;
9726
9727 if (m->use_fast_prologue_epilogue)
9728 {
9729 /* Choose the base register most likely to allow the most scheduling
9730 opportunities. Generally FP is valid throughout the function,
9731 while DRAP must be reloaded within the epilogue. But choose either
9732 over the SP due to increased encoding size. */
9733
9734 if (m->fs.fp_valid)
9735 {
9736 base_reg = hard_frame_pointer_rtx;
9737 base_offset = m->fs.fp_offset - cfa_offset;
9738 }
9739 else if (m->fs.drap_valid)
9740 {
9741 base_reg = crtl->drap_reg;
9742 base_offset = 0 - cfa_offset;
9743 }
9744 else if (m->fs.sp_valid)
9745 {
9746 base_reg = stack_pointer_rtx;
9747 base_offset = m->fs.sp_offset - cfa_offset;
9748 }
9749 }
9750 else
9751 {
9752 HOST_WIDE_INT toffset;
9753 int len = 16, tlen;
9754
9755 /* Choose the base register with the smallest address encoding.
9756 With a tie, choose FP > DRAP > SP. */
9757 if (m->fs.sp_valid)
9758 {
9759 base_reg = stack_pointer_rtx;
9760 base_offset = m->fs.sp_offset - cfa_offset;
9761 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9762 }
9763 if (m->fs.drap_valid)
9764 {
9765 toffset = 0 - cfa_offset;
9766 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9767 if (tlen <= len)
9768 {
9769 base_reg = crtl->drap_reg;
9770 base_offset = toffset;
9771 len = tlen;
9772 }
9773 }
9774 if (m->fs.fp_valid)
9775 {
9776 toffset = m->fs.fp_offset - cfa_offset;
9777 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9778 if (tlen <= len)
9779 {
9780 base_reg = hard_frame_pointer_rtx;
9781 base_offset = toffset;
9782 len = tlen;
9783 }
9784 }
9785 }
9786 gcc_assert (base_reg != NULL);
9787
9788 return plus_constant (Pmode, base_reg, base_offset);
9789 }
9790
9791 /* Emit code to save registers in the prologue. */
9792
9793 static void
9794 ix86_emit_save_regs (void)
9795 {
9796 unsigned int regno;
9797 rtx insn;
9798
9799 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9800 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9801 {
9802 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9803 RTX_FRAME_RELATED_P (insn) = 1;
9804 }
9805 }
9806
9807 /* Emit a single register save at CFA - CFA_OFFSET. */
9808
9809 static void
9810 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9811 HOST_WIDE_INT cfa_offset)
9812 {
9813 struct machine_function *m = cfun->machine;
9814 rtx reg = gen_rtx_REG (mode, regno);
9815 rtx mem, addr, base, insn;
9816
9817 addr = choose_baseaddr (cfa_offset);
9818 mem = gen_frame_mem (mode, addr);
9819
9820 /* For SSE saves, we need to indicate the 128-bit alignment. */
9821 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9822
9823 insn = emit_move_insn (mem, reg);
9824 RTX_FRAME_RELATED_P (insn) = 1;
9825
9826 base = addr;
9827 if (GET_CODE (base) == PLUS)
9828 base = XEXP (base, 0);
9829 gcc_checking_assert (REG_P (base));
9830
9831 /* When saving registers into a re-aligned local stack frame, avoid
9832 any tricky guessing by dwarf2out. */
9833 if (m->fs.realigned)
9834 {
9835 gcc_checking_assert (stack_realign_drap);
9836
9837 if (regno == REGNO (crtl->drap_reg))
9838 {
9839 /* A bit of a hack. We force the DRAP register to be saved in
9840 the re-aligned stack frame, which provides us with a copy
9841 of the CFA that will last past the prologue. Install it. */
9842 gcc_checking_assert (cfun->machine->fs.fp_valid);
9843 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9844 cfun->machine->fs.fp_offset - cfa_offset);
9845 mem = gen_rtx_MEM (mode, addr);
9846 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9847 }
9848 else
9849 {
9850 /* The frame pointer is a stable reference within the
9851 aligned frame. Use it. */
9852 gcc_checking_assert (cfun->machine->fs.fp_valid);
9853 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9854 cfun->machine->fs.fp_offset - cfa_offset);
9855 mem = gen_rtx_MEM (mode, addr);
9856 add_reg_note (insn, REG_CFA_EXPRESSION,
9857 gen_rtx_SET (VOIDmode, mem, reg));
9858 }
9859 }
9860
9861 /* The memory may not be relative to the current CFA register,
9862 which means that we may need to generate a new pattern for
9863 use by the unwind info. */
9864 else if (base != m->fs.cfa_reg)
9865 {
9866 addr = plus_constant (Pmode, m->fs.cfa_reg,
9867 m->fs.cfa_offset - cfa_offset);
9868 mem = gen_rtx_MEM (mode, addr);
9869 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9870 }
9871 }
9872
9873 /* Emit code to save registers using MOV insns.
9874 First register is stored at CFA - CFA_OFFSET. */
9875 static void
9876 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9877 {
9878 unsigned int regno;
9879
9880 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9881 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9882 {
9883 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9884 cfa_offset -= UNITS_PER_WORD;
9885 }
9886 }
9887
9888 /* Emit code to save SSE registers using MOV insns.
9889 First register is stored at CFA - CFA_OFFSET. */
9890 static void
9891 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9892 {
9893 unsigned int regno;
9894
9895 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9896 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9897 {
9898 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9899 cfa_offset -= 16;
9900 }
9901 }
9902
9903 static GTY(()) rtx queued_cfa_restores;
9904
9905 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9906    manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
9907    Don't add the note if the previously saved value will be left untouched
9908    within the stack red zone until return, as unwinders can find the same
9909    value in the register and on the stack.  */
9910
9911 static void
9912 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9913 {
9914 if (!crtl->shrink_wrapped
9915 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9916 return;
9917
9918 if (insn)
9919 {
9920 add_reg_note (insn, REG_CFA_RESTORE, reg);
9921 RTX_FRAME_RELATED_P (insn) = 1;
9922 }
9923 else
9924 queued_cfa_restores
9925 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9926 }
9927
9928 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9929
9930 static void
9931 ix86_add_queued_cfa_restore_notes (rtx insn)
9932 {
9933 rtx last;
9934 if (!queued_cfa_restores)
9935 return;
9936 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9937 ;
9938 XEXP (last, 1) = REG_NOTES (insn);
9939 REG_NOTES (insn) = queued_cfa_restores;
9940 queued_cfa_restores = NULL_RTX;
9941 RTX_FRAME_RELATED_P (insn) = 1;
9942 }
9943
9944 /* Expand prologue or epilogue stack adjustment.
9945    The pattern exists to put a dependency on all ebp-based memory accesses.
9946    STYLE should be negative if instructions should be marked as frame related,
9947    zero if the %r11 register is live and cannot be freely used, and positive
9948    otherwise.  */
9949
9950 static void
9951 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9952 int style, bool set_cfa)
9953 {
9954 struct machine_function *m = cfun->machine;
9955 rtx insn;
9956 bool add_frame_related_expr = false;
9957
9958 if (Pmode == SImode)
9959 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9960 else if (x86_64_immediate_operand (offset, DImode))
9961 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9962 else
9963 {
9964 rtx tmp;
9965 /* r11 is used by indirect sibcall return as well, set before the
9966 epilogue and used after the epilogue. */
9967 if (style)
9968 tmp = gen_rtx_REG (DImode, R11_REG);
9969 else
9970 {
9971 gcc_assert (src != hard_frame_pointer_rtx
9972 && dest != hard_frame_pointer_rtx);
9973 tmp = hard_frame_pointer_rtx;
9974 }
9975 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9976 if (style < 0)
9977 add_frame_related_expr = true;
9978
9979 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9980 }
9981
9982 insn = emit_insn (insn);
9983 if (style >= 0)
9984 ix86_add_queued_cfa_restore_notes (insn);
9985
9986 if (set_cfa)
9987 {
9988 rtx r;
9989
9990 gcc_assert (m->fs.cfa_reg == src);
9991 m->fs.cfa_offset += INTVAL (offset);
9992 m->fs.cfa_reg = dest;
9993
9994 r = gen_rtx_PLUS (Pmode, src, offset);
9995 r = gen_rtx_SET (VOIDmode, dest, r);
9996 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9997 RTX_FRAME_RELATED_P (insn) = 1;
9998 }
9999 else if (style < 0)
10000 {
10001 RTX_FRAME_RELATED_P (insn) = 1;
10002 if (add_frame_related_expr)
10003 {
10004 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10005 r = gen_rtx_SET (VOIDmode, dest, r);
10006 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10007 }
10008 }
10009
10010 if (dest == stack_pointer_rtx)
10011 {
10012 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10013 bool valid = m->fs.sp_valid;
10014
10015 if (src == hard_frame_pointer_rtx)
10016 {
10017 valid = m->fs.fp_valid;
10018 ooffset = m->fs.fp_offset;
10019 }
10020 else if (src == crtl->drap_reg)
10021 {
10022 valid = m->fs.drap_valid;
10023 ooffset = 0;
10024 }
10025 else
10026 {
10027	  /* Else there are two possibilities: SP itself, which we set
10028	     up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10029	     taken care of by hand along the eh_return path.  */
10030 gcc_checking_assert (src == stack_pointer_rtx
10031 || offset == const0_rtx);
10032 }
10033
10034 m->fs.sp_offset = ooffset - INTVAL (offset);
10035 m->fs.sp_valid = valid;
10036 }
10037 }
10038
10039 /* Find an available register to be used as the dynamic realign argument
10040    pointer register.  Such a register will be written in the prologue and
10041    used at the beginning of the body, so it must not be
10042	1. a parameter passing register.
10043	2. the GOT pointer.
10044    We reuse the static-chain register if it is available.  Otherwise, we
10045    use DI for i386 and R13 for x86-64.  We chose R13 since it has a
10046    shorter encoding.
10047
10048    Return: the regno of the chosen register.  */
10049
10050 static unsigned int
10051 find_drap_reg (void)
10052 {
10053 tree decl = cfun->decl;
10054
10055 if (TARGET_64BIT)
10056 {
10057       /* Use R13 for a nested function or a function that needs a static
10058	  chain.  Since a function with a tail call may use any caller-saved
10059	  registers in the epilogue, DRAP must not use a caller-saved
10060	  register in that case.  */
10061 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10062 return R13_REG;
10063
10064 return R10_REG;
10065 }
10066 else
10067 {
10068       /* Use DI for a nested function or a function that needs a static
10069	  chain.  Since a function with a tail call may use any caller-saved
10070	  registers in the epilogue, DRAP must not use a caller-saved
10071	  register in that case.  */
10072 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10073 return DI_REG;
10074
10075 /* Reuse static chain register if it isn't used for parameter
10076 passing. */
10077 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10078 {
10079 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10080 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10081 return CX_REG;
10082 }
10083 return DI_REG;
10084 }
10085 }
10086
10087 /* Return minimum incoming stack alignment. */
10088
10089 static unsigned int
10090 ix86_minimum_incoming_stack_boundary (bool sibcall)
10091 {
10092 unsigned int incoming_stack_boundary;
10093
10094 /* Prefer the one specified at command line. */
10095 if (ix86_user_incoming_stack_boundary)
10096 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10097 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10098 if -mstackrealign is used, this isn't a sibcall check, and the
10099 estimated stack alignment is 128 bits. */
10100 else if (!sibcall
10101 && !TARGET_64BIT
10102 && ix86_force_align_arg_pointer
10103 && crtl->stack_alignment_estimated == 128)
10104 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10105 else
10106 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10107
10108 /* Incoming stack alignment can be changed on individual functions
10109 via force_align_arg_pointer attribute. We use the smallest
10110 incoming stack boundary. */
10111 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10112 && lookup_attribute (ix86_force_align_arg_pointer_string,
10113 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10114 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10115
10116 /* The incoming stack frame has to be aligned at least at
10117 parm_stack_boundary. */
10118 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10119 incoming_stack_boundary = crtl->parm_stack_boundary;
10120
10121 /* Stack at entrance of main is aligned by runtime. We use the
10122 smallest incoming stack boundary. */
10123 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10124 && DECL_NAME (current_function_decl)
10125 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10126 && DECL_FILE_SCOPE_P (current_function_decl))
10127 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10128
10129 return incoming_stack_boundary;
10130 }
10131
10132 /* Update incoming stack boundary and estimated stack alignment. */
10133
10134 static void
10135 ix86_update_stack_boundary (void)
10136 {
10137 ix86_incoming_stack_boundary
10138 = ix86_minimum_incoming_stack_boundary (false);
10139
10140 /* x86_64 vararg needs 16byte stack alignment for register save
10141 area. */
10142 if (TARGET_64BIT
10143 && cfun->stdarg
10144 && crtl->stack_alignment_estimated < 128)
10145 crtl->stack_alignment_estimated = 128;
10146 }
10147
10148 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10149 needed or an rtx for DRAP otherwise. */
10150
10151 static rtx
10152 ix86_get_drap_rtx (void)
10153 {
10154 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10155 crtl->need_drap = true;
10156
10157 if (stack_realign_drap)
10158 {
10159 /* Assign DRAP to vDRAP and return vDRAP. */
10160 unsigned int regno = find_drap_reg ();
10161 rtx drap_vreg;
10162 rtx arg_ptr;
10163 rtx seq, insn;
10164
10165 arg_ptr = gen_rtx_REG (Pmode, regno);
10166 crtl->drap_reg = arg_ptr;
10167
10168 start_sequence ();
10169 drap_vreg = copy_to_reg (arg_ptr);
10170 seq = get_insns ();
10171 end_sequence ();
10172
10173 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10174 if (!optimize)
10175 {
10176 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10177 RTX_FRAME_RELATED_P (insn) = 1;
10178 }
10179 return drap_vreg;
10180 }
10181 else
10182 return NULL;
10183 }
10184
10185 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10186
10187 static rtx
10188 ix86_internal_arg_pointer (void)
10189 {
10190 return virtual_incoming_args_rtx;
10191 }
10192
10193 struct scratch_reg {
10194 rtx reg;
10195 bool saved;
10196 };
10197
10198 /* Return a short-lived scratch register for use on function entry.
10199 In 32-bit mode, it is valid only after the registers are saved
10200 in the prologue. This register must be released by means of
10201 release_scratch_register_on_entry once it is dead. */
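/* In 64-bit mode this is always R11. In 32-bit mode an argument register
left free by the calling convention is preferred, falling back to a
callee-saved register that the prologue already saves; only as a last
resort is a register pushed and popped around its use (sr->saved). */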
10202
10203 static void
10204 get_scratch_register_on_entry (struct scratch_reg *sr)
10205 {
10206 int regno;
10207
10208 sr->saved = false;
10209
10210 if (TARGET_64BIT)
10211 {
10212 /* We always use R11 in 64-bit mode. */
10213 regno = R11_REG;
10214 }
10215 else
10216 {
10217 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10218 bool fastcall_p
10219 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10220 bool thiscall_p
10221 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10222 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10223 int regparm = ix86_function_regparm (fntype, decl);
10224 int drap_regno
10225 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10226
10227 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10228 for the static chain register. */
10229 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10230 && drap_regno != AX_REG)
10231 regno = AX_REG;
10232 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10233 for the static chain register. */
10234 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10235 regno = AX_REG;
10236 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10237 regno = DX_REG;
10238 /* ecx is the static chain register. */
10239 else if (regparm < 3 && !fastcall_p && !thiscall_p
10240 && !static_chain_p
10241 && drap_regno != CX_REG)
10242 regno = CX_REG;
10243 else if (ix86_save_reg (BX_REG, true))
10244 regno = BX_REG;
10245 /* esi is the static chain register. */
10246 else if (!(regparm == 3 && static_chain_p)
10247 && ix86_save_reg (SI_REG, true))
10248 regno = SI_REG;
10249 else if (ix86_save_reg (DI_REG, true))
10250 regno = DI_REG;
10251 else
10252 {
10253 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10254 sr->saved = true;
10255 }
10256 }
10257
10258 sr->reg = gen_rtx_REG (Pmode, regno);
10259 if (sr->saved)
10260 {
10261 rtx insn = emit_insn (gen_push (sr->reg));
10262 RTX_FRAME_RELATED_P (insn) = 1;
10263 }
10264 }
10265
10266 /* Release a scratch register obtained from the preceding function. */
10267
10268 static void
10269 release_scratch_register_on_entry (struct scratch_reg *sr)
10270 {
10271 if (sr->saved)
10272 {
10273 struct machine_function *m = cfun->machine;
10274 rtx x, insn = emit_insn (gen_pop (sr->reg));
10275
10276 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10277 RTX_FRAME_RELATED_P (insn) = 1;
10278 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10279 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10280 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10281 m->fs.sp_offset -= UNITS_PER_WORD;
10282 }
10283 }
10284
10285 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
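/* With the default STACK_CHECK_PROBE_INTERVAL_EXP this is typically
4096 bytes, i.e. one probe per page. */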
10286
10287 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10288
10289 static void
10290 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10291 {
10292 /* We skip the probe for the first interval + a small dope of 4 words and
10293 probe that many bytes past the specified size to maintain a protection
10294 area at the bottom of the stack. */
10295 const int dope = 4 * UNITS_PER_WORD;
10296 rtx size_rtx = GEN_INT (size), last;
10297
10298 /* See if we have a constant small number of probes to generate. If so,
10299 that's the easy case. The run-time loop is made up of 11 insns in the
10300 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10301 for n # of intervals. */
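/* For instance, with a 4096-byte probe interval an 8 KiB allocation has
n = 2 intervals and is expanded inline as 3+2*(2-1) = 5 insns, while
anything above 5 intervals (20 KiB) uses the run-time loop below. */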
10302 if (size <= 5 * PROBE_INTERVAL)
10303 {
10304 HOST_WIDE_INT i, adjust;
10305 bool first_probe = true;
10306
10307 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10308 values of N from 1 until it exceeds SIZE. If only one probe is
10309 needed, this will not generate any code. Then adjust and probe
10310 to PROBE_INTERVAL + SIZE. */
10311 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10312 {
10313 if (first_probe)
10314 {
10315 adjust = 2 * PROBE_INTERVAL + dope;
10316 first_probe = false;
10317 }
10318 else
10319 adjust = PROBE_INTERVAL;
10320
10321 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10322 plus_constant (Pmode, stack_pointer_rtx,
10323 -adjust)));
10324 emit_stack_probe (stack_pointer_rtx);
10325 }
10326
10327 if (first_probe)
10328 adjust = size + PROBE_INTERVAL + dope;
10329 else
10330 adjust = size + PROBE_INTERVAL - i;
10331
10332 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10333 plus_constant (Pmode, stack_pointer_rtx,
10334 -adjust)));
10335 emit_stack_probe (stack_pointer_rtx);
10336
10337 /* Adjust back to account for the additional first interval. */
10338 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10339 plus_constant (Pmode, stack_pointer_rtx,
10340 PROBE_INTERVAL + dope)));
10341 }
10342
10343 /* Otherwise, do the same as above, but in a loop. Note that we must be
10344 extra careful with variables wrapping around because we might be at
10345 the very top (or the very bottom) of the address space and we have
10346 to be able to handle this case properly; in particular, we use an
10347 equality test for the loop condition. */
10348 else
10349 {
10350 HOST_WIDE_INT rounded_size;
10351 struct scratch_reg sr;
10352
10353 get_scratch_register_on_entry (&sr);
10354
10355
10356 /* Step 1: round SIZE to the previous multiple of the interval. */
10357
10358 rounded_size = size & -PROBE_INTERVAL;
10359
10360
10361 /* Step 2: compute initial and final value of the loop counter. */
10362
10363 /* SP = SP_0 + PROBE_INTERVAL. */
10364 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10365 plus_constant (Pmode, stack_pointer_rtx,
10366 - (PROBE_INTERVAL + dope))));
10367
10368 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10369 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10370 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10371 gen_rtx_PLUS (Pmode, sr.reg,
10372 stack_pointer_rtx)));
10373
10374
10375 /* Step 3: the loop
10376
10377 while (SP != LAST_ADDR)
10378 {
10379 SP = SP + PROBE_INTERVAL
10380 probe at SP
10381 }
10382
10383 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10384 values of N from 1 until it is equal to ROUNDED_SIZE. */
10385
10386 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10387
10388
10389 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10390 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10391
10392 if (size != rounded_size)
10393 {
10394 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10395 plus_constant (Pmode, stack_pointer_rtx,
10396 rounded_size - size)));
10397 emit_stack_probe (stack_pointer_rtx);
10398 }
10399
10400 /* Adjust back to account for the additional first interval. */
10401 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10402 plus_constant (Pmode, stack_pointer_rtx,
10403 PROBE_INTERVAL + dope)));
10404
10405 release_scratch_register_on_entry (&sr);
10406 }
10407
10408 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10409
10410 /* Even if the stack pointer isn't the CFA register, we need to correctly
10411 describe the adjustments made to it, in particular differentiate the
10412 frame-related ones from the frame-unrelated ones. */
10413 if (size > 0)
10414 {
10415 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10416 XVECEXP (expr, 0, 0)
10417 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10418 plus_constant (Pmode, stack_pointer_rtx, -size));
10419 XVECEXP (expr, 0, 1)
10420 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10421 plus_constant (Pmode, stack_pointer_rtx,
10422 PROBE_INTERVAL + dope + size));
10423 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10424 RTX_FRAME_RELATED_P (last) = 1;
10425
10426 cfun->machine->fs.sp_offset += size;
10427 }
10428
10429 /* Make sure nothing is scheduled before we are done. */
10430 emit_insn (gen_blockage ());
10431 }
10432
10433 /* Adjust the stack pointer up to REG while probing it. */
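/* Roughly, the emitted loop is (AT&T syntax, 32-bit, assuming the default
4096-byte probe interval):
.LPSRL0: cmpl %reg, %esp
je .LPSRE0
subl $4096, %esp
orl $0, (%esp)
jmp .LPSRL0
.LPSRE0:
i.e. move SP down one interval at a time, touching each new page, until
SP reaches the last address precomputed in REG. */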
10434
10435 const char *
10436 output_adjust_stack_and_probe (rtx reg)
10437 {
10438 static int labelno = 0;
10439 char loop_lab[32], end_lab[32];
10440 rtx xops[2];
10441
10442 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10443 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10444
10445 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10446
10447 /* Jump to END_LAB if SP == LAST_ADDR. */
10448 xops[0] = stack_pointer_rtx;
10449 xops[1] = reg;
10450 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10451 fputs ("\tje\t", asm_out_file);
10452 assemble_name_raw (asm_out_file, end_lab);
10453 fputc ('\n', asm_out_file);
10454
10455 /* SP = SP + PROBE_INTERVAL. */
10456 xops[1] = GEN_INT (PROBE_INTERVAL);
10457 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10458
10459 /* Probe at SP. */
10460 xops[1] = const0_rtx;
10461 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10462
10463 fprintf (asm_out_file, "\tjmp\t");
10464 assemble_name_raw (asm_out_file, loop_lab);
10465 fputc ('\n', asm_out_file);
10466
10467 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10468
10469 return "";
10470 }
10471
10472 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10473 inclusive. These are offsets from the current stack pointer. */
10474
10475 static void
10476 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10477 {
10478 /* See if we have a constant small number of probes to generate. If so,
10479 that's the easy case. The run-time loop is made up of 7 insns in the
10480 generic case while the compile-time loop is made up of n insns for n #
10481 of intervals. */
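/* E.g. with a 4096-byte probe interval, probing a 12 KiB range emits
three inline probes (n = 3 insns); ranges beyond 7 intervals (28 KiB)
fall through to the run-time loop below. */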
10482 if (size <= 7 * PROBE_INTERVAL)
10483 {
10484 HOST_WIDE_INT i;
10485
10486 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10487 it exceeds SIZE. If only one probe is needed, this will not
10488 generate any code. Then probe at FIRST + SIZE. */
10489 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10490 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10491 -(first + i)));
10492
10493 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10494 -(first + size)));
10495 }
10496
10497 /* Otherwise, do the same as above, but in a loop. Note that we must be
10498 extra careful with variables wrapping around because we might be at
10499 the very top (or the very bottom) of the address space and we have
10500 to be able to handle this case properly; in particular, we use an
10501 equality test for the loop condition. */
10502 else
10503 {
10504 HOST_WIDE_INT rounded_size, last;
10505 struct scratch_reg sr;
10506
10507 get_scratch_register_on_entry (&sr);
10508
10509
10510 /* Step 1: round SIZE to the previous multiple of the interval. */
10511
10512 rounded_size = size & -PROBE_INTERVAL;
10513
10514
10515 /* Step 2: compute initial and final value of the loop counter. */
10516
10517 /* TEST_OFFSET = FIRST. */
10518 emit_move_insn (sr.reg, GEN_INT (-first));
10519
10520 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10521 last = first + rounded_size;
10522
10523
10524 /* Step 3: the loop
10525
10526 while (TEST_ADDR != LAST_ADDR)
10527 {
10528 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10529 probe at TEST_ADDR
10530 }
10531
10532 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10533 until it is equal to ROUNDED_SIZE. */
10534
10535 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10536
10537
10538 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10539 that SIZE is equal to ROUNDED_SIZE. */
10540
10541 if (size != rounded_size)
10542 emit_stack_probe (plus_constant (Pmode,
10543 gen_rtx_PLUS (Pmode,
10544 stack_pointer_rtx,
10545 sr.reg),
10546 rounded_size - size));
10547
10548 release_scratch_register_on_entry (&sr);
10549 }
10550
10551 /* Make sure nothing is scheduled before we are done. */
10552 emit_insn (gen_blockage ());
10553 }
10554
10555 /* Probe a range of stack addresses from REG to END, inclusive. These are
10556 offsets from the current stack pointer. */
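/* The emitted loop mirrors output_adjust_stack_and_probe above, except
that a separate register walks through the (negative) offsets and the
probe is addressed as (%esp,%reg), so the stack pointer itself is
never moved here. */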
10557
10558 const char *
10559 output_probe_stack_range (rtx reg, rtx end)
10560 {
10561 static int labelno = 0;
10562 char loop_lab[32], end_lab[32];
10563 rtx xops[3];
10564
10565 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10566 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10567
10568 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10569
10570 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10571 xops[0] = reg;
10572 xops[1] = end;
10573 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10574 fputs ("\tje\t", asm_out_file);
10575 assemble_name_raw (asm_out_file, end_lab);
10576 fputc ('\n', asm_out_file);
10577
10578 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10579 xops[1] = GEN_INT (PROBE_INTERVAL);
10580 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10581
10582 /* Probe at TEST_ADDR. */
10583 xops[0] = stack_pointer_rtx;
10584 xops[1] = reg;
10585 xops[2] = const0_rtx;
10586 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10587
10588 fprintf (asm_out_file, "\tjmp\t");
10589 assemble_name_raw (asm_out_file, loop_lab);
10590 fputc ('\n', asm_out_file);
10591
10592 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10593
10594 return "";
10595 }
10596
10597 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10598 to be generated in correct form. */
10599 static void
10600 ix86_finalize_stack_realign_flags (void)
10601 {
10602 /* Check if stack realign is really needed after reload, and
10603 store the result in cfun. */
10604 unsigned int incoming_stack_boundary
10605 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10606 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10607 unsigned int stack_realign = (incoming_stack_boundary
10608 < (crtl->is_leaf
10609 ? crtl->max_used_stack_slot_alignment
10610 : crtl->stack_alignment_needed));
10611
10612 if (crtl->stack_realign_finalized)
10613 {
10614 /* After stack_realign_needed is finalized, we can no longer
10615 change it. */
10616 gcc_assert (crtl->stack_realign_needed == stack_realign);
10617 return;
10618 }
10619
10620 /* If the only reason for frame_pointer_needed is that we conservatively
10621 assumed stack realignment might be needed, but in the end nothing that
10622 needed the stack alignment had been spilled, clear frame_pointer_needed
10623 and say we don't need stack realignment. */
10624 if (stack_realign
10625 && frame_pointer_needed
10626 && crtl->is_leaf
10627 && flag_omit_frame_pointer
10628 && crtl->sp_is_unchanging
10629 && !ix86_current_function_calls_tls_descriptor
10630 && !crtl->accesses_prior_frames
10631 && !cfun->calls_alloca
10632 && !crtl->calls_eh_return
10633 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10634 && !ix86_frame_pointer_required ()
10635 && get_frame_size () == 0
10636 && ix86_nsaved_sseregs () == 0
10637 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10638 {
10639 HARD_REG_SET set_up_by_prologue, prologue_used;
10640 basic_block bb;
10641
10642 CLEAR_HARD_REG_SET (prologue_used);
10643 CLEAR_HARD_REG_SET (set_up_by_prologue);
10644 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10645 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10646 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10647 HARD_FRAME_POINTER_REGNUM);
10648 FOR_EACH_BB_FN (bb, cfun)
10649 {
10650 rtx insn;
10651 FOR_BB_INSNS (bb, insn)
10652 if (NONDEBUG_INSN_P (insn)
10653 && requires_stack_frame_p (insn, prologue_used,
10654 set_up_by_prologue))
10655 {
10656 crtl->stack_realign_needed = stack_realign;
10657 crtl->stack_realign_finalized = true;
10658 return;
10659 }
10660 }
10661
10662 /* If drap has been set, but it actually isn't live at the start
10663 of the function, there is no reason to set it up. */
10664 if (crtl->drap_reg)
10665 {
10666 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10667 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10668 {
10669 crtl->drap_reg = NULL_RTX;
10670 crtl->need_drap = false;
10671 }
10672 }
10673 else
10674 cfun->machine->no_drap_save_restore = true;
10675
10676 frame_pointer_needed = false;
10677 stack_realign = false;
10678 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10679 crtl->stack_alignment_needed = incoming_stack_boundary;
10680 crtl->stack_alignment_estimated = incoming_stack_boundary;
10681 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10682 crtl->preferred_stack_boundary = incoming_stack_boundary;
10683 df_finish_pass (true);
10684 df_scan_alloc (NULL);
10685 df_scan_blocks ();
10686 df_compute_regs_ever_live (true);
10687 df_analyze ();
10688 }
10689
10690 crtl->stack_realign_needed = stack_realign;
10691 crtl->stack_realign_finalized = true;
10692 }
10693
10694 /* Expand the prologue into a bunch of separate insns. */
10695
10696 void
10697 ix86_expand_prologue (void)
10698 {
10699 struct machine_function *m = cfun->machine;
10700 rtx insn, t;
10701 bool pic_reg_used;
10702 struct ix86_frame frame;
10703 HOST_WIDE_INT allocate;
10704 bool int_registers_saved;
10705 bool sse_registers_saved;
10706
10707 ix86_finalize_stack_realign_flags ();
10708
10709 /* DRAP should not coexist with stack_realign_fp */
10710 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10711
10712 memset (&m->fs, 0, sizeof (m->fs));
10713
10714 /* Initialize CFA state for before the prologue. */
10715 m->fs.cfa_reg = stack_pointer_rtx;
10716 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10717
10718 /* Track SP offset to the CFA. We continue tracking this after we've
10719 swapped the CFA register away from SP. In the case of re-alignment
10720 this is fudged; we're interested in offsets within the local frame. */
10721 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10722 m->fs.sp_valid = true;
10723
10724 ix86_compute_frame_layout (&frame);
10725
10726 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10727 {
10728 /* We should have already generated an error for any use of
10729 ms_hook on a nested function. */
10730 gcc_checking_assert (!ix86_static_chain_on_stack);
10731
10732 /* Check if profiling is active and we shall use the profiling before
10733 prologue variant. If so, issue a sorry. */
10734 if (crtl->profile && flag_fentry != 0)
10735 sorry ("ms_hook_prologue attribute isn%'t compatible "
10736 "with -mfentry for 32-bit");
10737
10738 /* In ix86_asm_output_function_label we emitted:
10739 8b ff movl.s %edi,%edi
10740 55 push %ebp
10741 8b ec movl.s %esp,%ebp
10742
10743 This matches the hookable function prologue in Win32 API
10744 functions in Microsoft Windows XP Service Pack 2 and newer.
10745 Wine uses this to enable Windows apps to hook the Win32 API
10746 functions provided by Wine.
10747
10748 What that means is that we've already set up the frame pointer. */
10749
10750 if (frame_pointer_needed
10751 && !(crtl->drap_reg && crtl->stack_realign_needed))
10752 {
10753 rtx push, mov;
10754
10755 /* We've decided to use the frame pointer already set up.
10756 Describe this to the unwinder by pretending that both
10757 push and mov insns happen right here.
10758
10759 Putting the unwind info here at the end of the ms_hook
10760 is done so that we can make absolutely certain we get
10761 the required byte sequence at the start of the function,
10762 rather than relying on an assembler that can produce
10763 the exact encoding required.
10764
10765 However it does mean (in the unpatched case) that we have
10766 a 1 insn window where the asynchronous unwind info is
10767 incorrect. However, if we placed the unwind info at
10768 its correct location we would have incorrect unwind info
10769 in the patched case. Which is probably all moot since
10770 I don't expect Wine generates dwarf2 unwind info for the
10771 system libraries that use this feature. */
10772
10773 insn = emit_insn (gen_blockage ());
10774
10775 push = gen_push (hard_frame_pointer_rtx);
10776 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10777 stack_pointer_rtx);
10778 RTX_FRAME_RELATED_P (push) = 1;
10779 RTX_FRAME_RELATED_P (mov) = 1;
10780
10781 RTX_FRAME_RELATED_P (insn) = 1;
10782 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10783 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10784
10785 /* Note that gen_push incremented m->fs.cfa_offset, even
10786 though we didn't emit the push insn here. */
10787 m->fs.cfa_reg = hard_frame_pointer_rtx;
10788 m->fs.fp_offset = m->fs.cfa_offset;
10789 m->fs.fp_valid = true;
10790 }
10791 else
10792 {
10793 /* The frame pointer is not needed so pop %ebp again.
10794 This leaves us with a pristine state. */
10795 emit_insn (gen_pop (hard_frame_pointer_rtx));
10796 }
10797 }
10798
10799 /* The first insn of a function that accepts its static chain on the
10800 stack is to push the register that would be filled in by a direct
10801 call. This insn will be skipped by the trampoline. */
10802 else if (ix86_static_chain_on_stack)
10803 {
10804 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10805 emit_insn (gen_blockage ());
10806
10807 /* We don't want to interpret this push insn as a register save,
10808 only as a stack adjustment. The real copy of the register as
10809 a save will be done later, if needed. */
10810 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10811 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10812 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10813 RTX_FRAME_RELATED_P (insn) = 1;
10814 }
10815
10816 /* Emit prologue code to adjust stack alignment and set up the DRAP, in
10817 case DRAP is needed and stack realignment is really needed after reload. */
10818 if (stack_realign_drap)
10819 {
10820 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10821
10822 /* Only need to push parameter pointer reg if it is caller saved. */
10823 if (!call_used_regs[REGNO (crtl->drap_reg)])
10824 {
10825 /* Push arg pointer reg */
10826 insn = emit_insn (gen_push (crtl->drap_reg));
10827 RTX_FRAME_RELATED_P (insn) = 1;
10828 }
10829
10830 /* Grab the argument pointer. */
10831 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10832 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10833 RTX_FRAME_RELATED_P (insn) = 1;
10834 m->fs.cfa_reg = crtl->drap_reg;
10835 m->fs.cfa_offset = 0;
10836
10837 /* Align the stack. */
10838 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10839 stack_pointer_rtx,
10840 GEN_INT (-align_bytes)));
10841 RTX_FRAME_RELATED_P (insn) = 1;
10842
10843 /* Replicate the return address on the stack so that the return
10844 address can be reached via the (argp - 1) slot. This is needed
10845 to implement macro RETURN_ADDR_RTX and intrinsic function
10846 expand_builtin_return_addr etc. */
10847 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10848 t = gen_frame_mem (word_mode, t);
10849 insn = emit_insn (gen_push (t));
10850 RTX_FRAME_RELATED_P (insn) = 1;
10851
10852 /* For the purposes of frame and register save area addressing,
10853 we've started over with a new frame. */
10854 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10855 m->fs.realigned = true;
10856 }
10857
10858 int_registers_saved = (frame.nregs == 0);
10859 sse_registers_saved = (frame.nsseregs == 0);
10860
10861 if (frame_pointer_needed && !m->fs.fp_valid)
10862 {
10863 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10864 slower on all targets. Also sdb doesn't like it. */
10865 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10866 RTX_FRAME_RELATED_P (insn) = 1;
10867
10868 /* Push registers now, before setting the frame pointer
10869 on SEH target. */
10870 if (!int_registers_saved
10871 && TARGET_SEH
10872 && !frame.save_regs_using_mov)
10873 {
10874 ix86_emit_save_regs ();
10875 int_registers_saved = true;
10876 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10877 }
10878
10879 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10880 {
10881 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10882 RTX_FRAME_RELATED_P (insn) = 1;
10883
10884 if (m->fs.cfa_reg == stack_pointer_rtx)
10885 m->fs.cfa_reg = hard_frame_pointer_rtx;
10886 m->fs.fp_offset = m->fs.sp_offset;
10887 m->fs.fp_valid = true;
10888 }
10889 }
10890
10891 if (!int_registers_saved)
10892 {
10893 /* If saving registers via PUSH, do so now. */
10894 if (!frame.save_regs_using_mov)
10895 {
10896 ix86_emit_save_regs ();
10897 int_registers_saved = true;
10898 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10899 }
10900
10901 /* When using the red zone we may start saving registers before allocating
10902 the stack frame, saving one cycle of the prologue. However, avoid
10903 doing this if we have to probe the stack; at least on x86_64 the
10904 stack probe can turn into a call that clobbers a red zone location. */
10905 else if (ix86_using_red_zone ()
10906 && (! TARGET_STACK_PROBE
10907 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10908 {
10909 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10910 int_registers_saved = true;
10911 }
10912 }
10913
10914 if (stack_realign_fp)
10915 {
10916 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10917 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10918
10919 /* The computation of the size of the re-aligned stack frame means
10920 that we must allocate the size of the register save area before
10921 performing the actual alignment. Otherwise we cannot guarantee
10922 that there's enough storage above the realignment point. */
10923 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10924 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10925 GEN_INT (m->fs.sp_offset
10926 - frame.sse_reg_save_offset),
10927 -1, false);
10928
10929 /* Align the stack. */
10930 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10931 stack_pointer_rtx,
10932 GEN_INT (-align_bytes)));
10933
10934 /* For the purposes of register save area addressing, the stack
10935 pointer is no longer valid. As for the value of sp_offset,
10936 see ix86_compute_frame_layout, which we need to match in order
10937 to pass verification of stack_pointer_offset at the end. */
10938 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10939 m->fs.sp_valid = false;
10940 }
10941
10942 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10943
10944 if (flag_stack_usage_info)
10945 {
10946 /* We start to count from ARG_POINTER. */
10947 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10948
10949 /* If it was realigned, take into account the fake frame. */
10950 if (stack_realign_drap)
10951 {
10952 if (ix86_static_chain_on_stack)
10953 stack_size += UNITS_PER_WORD;
10954
10955 if (!call_used_regs[REGNO (crtl->drap_reg)])
10956 stack_size += UNITS_PER_WORD;
10957
10958 /* This over-estimates by 1 minimal-stack-alignment-unit but
10959 mitigates that by counting in the new return address slot. */
10960 current_function_dynamic_stack_size
10961 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10962 }
10963
10964 current_function_static_stack_size = stack_size;
10965 }
10966
10967 /* On SEH target with very large frame size, allocate an area to save
10968 SSE registers (as the very large allocation won't be described). */
10969 if (TARGET_SEH
10970 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10971 && !sse_registers_saved)
10972 {
10973 HOST_WIDE_INT sse_size =
10974 frame.sse_reg_save_offset - frame.reg_save_offset;
10975
10976 gcc_assert (int_registers_saved);
10977
10978 /* No need to do stack checking as the area will be immediately
10979 written. */
10980 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10981 GEN_INT (-sse_size), -1,
10982 m->fs.cfa_reg == stack_pointer_rtx);
10983 allocate -= sse_size;
10984 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10985 sse_registers_saved = true;
10986 }
10987
10988 /* The stack has already been decremented by the instruction calling us
10989 so probe if the size is non-negative to preserve the protection area. */
10990 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10991 {
10992 /* We expect the registers to be saved when probes are used. */
10993 gcc_assert (int_registers_saved);
10994
10995 if (STACK_CHECK_MOVING_SP)
10996 {
10997 if (!(crtl->is_leaf && !cfun->calls_alloca
10998 && allocate <= PROBE_INTERVAL))
10999 {
11000 ix86_adjust_stack_and_probe (allocate);
11001 allocate = 0;
11002 }
11003 }
11004 else
11005 {
11006 HOST_WIDE_INT size = allocate;
11007
11008 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11009 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11010
11011 if (TARGET_STACK_PROBE)
11012 {
11013 if (crtl->is_leaf && !cfun->calls_alloca)
11014 {
11015 if (size > PROBE_INTERVAL)
11016 ix86_emit_probe_stack_range (0, size);
11017 }
11018 else
11019 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11020 }
11021 else
11022 {
11023 if (crtl->is_leaf && !cfun->calls_alloca)
11024 {
11025 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11026 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11027 size - STACK_CHECK_PROTECT);
11028 }
11029 else
11030 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11031 }
11032 }
11033 }
11034
11035 if (allocate == 0)
11036 ;
11037 else if (!ix86_target_stack_probe ()
11038 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11039 {
11040 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11041 GEN_INT (-allocate), -1,
11042 m->fs.cfa_reg == stack_pointer_rtx);
11043 }
11044 else
11045 {
11046 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11047 rtx r10 = NULL;
11048 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11049 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11050 bool eax_live = ix86_eax_live_at_start_p ();
11051 bool r10_live = false;
11052
11053 if (TARGET_64BIT)
11054 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11055
11056 if (eax_live)
11057 {
11058 insn = emit_insn (gen_push (eax));
11059 allocate -= UNITS_PER_WORD;
11060 /* Note that SEH directives need to continue tracking the stack
11061 pointer even after the frame pointer has been set up. */
11062 if (sp_is_cfa_reg || TARGET_SEH)
11063 {
11064 if (sp_is_cfa_reg)
11065 m->fs.cfa_offset += UNITS_PER_WORD;
11066 RTX_FRAME_RELATED_P (insn) = 1;
11067 }
11068 }
11069
11070 if (r10_live)
11071 {
11072 r10 = gen_rtx_REG (Pmode, R10_REG);
11073 insn = emit_insn (gen_push (r10));
11074 allocate -= UNITS_PER_WORD;
11075 if (sp_is_cfa_reg || TARGET_SEH)
11076 {
11077 if (sp_is_cfa_reg)
11078 m->fs.cfa_offset += UNITS_PER_WORD;
11079 RTX_FRAME_RELATED_P (insn) = 1;
11080 }
11081 }
11082
11083 emit_move_insn (eax, GEN_INT (allocate));
11084 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11085
11086 /* Use the fact that AX still contains ALLOCATE. */
11087 adjust_stack_insn = (Pmode == DImode
11088 ? gen_pro_epilogue_adjust_stack_di_sub
11089 : gen_pro_epilogue_adjust_stack_si_sub);
11090
11091 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11092 stack_pointer_rtx, eax));
11093
11094 if (sp_is_cfa_reg || TARGET_SEH)
11095 {
11096 if (sp_is_cfa_reg)
11097 m->fs.cfa_offset += allocate;
11098 RTX_FRAME_RELATED_P (insn) = 1;
11099 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11100 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11101 plus_constant (Pmode, stack_pointer_rtx,
11102 -allocate)));
11103 }
11104 m->fs.sp_offset += allocate;
11105
11106 /* Use stack_pointer_rtx for relative addressing so that code
11107 works for realigned stack, too. */
11108 if (r10_live && eax_live)
11109 {
11110 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11111 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11112 gen_frame_mem (word_mode, t));
11113 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11114 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11115 gen_frame_mem (word_mode, t));
11116 }
11117 else if (eax_live || r10_live)
11118 {
11119 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11120 emit_move_insn (gen_rtx_REG (word_mode,
11121 (eax_live ? AX_REG : R10_REG)),
11122 gen_frame_mem (word_mode, t));
11123 }
11124 }
11125 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11126
11127 /* If we haven't already set up the frame pointer, do so now. */
11128 if (frame_pointer_needed && !m->fs.fp_valid)
11129 {
11130 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11131 GEN_INT (frame.stack_pointer_offset
11132 - frame.hard_frame_pointer_offset));
11133 insn = emit_insn (insn);
11134 RTX_FRAME_RELATED_P (insn) = 1;
11135 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11136
11137 if (m->fs.cfa_reg == stack_pointer_rtx)
11138 m->fs.cfa_reg = hard_frame_pointer_rtx;
11139 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11140 m->fs.fp_valid = true;
11141 }
11142
11143 if (!int_registers_saved)
11144 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11145 if (!sse_registers_saved)
11146 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11147
11148 pic_reg_used = false;
11149 /* We don't use pic-register for pe-coff target. */
11150 if (pic_offset_table_rtx
11151 && !TARGET_PECOFF
11152 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11153 || crtl->profile))
11154 {
11155 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11156
11157 if (alt_pic_reg_used != INVALID_REGNUM)
11158 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11159
11160 pic_reg_used = true;
11161 }
11162
11163 if (pic_reg_used)
11164 {
11165 if (TARGET_64BIT)
11166 {
11167 if (ix86_cmodel == CM_LARGE_PIC)
11168 {
11169 rtx label, tmp_reg;
11170
11171 gcc_assert (Pmode == DImode);
11172 label = gen_label_rtx ();
11173 emit_label (label);
11174 LABEL_PRESERVE_P (label) = 1;
11175 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11176 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11177 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11178 label));
11179 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11180 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11181 pic_offset_table_rtx, tmp_reg));
11182 }
11183 else
11184 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11185 }
11186 else
11187 {
11188 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11189 RTX_FRAME_RELATED_P (insn) = 1;
11190 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11191 }
11192 }
11193
11194 /* In the pic_reg_used case, make sure that the got load isn't deleted
11195 when mcount needs it. Blockage to avoid call movement across mcount
11196 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11197 note. */
11198 if (crtl->profile && !flag_fentry && pic_reg_used)
11199 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11200
11201 if (crtl->drap_reg && !crtl->stack_realign_needed)
11202 {
11203 /* vDRAP is set up, but after reload it turns out stack realignment
11204 isn't necessary; here we emit the prologue code to set up the DRAP
11205 without the stack realignment adjustment. */
11206 t = choose_baseaddr (0);
11207 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11208 }
11209
11210 /* Prevent instructions from being scheduled into register save push
11211 sequence when access to the redzone area is done through frame pointer.
11212 The offset between the frame pointer and the stack pointer is calculated
11213 relative to the value of the stack pointer at the end of the function
11214 prologue, and moving instructions that access redzone area via frame
11215 pointer inside push sequence violates this assumption. */
11216 if (frame_pointer_needed && frame.red_zone_size)
11217 emit_insn (gen_memory_blockage ());
11218
11219 /* Emit cld instruction if stringops are used in the function. */
11220 if (TARGET_CLD && ix86_current_function_needs_cld)
11221 emit_insn (gen_cld ());
11222
11223 /* SEH requires that the prologue end within 256 bytes of the start of
11224 the function. Prevent instruction schedules that would extend that.
11225 Further, prevent alloca modifications to the stack pointer from being
11226 combined with prologue modifications. */
11227 if (TARGET_SEH)
11228 emit_insn (gen_prologue_use (stack_pointer_rtx));
11229 }
11230
11231 /* Emit code to restore REG using a POP insn. */
11232
11233 static void
11234 ix86_emit_restore_reg_using_pop (rtx reg)
11235 {
11236 struct machine_function *m = cfun->machine;
11237 rtx insn = emit_insn (gen_pop (reg));
11238
11239 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11240 m->fs.sp_offset -= UNITS_PER_WORD;
11241
11242 if (m->fs.cfa_reg == crtl->drap_reg
11243 && REGNO (reg) == REGNO (crtl->drap_reg))
11244 {
11245 /* Previously we'd represented the CFA as an expression
11246 like *(%ebp - 8). We've just popped that value from
11247 the stack, which means we need to reset the CFA to
11248 the drap register. This will remain until we restore
11249 the stack pointer. */
11250 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11251 RTX_FRAME_RELATED_P (insn) = 1;
11252
11253 /* This means that the DRAP register is valid for addressing too. */
11254 m->fs.drap_valid = true;
11255 return;
11256 }
11257
11258 if (m->fs.cfa_reg == stack_pointer_rtx)
11259 {
11260 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11261 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11262 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11263 RTX_FRAME_RELATED_P (insn) = 1;
11264
11265 m->fs.cfa_offset -= UNITS_PER_WORD;
11266 }
11267
11268 /* When the frame pointer is the CFA, and we pop it, we are
11269 swapping back to the stack pointer as the CFA. This happens
11270 for stack frames that don't allocate other data, so we assume
11271 the stack pointer is now pointing at the return address, i.e.
11272 the function entry state, which makes the offset be 1 word. */
11273 if (reg == hard_frame_pointer_rtx)
11274 {
11275 m->fs.fp_valid = false;
11276 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11277 {
11278 m->fs.cfa_reg = stack_pointer_rtx;
11279 m->fs.cfa_offset -= UNITS_PER_WORD;
11280
11281 add_reg_note (insn, REG_CFA_DEF_CFA,
11282 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11283 GEN_INT (m->fs.cfa_offset)));
11284 RTX_FRAME_RELATED_P (insn) = 1;
11285 }
11286 }
11287 }
11288
11289 /* Emit code to restore saved registers using POP insns. */
11290
11291 static void
11292 ix86_emit_restore_regs_using_pop (void)
11293 {
11294 unsigned int regno;
11295
11296 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11297 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11298 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11299 }
11300
11301 /* Emit code and notes for the LEAVE instruction. */
11302
11303 static void
11304 ix86_emit_leave (void)
11305 {
11306 struct machine_function *m = cfun->machine;
11307 rtx insn = emit_insn (ix86_gen_leave ());
11308
11309 ix86_add_queued_cfa_restore_notes (insn);
11310
11311 gcc_assert (m->fs.fp_valid);
11312 m->fs.sp_valid = true;
11313 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11314 m->fs.fp_valid = false;
11315
11316 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11317 {
11318 m->fs.cfa_reg = stack_pointer_rtx;
11319 m->fs.cfa_offset = m->fs.sp_offset;
11320
11321 add_reg_note (insn, REG_CFA_DEF_CFA,
11322 plus_constant (Pmode, stack_pointer_rtx,
11323 m->fs.sp_offset));
11324 RTX_FRAME_RELATED_P (insn) = 1;
11325 }
11326 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11327 m->fs.fp_offset);
11328 }
11329
11330 /* Emit code to restore saved registers using MOV insns.
11331 First register is restored from CFA - CFA_OFFSET. */
11332 static void
11333 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11334 bool maybe_eh_return)
11335 {
11336 struct machine_function *m = cfun->machine;
11337 unsigned int regno;
11338
11339 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11340 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11341 {
11342 rtx reg = gen_rtx_REG (word_mode, regno);
11343 rtx insn, mem;
11344
11345 mem = choose_baseaddr (cfa_offset);
11346 mem = gen_frame_mem (word_mode, mem);
11347 insn = emit_move_insn (reg, mem);
11348
11349 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11350 {
11351 /* Previously we'd represented the CFA as an expression
11352 like *(%ebp - 8). We've just popped that value from
11353 the stack, which means we need to reset the CFA to
11354 the drap register. This will remain until we restore
11355 the stack pointer. */
11356 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11357 RTX_FRAME_RELATED_P (insn) = 1;
11358
11359 /* This means that the DRAP register is valid for addressing. */
11360 m->fs.drap_valid = true;
11361 }
11362 else
11363 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11364
11365 cfa_offset -= UNITS_PER_WORD;
11366 }
11367 }
11368
11369 /* Emit code to restore saved registers using MOV insns.
11370 First register is restored from CFA - CFA_OFFSET. */
11371 static void
11372 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11373 bool maybe_eh_return)
11374 {
11375 unsigned int regno;
11376
11377 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11378 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11379 {
11380 rtx reg = gen_rtx_REG (V4SFmode, regno);
11381 rtx mem;
11382
11383 mem = choose_baseaddr (cfa_offset);
11384 mem = gen_rtx_MEM (V4SFmode, mem);
11385 set_mem_align (mem, 128);
11386 emit_move_insn (reg, mem);
11387
11388 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11389
11390 cfa_offset -= 16;
11391 }
11392 }
11393
11394 /* Restore function stack, frame, and registers. */
11395
11396 void
11397 ix86_expand_epilogue (int style)
11398 {
11399 struct machine_function *m = cfun->machine;
11400 struct machine_frame_state frame_state_save = m->fs;
11401 struct ix86_frame frame;
11402 bool restore_regs_via_mov;
11403 bool using_drap;
11404
11405 ix86_finalize_stack_realign_flags ();
11406 ix86_compute_frame_layout (&frame);
11407
11408 m->fs.sp_valid = (!frame_pointer_needed
11409 || (crtl->sp_is_unchanging
11410 && !stack_realign_fp));
11411 gcc_assert (!m->fs.sp_valid
11412 || m->fs.sp_offset == frame.stack_pointer_offset);
11413
11414 /* The FP must be valid if the frame pointer is present. */
11415 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11416 gcc_assert (!m->fs.fp_valid
11417 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11418
11419 /* We must have *some* valid pointer to the stack frame. */
11420 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11421
11422 /* The DRAP is never valid at this point. */
11423 gcc_assert (!m->fs.drap_valid);
11424
11425 /* See the comment about red zone and frame
11426 pointer usage in ix86_expand_prologue. */
11427 if (frame_pointer_needed && frame.red_zone_size)
11428 emit_insn (gen_memory_blockage ());
11429
11430 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11431 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11432
11433 /* Determine the CFA offset of the end of the red-zone. */
11434 m->fs.red_zone_offset = 0;
11435 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11436 {
11437 /* The red-zone begins below the return address. */
11438 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11439
11440 /* When the register save area is in the aligned portion of
11441 the stack, determine the maximum runtime displacement that
11442 matches up with the aligned frame. */
11443 if (stack_realign_drap)
11444 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11445 + UNITS_PER_WORD);
11446 }
11447
11448 /* Special care must be taken for the normal return case of a function
11449 using eh_return: the eax and edx registers are marked as saved, but
11450 not restored along this path. Adjust the save location to match. */
11451 if (crtl->calls_eh_return && style != 2)
11452 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11453
11454 /* EH_RETURN requires the use of moves to function properly. */
11455 if (crtl->calls_eh_return)
11456 restore_regs_via_mov = true;
11457 /* SEH requires the use of pops to identify the epilogue. */
11458 else if (TARGET_SEH)
11459 restore_regs_via_mov = false;
11460 /* If we're only restoring one register and sp is not valid, then
11461 use a move instruction to restore the register, since it's
11462 less work than reloading sp and popping the register. */
11463 else if (!m->fs.sp_valid && frame.nregs <= 1)
11464 restore_regs_via_mov = true;
11465 else if (TARGET_EPILOGUE_USING_MOVE
11466 && cfun->machine->use_fast_prologue_epilogue
11467 && (frame.nregs > 1
11468 || m->fs.sp_offset != frame.reg_save_offset))
11469 restore_regs_via_mov = true;
11470 else if (frame_pointer_needed
11471 && !frame.nregs
11472 && m->fs.sp_offset != frame.reg_save_offset)
11473 restore_regs_via_mov = true;
11474 else if (frame_pointer_needed
11475 && TARGET_USE_LEAVE
11476 && cfun->machine->use_fast_prologue_epilogue
11477 && frame.nregs == 1)
11478 restore_regs_via_mov = true;
11479 else
11480 restore_regs_via_mov = false;
11481
11482 if (restore_regs_via_mov || frame.nsseregs)
11483 {
11484 /* Ensure that the entire register save area is addressable via
11485 the stack pointer, if we will restore via sp. */
11486 if (TARGET_64BIT
11487 && m->fs.sp_offset > 0x7fffffff
11488 && !(m->fs.fp_valid || m->fs.drap_valid)
11489 && (frame.nsseregs + frame.nregs) != 0)
11490 {
11491 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11492 GEN_INT (m->fs.sp_offset
11493 - frame.sse_reg_save_offset),
11494 style,
11495 m->fs.cfa_reg == stack_pointer_rtx);
11496 }
11497 }
11498
11499 /* If there are any SSE registers to restore, then we have to do it
11500 via moves, since there's obviously no pop for SSE regs. */
11501 if (frame.nsseregs)
11502 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11503 style == 2);
11504
11505 if (restore_regs_via_mov)
11506 {
11507 rtx t;
11508
11509 if (frame.nregs)
11510 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11511
11512 /* eh_return epilogues need %ecx added to the stack pointer. */
11513 if (style == 2)
11514 {
11515 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11516
11517 /* Stack align doesn't work with eh_return. */
11518 gcc_assert (!stack_realign_drap);
11519 /* Neither do regparm nested functions. */
11520 gcc_assert (!ix86_static_chain_on_stack);
11521
11522 if (frame_pointer_needed)
11523 {
11524 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11525 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11526 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11527
11528 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11529 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11530
11531 /* Note that we use SA as a temporary CFA, as the return
11532 address is at the proper place relative to it. We
11533 pretend this happens at the FP restore insn because
11534 prior to this insn the FP would be stored at the wrong
11535 offset relative to SA, and after this insn we have no
11536 other reasonable register to use for the CFA. We don't
11537 bother resetting the CFA to the SP for the duration of
11538 the return insn. */
11539 add_reg_note (insn, REG_CFA_DEF_CFA,
11540 plus_constant (Pmode, sa, UNITS_PER_WORD));
11541 ix86_add_queued_cfa_restore_notes (insn);
11542 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11543 RTX_FRAME_RELATED_P (insn) = 1;
11544
11545 m->fs.cfa_reg = sa;
11546 m->fs.cfa_offset = UNITS_PER_WORD;
11547 m->fs.fp_valid = false;
11548
11549 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11550 const0_rtx, style, false);
11551 }
11552 else
11553 {
11554 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11555 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11556 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11557 ix86_add_queued_cfa_restore_notes (insn);
11558
11559 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11560 if (m->fs.cfa_offset != UNITS_PER_WORD)
11561 {
11562 m->fs.cfa_offset = UNITS_PER_WORD;
11563 add_reg_note (insn, REG_CFA_DEF_CFA,
11564 plus_constant (Pmode, stack_pointer_rtx,
11565 UNITS_PER_WORD));
11566 RTX_FRAME_RELATED_P (insn) = 1;
11567 }
11568 }
11569 m->fs.sp_offset = UNITS_PER_WORD;
11570 m->fs.sp_valid = true;
11571 }
11572 }
11573 else
11574 {
11575 /* SEH requires that the function end with (1) a stack adjustment
11576 if necessary, (2) a sequence of pops, and (3) a return or
11577 jump instruction. Prevent insns from the function body from
11578 being scheduled into this sequence. */
11579 if (TARGET_SEH)
11580 {
11581 /* Prevent a catch region from being adjacent to the standard
11582 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11583 several other flags that would be interesting to test are
11584 set up yet. */
11585 if (flag_non_call_exceptions)
11586 emit_insn (gen_nops (const1_rtx));
11587 else
11588 emit_insn (gen_blockage ());
11589 }
11590
11591 /* First step is to deallocate the stack frame so that we can
11592 pop the registers. Also do it on the SEH target for very large
11593 frames, as the emitted instructions aren't allowed by the ABI in
11594 epilogues. */
11595 if (!m->fs.sp_valid
11596 || (TARGET_SEH
11597 && (m->fs.sp_offset - frame.reg_save_offset
11598 >= SEH_MAX_FRAME_SIZE)))
11599 {
11600 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11601 GEN_INT (m->fs.fp_offset
11602 - frame.reg_save_offset),
11603 style, false);
11604 }
11605 else if (m->fs.sp_offset != frame.reg_save_offset)
11606 {
11607 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11608 GEN_INT (m->fs.sp_offset
11609 - frame.reg_save_offset),
11610 style,
11611 m->fs.cfa_reg == stack_pointer_rtx);
11612 }
11613
11614 ix86_emit_restore_regs_using_pop ();
11615 }
11616
11617 /* If we used a frame pointer and haven't already got rid of it,
11618 then do so now. */
11619 if (m->fs.fp_valid)
11620 {
11621 /* If the stack pointer is valid and pointing at the frame
11622 pointer store address, then we only need a pop. */
11623 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11624 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11625 /* Leave results in shorter dependency chains on CPUs that are
11626 able to grok it fast. */
11627 else if (TARGET_USE_LEAVE
11628 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11629 || !cfun->machine->use_fast_prologue_epilogue)
11630 ix86_emit_leave ();
11631 else
11632 {
11633 pro_epilogue_adjust_stack (stack_pointer_rtx,
11634 hard_frame_pointer_rtx,
11635 const0_rtx, style, !using_drap);
11636 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11637 }
11638 }
11639
11640 if (using_drap)
11641 {
11642 int param_ptr_offset = UNITS_PER_WORD;
11643 rtx insn;
11644
11645 gcc_assert (stack_realign_drap);
11646
11647 if (ix86_static_chain_on_stack)
11648 param_ptr_offset += UNITS_PER_WORD;
11649 if (!call_used_regs[REGNO (crtl->drap_reg)])
11650 param_ptr_offset += UNITS_PER_WORD;
11651
11652 insn = emit_insn (gen_rtx_SET
11653 (VOIDmode, stack_pointer_rtx,
11654 gen_rtx_PLUS (Pmode,
11655 crtl->drap_reg,
11656 GEN_INT (-param_ptr_offset))));
11657 m->fs.cfa_reg = stack_pointer_rtx;
11658 m->fs.cfa_offset = param_ptr_offset;
11659 m->fs.sp_offset = param_ptr_offset;
11660 m->fs.realigned = false;
11661
11662 add_reg_note (insn, REG_CFA_DEF_CFA,
11663 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11664 GEN_INT (param_ptr_offset)));
11665 RTX_FRAME_RELATED_P (insn) = 1;
11666
11667 if (!call_used_regs[REGNO (crtl->drap_reg)])
11668 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11669 }
11670
11671 /* At this point the stack pointer must be valid, and we must have
11672 restored all of the registers. We may not have deallocated the
11673 entire stack frame. We've delayed this until now because it may
11674 be possible to merge the local stack deallocation with the
11675 deallocation forced by ix86_static_chain_on_stack. */
11676 gcc_assert (m->fs.sp_valid);
11677 gcc_assert (!m->fs.fp_valid);
11678 gcc_assert (!m->fs.realigned);
11679 if (m->fs.sp_offset != UNITS_PER_WORD)
11680 {
11681 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11682 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11683 style, true);
11684 }
11685 else
11686 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11687
11688 /* Sibcall epilogues don't want a return instruction. */
11689 if (style == 0)
11690 {
11691 m->fs = frame_state_save;
11692 return;
11693 }
11694
11695 if (crtl->args.pops_args && crtl->args.size)
11696 {
11697 rtx popc = GEN_INT (crtl->args.pops_args);
11698
11699 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11700 address, do an explicit add, and jump indirectly to the caller. */
11701
11702 if (crtl->args.pops_args >= 65536)
11703 {
11704 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11705 rtx insn;
11706
11707 /* There is no "pascal" calling convention in any 64bit ABI. */
11708 gcc_assert (!TARGET_64BIT);
11709
11710 insn = emit_insn (gen_pop (ecx));
11711 m->fs.cfa_offset -= UNITS_PER_WORD;
11712 m->fs.sp_offset -= UNITS_PER_WORD;
11713
11714 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11715 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11716 add_reg_note (insn, REG_CFA_REGISTER,
11717 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11718 RTX_FRAME_RELATED_P (insn) = 1;
11719
11720 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11721 popc, -1, true);
11722 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11723 }
11724 else
11725 emit_jump_insn (gen_simple_return_pop_internal (popc));
11726 }
11727 else
11728 emit_jump_insn (gen_simple_return_internal ());
11729
11730 /* Restore the state back to the state from the prologue,
11731 so that it's correct for the next epilogue. */
11732 m->fs = frame_state_save;
11733 }
11734
11735 /* Reset from the function's potential modifications. */
11736
11737 static void
11738 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11739 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11740 {
11741 if (pic_offset_table_rtx)
11742 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11743 #if TARGET_MACHO
11744 /* Mach-O doesn't support labels at the end of objects, so if
11745 it looks like we might want one, insert a NOP. */
11746 {
11747 rtx insn = get_last_insn ();
11748 rtx deleted_debug_label = NULL_RTX;
11749 while (insn
11750 && NOTE_P (insn)
11751 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11752 {
11753 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11754 notes only; instead set their CODE_LABEL_NUMBER to -1,
11755 otherwise there would be code generation differences
11756 between -g and -g0. */
11757 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11758 deleted_debug_label = insn;
11759 insn = PREV_INSN (insn);
11760 }
11761 if (insn
11762 && (LABEL_P (insn)
11763 || (NOTE_P (insn)
11764 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11765 fputs ("\tnop\n", file);
11766 else if (deleted_debug_label)
11767 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11768 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11769 CODE_LABEL_NUMBER (insn) = -1;
11770 }
11771 #endif
11772
11773 }
11774
11775 /* Return a scratch register to use in the split stack prologue. The
11776 split stack prologue is used for -fsplit-stack. It consists of the
11777 first instructions in the function, even before the regular prologue.
11778 The scratch register can be any caller-saved register which is not
11779 used for parameters or for the static chain. */
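/* Added illustrative summary, not part of the original sources: in
   64-bit mode the scratch register is always %r11.  In 32-bit mode a
   regparm(<3) function uses %ecx (or %edx when a static chain is
   live), a fastcall function uses %eax, and a thiscall function uses
   %edx (or %eax with a static chain); combinations that leave no free
   caller-saved register are rejected with sorry ().  */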
11780
11781 static unsigned int
11782 split_stack_prologue_scratch_regno (void)
11783 {
11784 if (TARGET_64BIT)
11785 return R11_REG;
11786 else
11787 {
11788 bool is_fastcall, is_thiscall;
11789 int regparm;
11790
11791 is_fastcall = (lookup_attribute ("fastcall",
11792 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11793 != NULL);
11794 is_thiscall = (lookup_attribute ("thiscall",
11795 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11796 != NULL);
11797 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11798
11799 if (is_fastcall)
11800 {
11801 if (DECL_STATIC_CHAIN (cfun->decl))
11802 {
11803 sorry ("-fsplit-stack does not support fastcall with "
11804 "nested function");
11805 return INVALID_REGNUM;
11806 }
11807 return AX_REG;
11808 }
11809 else if (is_thiscall)
11810 {
11811 if (!DECL_STATIC_CHAIN (cfun->decl))
11812 return DX_REG;
11813 return AX_REG;
11814 }
11815 else if (regparm < 3)
11816 {
11817 if (!DECL_STATIC_CHAIN (cfun->decl))
11818 return CX_REG;
11819 else
11820 {
11821 if (regparm >= 2)
11822 {
11823 sorry ("-fsplit-stack does not support 2 register "
11824 "parameters for a nested function");
11825 return INVALID_REGNUM;
11826 }
11827 return DX_REG;
11828 }
11829 }
11830 else
11831 {
11832 /* FIXME: We could make this work by pushing a register
11833 around the addition and comparison. */
11834 sorry ("-fsplit-stack does not support 3 register parameters");
11835 return INVALID_REGNUM;
11836 }
11837 }
11838 }
11839
11840 /* A SYMBOL_REF for the function which allocates new stack space for
11841 -fsplit-stack. */
11842
11843 static GTY(()) rtx split_stack_fn;
11844
11845 /* A SYMBOL_REF for the more-stack function (__morestack_large_model)
11846 used with the large code model. */
11847
11848 static GTY(()) rtx split_stack_fn_large;
11849
11850 /* Handle -fsplit-stack. These are the first instructions in the
11851 function, even before the regular prologue. */
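/* Added illustrative sketch, not part of the original sources; the
   exact TCB limit slot and register usage are target- and
   libgcc-specific:

       cmp   %fs:<stack-limit-slot>, %rsp   ; or scratch = %rsp - frame size
       jae   .Lenough                       ; marked as very likely taken
       mov   $<frame size>, %r10            ; 64-bit: sizes in %r10/%r11,
       mov   $<arg size>, %r11              ; 32-bit: pushed on the stack
       call  __morestack
       ret                                  ; prediction marker, see morestack.S
   .Lenough:
       ... regular prologue follows ...  */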
11852
11853 void
11854 ix86_expand_split_stack_prologue (void)
11855 {
11856 struct ix86_frame frame;
11857 HOST_WIDE_INT allocate;
11858 unsigned HOST_WIDE_INT args_size;
11859 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11860 rtx scratch_reg = NULL_RTX;
11861 rtx varargs_label = NULL_RTX;
11862 rtx fn;
11863
11864 gcc_assert (flag_split_stack && reload_completed);
11865
11866 ix86_finalize_stack_realign_flags ();
11867 ix86_compute_frame_layout (&frame);
11868 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11869
11870 /* This is the label we will branch to if we have enough stack
11871 space. We expect the basic block reordering pass to reverse this
11872 branch if optimizing, so that we branch in the unlikely case. */
11873 label = gen_label_rtx ();
11874
11875 /* We need to compare the stack pointer minus the frame size with
11876 the stack boundary in the TCB. The stack boundary always gives
11877 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11878 can compare directly. Otherwise we need to do an addition. */
11879
11880 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11881 UNSPEC_STACK_CHECK);
11882 limit = gen_rtx_CONST (Pmode, limit);
11883 limit = gen_rtx_MEM (Pmode, limit);
11884 if (allocate < SPLIT_STACK_AVAILABLE)
11885 current = stack_pointer_rtx;
11886 else
11887 {
11888 unsigned int scratch_regno;
11889 rtx offset;
11890
11891 /* We need a scratch register to hold the stack pointer minus
11892 the required frame size. Since this is the very start of the
11893 function, the scratch register can be any caller-saved
11894 register which is not used for parameters. */
11895 offset = GEN_INT (- allocate);
11896 scratch_regno = split_stack_prologue_scratch_regno ();
11897 if (scratch_regno == INVALID_REGNUM)
11898 return;
11899 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11900 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11901 {
11902 /* We don't use ix86_gen_add3 in this case because it will
11903 want to split to lea, but when not optimizing the insn
11904 will not be split after this point. */
11905 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11906 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11907 offset)));
11908 }
11909 else
11910 {
11911 emit_move_insn (scratch_reg, offset);
11912 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11913 stack_pointer_rtx));
11914 }
11915 current = scratch_reg;
11916 }
11917
11918 ix86_expand_branch (GEU, current, limit, label);
11919 jump_insn = get_last_insn ();
11920 JUMP_LABEL (jump_insn) = label;
11921
11922 /* Mark the jump as very likely to be taken. */
11923 add_int_reg_note (jump_insn, REG_BR_PROB,
11924 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11925
11926 if (split_stack_fn == NULL_RTX)
11927 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11928 fn = split_stack_fn;
11929
11930 /* Get more stack space. We pass in the desired stack space and the
11931 size of the arguments to copy to the new stack. In 32-bit mode
11932 we push the parameters; __morestack will return on a new stack
11933 anyhow. In 64-bit mode we pass the parameters in r10 and
11934 r11. */
11935 allocate_rtx = GEN_INT (allocate);
11936 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11937 call_fusage = NULL_RTX;
11938 if (TARGET_64BIT)
11939 {
11940 rtx reg10, reg11;
11941
11942 reg10 = gen_rtx_REG (Pmode, R10_REG);
11943 reg11 = gen_rtx_REG (Pmode, R11_REG);
11944
11945 /* If this function uses a static chain, it will be in %r10.
11946 Preserve it across the call to __morestack. */
11947 if (DECL_STATIC_CHAIN (cfun->decl))
11948 {
11949 rtx rax;
11950
11951 rax = gen_rtx_REG (word_mode, AX_REG);
11952 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11953 use_reg (&call_fusage, rax);
11954 }
11955
11956 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11957 && !TARGET_PECOFF)
11958 {
11959 HOST_WIDE_INT argval;
11960
11961 gcc_assert (Pmode == DImode);
11962 /* When using the large model we need to load the address
11963 into a register, and we've run out of registers. So we
11964 switch to a different calling convention, and we call a
11965 different function: __morestack_large_model. We pass the
11966 argument size in the upper 32 bits of r10 and pass the
11967 frame size in the lower 32 bits. */
11968 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11969 gcc_assert ((args_size & 0xffffffff) == args_size);
11970
11971 if (split_stack_fn_large == NULL_RTX)
11972 split_stack_fn_large =
11973 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11974
11975 if (ix86_cmodel == CM_LARGE_PIC)
11976 {
11977 rtx label, x;
11978
11979 label = gen_label_rtx ();
11980 emit_label (label);
11981 LABEL_PRESERVE_P (label) = 1;
11982 emit_insn (gen_set_rip_rex64 (reg10, label));
11983 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11984 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
11985 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11986 UNSPEC_GOT);
11987 x = gen_rtx_CONST (Pmode, x);
11988 emit_move_insn (reg11, x);
11989 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11990 x = gen_const_mem (Pmode, x);
11991 emit_move_insn (reg11, x);
11992 }
11993 else
11994 emit_move_insn (reg11, split_stack_fn_large);
11995
11996 fn = reg11;
11997
11998 argval = ((args_size << 16) << 16) + allocate;
11999 emit_move_insn (reg10, GEN_INT (argval));
12000 }
12001 else
12002 {
12003 emit_move_insn (reg10, allocate_rtx);
12004 emit_move_insn (reg11, GEN_INT (args_size));
12005 use_reg (&call_fusage, reg11);
12006 }
12007
12008 use_reg (&call_fusage, reg10);
12009 }
12010 else
12011 {
12012 emit_insn (gen_push (GEN_INT (args_size)));
12013 emit_insn (gen_push (allocate_rtx));
12014 }
12015 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12016 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12017 NULL_RTX, false);
12018 add_function_usage_to (call_insn, call_fusage);
12019
12020 /* In order to make call/return prediction work right, we now need
12021 to execute a return instruction. See
12022 libgcc/config/i386/morestack.S for the details on how this works.
12023
12024 For flow purposes gcc must not see this as a return
12025 instruction--we need control flow to continue at the subsequent
12026 label. Therefore, we use an unspec. */
12027 gcc_assert (crtl->args.pops_args < 65536);
12028 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12029
12030 /* If we are in 64-bit mode and this function uses a static chain,
12031 we saved %r10 in %rax before calling __morestack. */
12032 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12033 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12034 gen_rtx_REG (word_mode, AX_REG));
12035
12036 /* If this function calls va_start, we need to store a pointer to
12037 the arguments on the old stack, because they may not have been
12038 all copied to the new stack. At this point the old stack can be
12039 found at the frame pointer value used by __morestack, because
12040 __morestack has set that up before calling back to us. Here we
12041 store that pointer in a scratch register, and in
12042 ix86_expand_prologue we store the scratch register in a stack
12043 slot. */
12044 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12045 {
12046 unsigned int scratch_regno;
12047 rtx frame_reg;
12048 int words;
12049
12050 scratch_regno = split_stack_prologue_scratch_regno ();
12051 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12052 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12053
12054 /* 64-bit:
12055 fp -> old fp value
12056 return address within this function
12057 return address of caller of this function
12058 stack arguments
12059 So we add three words to get to the stack arguments.
12060
12061 32-bit:
12062 fp -> old fp value
12063 return address within this function
12064 first argument to __morestack
12065 second argument to __morestack
12066 return address of caller of this function
12067 stack arguments
12068 So we add five words to get to the stack arguments.
12069 */
12070 words = TARGET_64BIT ? 3 : 5;
12071 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12072 gen_rtx_PLUS (Pmode, frame_reg,
12073 GEN_INT (words * UNITS_PER_WORD))));
12074
12075 varargs_label = gen_label_rtx ();
12076 emit_jump_insn (gen_jump (varargs_label));
12077 JUMP_LABEL (get_last_insn ()) = varargs_label;
12078
12079 emit_barrier ();
12080 }
12081
12082 emit_label (label);
12083 LABEL_NUSES (label) = 1;
12084
12085 /* If this function calls va_start, we now have to set the scratch
12086 register for the case where we do not call __morestack. In this
12087 case we need to set it based on the stack pointer. */
12088 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12089 {
12090 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12091 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12092 GEN_INT (UNITS_PER_WORD))));
12093
12094 emit_label (varargs_label);
12095 LABEL_NUSES (varargs_label) = 1;
12096 }
12097 }
12098
12099 /* We may have to tell the dataflow pass that the split stack prologue
12100 is initializing a scratch register. */
12101
12102 static void
12103 ix86_live_on_entry (bitmap regs)
12104 {
12105 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12106 {
12107 gcc_assert (flag_split_stack);
12108 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12109 }
12110 }
12111 \f
12112 /* Extract the parts of an RTL expression that is a valid memory address
12113 for an instruction. Return 0 if the structure of the address is
12114 grossly off. Return -1 if the address contains ASHIFT, so it is not
12115 strictly valid, but still used for computing the length of the lea instruction. */
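/* Added illustrative example, not part of the original sources: for an
   address equivalent to 12(%ebx,%ecx,4) this fills in
       out->base  = %ebx,  out->index = %ecx,
       out->scale = 4,     out->disp  = (const_int 12)
   and returns 1; the scale and register validity checks themselves
   happen later in ix86_legitimate_address_p.  */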
12116
12117 int
12118 ix86_decompose_address (rtx addr, struct ix86_address *out)
12119 {
12120 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12121 rtx base_reg, index_reg;
12122 HOST_WIDE_INT scale = 1;
12123 rtx scale_rtx = NULL_RTX;
12124 rtx tmp;
12125 int retval = 1;
12126 enum ix86_address_seg seg = SEG_DEFAULT;
12127
12128 /* Allow zero-extended SImode addresses;
12129 they will be emitted with the addr32 prefix. */
12130 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12131 {
12132 if (GET_CODE (addr) == ZERO_EXTEND
12133 && GET_MODE (XEXP (addr, 0)) == SImode)
12134 {
12135 addr = XEXP (addr, 0);
12136 if (CONST_INT_P (addr))
12137 return 0;
12138 }
12139 else if (GET_CODE (addr) == AND
12140 && const_32bit_mask (XEXP (addr, 1), DImode))
12141 {
12142 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12143 if (addr == NULL_RTX)
12144 return 0;
12145
12146 if (CONST_INT_P (addr))
12147 return 0;
12148 }
12149 }
12150
12151 /* Allow SImode subregs of DImode addresses;
12152 they will be emitted with the addr32 prefix. */
12153 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12154 {
12155 if (GET_CODE (addr) == SUBREG
12156 && GET_MODE (SUBREG_REG (addr)) == DImode)
12157 {
12158 addr = SUBREG_REG (addr);
12159 if (CONST_INT_P (addr))
12160 return 0;
12161 }
12162 }
12163
12164 if (REG_P (addr))
12165 base = addr;
12166 else if (GET_CODE (addr) == SUBREG)
12167 {
12168 if (REG_P (SUBREG_REG (addr)))
12169 base = addr;
12170 else
12171 return 0;
12172 }
12173 else if (GET_CODE (addr) == PLUS)
12174 {
12175 rtx addends[4], op;
12176 int n = 0, i;
12177
12178 op = addr;
12179 do
12180 {
12181 if (n >= 4)
12182 return 0;
12183 addends[n++] = XEXP (op, 1);
12184 op = XEXP (op, 0);
12185 }
12186 while (GET_CODE (op) == PLUS);
12187 if (n >= 4)
12188 return 0;
12189 addends[n] = op;
12190
12191 for (i = n; i >= 0; --i)
12192 {
12193 op = addends[i];
12194 switch (GET_CODE (op))
12195 {
12196 case MULT:
12197 if (index)
12198 return 0;
12199 index = XEXP (op, 0);
12200 scale_rtx = XEXP (op, 1);
12201 break;
12202
12203 case ASHIFT:
12204 if (index)
12205 return 0;
12206 index = XEXP (op, 0);
12207 tmp = XEXP (op, 1);
12208 if (!CONST_INT_P (tmp))
12209 return 0;
12210 scale = INTVAL (tmp);
12211 if ((unsigned HOST_WIDE_INT) scale > 3)
12212 return 0;
12213 scale = 1 << scale;
12214 break;
12215
12216 case ZERO_EXTEND:
12217 op = XEXP (op, 0);
12218 if (GET_CODE (op) != UNSPEC)
12219 return 0;
12220 /* FALLTHRU */
12221
12222 case UNSPEC:
12223 if (XINT (op, 1) == UNSPEC_TP
12224 && TARGET_TLS_DIRECT_SEG_REFS
12225 && seg == SEG_DEFAULT)
12226 seg = DEFAULT_TLS_SEG_REG;
12227 else
12228 return 0;
12229 break;
12230
12231 case SUBREG:
12232 if (!REG_P (SUBREG_REG (op)))
12233 return 0;
12234 /* FALLTHRU */
12235
12236 case REG:
12237 if (!base)
12238 base = op;
12239 else if (!index)
12240 index = op;
12241 else
12242 return 0;
12243 break;
12244
12245 case CONST:
12246 case CONST_INT:
12247 case SYMBOL_REF:
12248 case LABEL_REF:
12249 if (disp)
12250 return 0;
12251 disp = op;
12252 break;
12253
12254 default:
12255 return 0;
12256 }
12257 }
12258 }
12259 else if (GET_CODE (addr) == MULT)
12260 {
12261 index = XEXP (addr, 0); /* index*scale */
12262 scale_rtx = XEXP (addr, 1);
12263 }
12264 else if (GET_CODE (addr) == ASHIFT)
12265 {
12266 /* We're called for lea too, which implements ashift on occasion. */
12267 index = XEXP (addr, 0);
12268 tmp = XEXP (addr, 1);
12269 if (!CONST_INT_P (tmp))
12270 return 0;
12271 scale = INTVAL (tmp);
12272 if ((unsigned HOST_WIDE_INT) scale > 3)
12273 return 0;
12274 scale = 1 << scale;
12275 retval = -1;
12276 }
12277 else
12278 disp = addr; /* displacement */
12279
12280 if (index)
12281 {
12282 if (REG_P (index))
12283 ;
12284 else if (GET_CODE (index) == SUBREG
12285 && REG_P (SUBREG_REG (index)))
12286 ;
12287 else
12288 return 0;
12289 }
12290
12291 /* Extract the integral value of scale. */
12292 if (scale_rtx)
12293 {
12294 if (!CONST_INT_P (scale_rtx))
12295 return 0;
12296 scale = INTVAL (scale_rtx);
12297 }
12298
12299 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12300 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12301
12302 /* Avoid useless 0 displacement. */
12303 if (disp == const0_rtx && (base || index))
12304 disp = NULL_RTX;
12305
12306 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12307 if (base_reg && index_reg && scale == 1
12308 && (index_reg == arg_pointer_rtx
12309 || index_reg == frame_pointer_rtx
12310 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12311 {
12312 rtx tmp;
12313 tmp = base, base = index, index = tmp;
12314 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12315 }
12316
12317 /* Special case: %ebp cannot be encoded as a base without a displacement.
12318 Similarly %r13. */
12319 if (!disp
12320 && base_reg
12321 && (base_reg == hard_frame_pointer_rtx
12322 || base_reg == frame_pointer_rtx
12323 || base_reg == arg_pointer_rtx
12324 || (REG_P (base_reg)
12325 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12326 || REGNO (base_reg) == R13_REG))))
12327 disp = const0_rtx;
12328
12329 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12330 Avoid this by transforming to [%esi+0].
12331 Reload calls address legitimization without cfun defined, so we need
12332 to test cfun for being non-NULL. */
12333 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12334 && base_reg && !index_reg && !disp
12335 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12336 disp = const0_rtx;
12337
12338 /* Special case: encode reg+reg instead of reg*2. */
12339 if (!base && index && scale == 2)
12340 base = index, base_reg = index_reg, scale = 1;
12341
12342 /* Special case: scaling cannot be encoded without base or displacement. */
12343 if (!base && !disp && index && scale != 1)
12344 disp = const0_rtx;
12345
12346 out->base = base;
12347 out->index = index;
12348 out->disp = disp;
12349 out->scale = scale;
12350 out->seg = seg;
12351
12352 return retval;
12353 }
12354 \f
12355 /* Return the cost of the memory address X.
12356 For i386, it is better to use a complex address than to let gcc copy
12357 the address into a reg and make a new pseudo. But not if the address
12358 requires two regs - that would mean more pseudos with longer
12359 lifetimes. */
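/* Added illustrative example, not part of the original sources: an
   address such as 8(pseudo1,pseudo2) with two distinct pseudo
   registers gets cost 3 below (1 base cost, +1 for using a pseudo,
   +1 for using two distinct pseudos), while 8(%ebx) with a hard
   register keeps the base cost of 1.  */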
12360 static int
12361 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12362 addr_space_t as ATTRIBUTE_UNUSED,
12363 bool speed ATTRIBUTE_UNUSED)
12364 {
12365 struct ix86_address parts;
12366 int cost = 1;
12367 int ok = ix86_decompose_address (x, &parts);
12368
12369 gcc_assert (ok);
12370
12371 if (parts.base && GET_CODE (parts.base) == SUBREG)
12372 parts.base = SUBREG_REG (parts.base);
12373 if (parts.index && GET_CODE (parts.index) == SUBREG)
12374 parts.index = SUBREG_REG (parts.index);
12375
12376 /* Attempt to minimize number of registers in the address. */
12377 if ((parts.base
12378 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12379 || (parts.index
12380 && (!REG_P (parts.index)
12381 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12382 cost++;
12383
12384 if (parts.base
12385 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12386 && parts.index
12387 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12388 && parts.base != parts.index)
12389 cost++;
12390
12391 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
12392 since its predecode logic can't detect the length of instructions
12393 and such instructions become vector decoded. Increase the cost of such
12394 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12395 to split such addresses or even refuse such addresses at all.
12396
12397 The following addressing modes are affected:
12398 [base+scale*index]
12399 [scale*index+disp]
12400 [base+index]
12401
12402 The first and last cases may be avoidable by explicitly coding the zero in
12403 the memory address, but I don't have an AMD-K6 machine handy to check this
12404 theory. */
12405
12406 if (TARGET_K6
12407 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12408 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12409 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12410 cost += 10;
12411
12412 return cost;
12413 }
12414 \f
12415 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12416 this is used to form addresses to local data when -fPIC is in
12417 use. */
12418
12419 static bool
12420 darwin_local_data_pic (rtx disp)
12421 {
12422 return (GET_CODE (disp) == UNSPEC
12423 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12424 }
12425
12426 /* Determine if a given RTX is a valid constant. We already know this
12427 satisfies CONSTANT_P. */
12428
12429 static bool
12430 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12431 {
12432 switch (GET_CODE (x))
12433 {
12434 case CONST:
12435 x = XEXP (x, 0);
12436
12437 if (GET_CODE (x) == PLUS)
12438 {
12439 if (!CONST_INT_P (XEXP (x, 1)))
12440 return false;
12441 x = XEXP (x, 0);
12442 }
12443
12444 if (TARGET_MACHO && darwin_local_data_pic (x))
12445 return true;
12446
12447 /* Only some unspecs are valid as "constants". */
12448 if (GET_CODE (x) == UNSPEC)
12449 switch (XINT (x, 1))
12450 {
12451 case UNSPEC_GOT:
12452 case UNSPEC_GOTOFF:
12453 case UNSPEC_PLTOFF:
12454 return TARGET_64BIT;
12455 case UNSPEC_TPOFF:
12456 case UNSPEC_NTPOFF:
12457 x = XVECEXP (x, 0, 0);
12458 return (GET_CODE (x) == SYMBOL_REF
12459 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12460 case UNSPEC_DTPOFF:
12461 x = XVECEXP (x, 0, 0);
12462 return (GET_CODE (x) == SYMBOL_REF
12463 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12464 default:
12465 return false;
12466 }
12467
12468 /* We must have drilled down to a symbol. */
12469 if (GET_CODE (x) == LABEL_REF)
12470 return true;
12471 if (GET_CODE (x) != SYMBOL_REF)
12472 return false;
12473 /* FALLTHRU */
12474
12475 case SYMBOL_REF:
12476 /* TLS symbols are never valid. */
12477 if (SYMBOL_REF_TLS_MODEL (x))
12478 return false;
12479
12480 /* DLLIMPORT symbols are never valid. */
12481 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12482 && SYMBOL_REF_DLLIMPORT_P (x))
12483 return false;
12484
12485 #if TARGET_MACHO
12486 /* mdynamic-no-pic */
12487 if (MACHO_DYNAMIC_NO_PIC_P)
12488 return machopic_symbol_defined_p (x);
12489 #endif
12490 break;
12491
12492 case CONST_DOUBLE:
12493 if (GET_MODE (x) == TImode
12494 && x != CONST0_RTX (TImode)
12495 && !TARGET_64BIT)
12496 return false;
12497 break;
12498
12499 case CONST_VECTOR:
12500 if (!standard_sse_constant_p (x))
12501 return false;
12502
12503 default:
12504 break;
12505 }
12506
12507 /* Otherwise we handle everything else in the move patterns. */
12508 return true;
12509 }
12510
12511 /* Determine if it's legal to put X into the constant pool. This
12512 is not possible for the address of thread-local symbols, which
12513 is checked above. */
12514
12515 static bool
12516 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12517 {
12518 /* We can always put integral constants and vectors in memory. */
12519 switch (GET_CODE (x))
12520 {
12521 case CONST_INT:
12522 case CONST_DOUBLE:
12523 case CONST_VECTOR:
12524 return false;
12525
12526 default:
12527 break;
12528 }
12529 return !ix86_legitimate_constant_p (mode, x);
12530 }
12531
12532 /* Return true if the symbol is marked as dllimport or as a stub-variable,
12533 false otherwise. */
12534
12535 static bool
12536 is_imported_p (rtx x)
12537 {
12538 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12539 || GET_CODE (x) != SYMBOL_REF)
12540 return false;
12541
12542 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12543 }
12544
12545
12546 /* Nonzero if the constant value X is a legitimate general operand
12547 when generating PIC code. It is given that flag_pic is on and
12548 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12549
12550 bool
12551 legitimate_pic_operand_p (rtx x)
12552 {
12553 rtx inner;
12554
12555 switch (GET_CODE (x))
12556 {
12557 case CONST:
12558 inner = XEXP (x, 0);
12559 if (GET_CODE (inner) == PLUS
12560 && CONST_INT_P (XEXP (inner, 1)))
12561 inner = XEXP (inner, 0);
12562
12563 /* Only some unspecs are valid as "constants". */
12564 if (GET_CODE (inner) == UNSPEC)
12565 switch (XINT (inner, 1))
12566 {
12567 case UNSPEC_GOT:
12568 case UNSPEC_GOTOFF:
12569 case UNSPEC_PLTOFF:
12570 return TARGET_64BIT;
12571 case UNSPEC_TPOFF:
12572 x = XVECEXP (inner, 0, 0);
12573 return (GET_CODE (x) == SYMBOL_REF
12574 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12575 case UNSPEC_MACHOPIC_OFFSET:
12576 return legitimate_pic_address_disp_p (x);
12577 default:
12578 return false;
12579 }
12580 /* FALLTHRU */
12581
12582 case SYMBOL_REF:
12583 case LABEL_REF:
12584 return legitimate_pic_address_disp_p (x);
12585
12586 default:
12587 return true;
12588 }
12589 }
12590
12591 /* Determine if a given CONST RTX is a valid memory displacement
12592 in PIC mode. */
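/* Added illustrative example, not part of the original sources: in
   32-bit PIC mode (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) is
   a valid displacement (subject to gotoff_operand), while a bare
   (symbol_ref "x") is not and must go through the GOT instead.  */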
12593
12594 bool
12595 legitimate_pic_address_disp_p (rtx disp)
12596 {
12597 bool saw_plus;
12598
12599 /* In 64bit mode we can allow direct addresses of symbols and labels
12600 when they are not dynamic symbols. */
12601 if (TARGET_64BIT)
12602 {
12603 rtx op0 = disp, op1;
12604
12605 switch (GET_CODE (disp))
12606 {
12607 case LABEL_REF:
12608 return true;
12609
12610 case CONST:
12611 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12612 break;
12613 op0 = XEXP (XEXP (disp, 0), 0);
12614 op1 = XEXP (XEXP (disp, 0), 1);
12615 if (!CONST_INT_P (op1)
12616 || INTVAL (op1) >= 16*1024*1024
12617 || INTVAL (op1) < -16*1024*1024)
12618 break;
12619 if (GET_CODE (op0) == LABEL_REF)
12620 return true;
12621 if (GET_CODE (op0) == CONST
12622 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12623 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12624 return true;
12625 if (GET_CODE (op0) == UNSPEC
12626 && XINT (op0, 1) == UNSPEC_PCREL)
12627 return true;
12628 if (GET_CODE (op0) != SYMBOL_REF)
12629 break;
12630 /* FALLTHRU */
12631
12632 case SYMBOL_REF:
12633 /* TLS references should always be enclosed in UNSPEC.
12634 A dllimported symbol always needs to be resolved. */
12635 if (SYMBOL_REF_TLS_MODEL (op0)
12636 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12637 return false;
12638
12639 if (TARGET_PECOFF)
12640 {
12641 if (is_imported_p (op0))
12642 return true;
12643
12644 if (SYMBOL_REF_FAR_ADDR_P (op0)
12645 || !SYMBOL_REF_LOCAL_P (op0))
12646 break;
12647
12648 /* Function symbols need to be resolved only for
12649 the large model.
12650 For the small model we don't need to resolve anything
12651 here. */
12652 if ((ix86_cmodel != CM_LARGE_PIC
12653 && SYMBOL_REF_FUNCTION_P (op0))
12654 || ix86_cmodel == CM_SMALL_PIC)
12655 return true;
12656 /* Non-external symbols don't need to be resolved for
12657 the large and medium models. */
12658 if ((ix86_cmodel == CM_LARGE_PIC
12659 || ix86_cmodel == CM_MEDIUM_PIC)
12660 && !SYMBOL_REF_EXTERNAL_P (op0))
12661 return true;
12662 }
12663 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12664 && SYMBOL_REF_LOCAL_P (op0)
12665 && ix86_cmodel != CM_LARGE_PIC)
12666 return true;
12667 break;
12668
12669 default:
12670 break;
12671 }
12672 }
12673 if (GET_CODE (disp) != CONST)
12674 return false;
12675 disp = XEXP (disp, 0);
12676
12677 if (TARGET_64BIT)
12678 {
12679 /* It is unsafe to allow PLUS expressions; this limits the allowed
12680 distance of GOT table entries. We should not need these anyway. */
12681 if (GET_CODE (disp) != UNSPEC
12682 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12683 && XINT (disp, 1) != UNSPEC_GOTOFF
12684 && XINT (disp, 1) != UNSPEC_PCREL
12685 && XINT (disp, 1) != UNSPEC_PLTOFF))
12686 return false;
12687
12688 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12689 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12690 return false;
12691 return true;
12692 }
12693
12694 saw_plus = false;
12695 if (GET_CODE (disp) == PLUS)
12696 {
12697 if (!CONST_INT_P (XEXP (disp, 1)))
12698 return false;
12699 disp = XEXP (disp, 0);
12700 saw_plus = true;
12701 }
12702
12703 if (TARGET_MACHO && darwin_local_data_pic (disp))
12704 return true;
12705
12706 if (GET_CODE (disp) != UNSPEC)
12707 return false;
12708
12709 switch (XINT (disp, 1))
12710 {
12711 case UNSPEC_GOT:
12712 if (saw_plus)
12713 return false;
12714 /* We need to check for both symbols and labels because VxWorks loads
12715 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12716 details. */
12717 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12718 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12719 case UNSPEC_GOTOFF:
12720 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12721 While the ABI also specifies a 32bit relocation, we don't produce it
12722 in the small PIC model at all. */
12723 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12724 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12725 && !TARGET_64BIT)
12726 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12727 return false;
12728 case UNSPEC_GOTTPOFF:
12729 case UNSPEC_GOTNTPOFF:
12730 case UNSPEC_INDNTPOFF:
12731 if (saw_plus)
12732 return false;
12733 disp = XVECEXP (disp, 0, 0);
12734 return (GET_CODE (disp) == SYMBOL_REF
12735 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12736 case UNSPEC_NTPOFF:
12737 disp = XVECEXP (disp, 0, 0);
12738 return (GET_CODE (disp) == SYMBOL_REF
12739 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12740 case UNSPEC_DTPOFF:
12741 disp = XVECEXP (disp, 0, 0);
12742 return (GET_CODE (disp) == SYMBOL_REF
12743 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12744 }
12745
12746 return false;
12747 }
12748
12749 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12750 replace the input X, or the original X if no replacement is called for.
12751 The output parameter *WIN is 1 if the calling macro should goto WIN,
12752 0 if it should not. */
12753
12754 bool
12755 ix86_legitimize_reload_address (rtx x,
12756 enum machine_mode mode ATTRIBUTE_UNUSED,
12757 int opnum, int type,
12758 int ind_levels ATTRIBUTE_UNUSED)
12759 {
12760 /* Reload can generate:
12761
12762 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12763 (reg:DI 97))
12764 (reg:DI 2 cx))
12765
12766 This RTX is rejected by ix86_legitimate_address_p due to
12767 the non-strictness of base register 97. Following this rejection,
12768 reload pushes all three components into separate registers,
12769 creating an invalid memory address RTX.
12770
12771 The following code reloads only the invalid part of the
12772 memory address RTX. */
12773
12774 if (GET_CODE (x) == PLUS
12775 && REG_P (XEXP (x, 1))
12776 && GET_CODE (XEXP (x, 0)) == PLUS
12777 && REG_P (XEXP (XEXP (x, 0), 1)))
12778 {
12779 rtx base, index;
12780 bool something_reloaded = false;
12781
12782 base = XEXP (XEXP (x, 0), 1);
12783 if (!REG_OK_FOR_BASE_STRICT_P (base))
12784 {
12785 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12786 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12787 opnum, (enum reload_type) type);
12788 something_reloaded = true;
12789 }
12790
12791 index = XEXP (x, 1);
12792 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12793 {
12794 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12795 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12796 opnum, (enum reload_type) type);
12797 something_reloaded = true;
12798 }
12799
12800 gcc_assert (something_reloaded);
12801 return true;
12802 }
12803
12804 return false;
12805 }
12806
12807 /* Determine if OP is a suitable RTX for an address register.
12808 Return the naked register if a register or a register subreg is
12809 found, otherwise return NULL_RTX. */
12810
12811 static rtx
12812 ix86_validate_address_register (rtx op)
12813 {
12814 enum machine_mode mode = GET_MODE (op);
12815
12816 /* Only SImode or DImode registers can form the address. */
12817 if (mode != SImode && mode != DImode)
12818 return NULL_RTX;
12819
12820 if (REG_P (op))
12821 return op;
12822 else if (GET_CODE (op) == SUBREG)
12823 {
12824 rtx reg = SUBREG_REG (op);
12825
12826 if (!REG_P (reg))
12827 return NULL_RTX;
12828
12829 mode = GET_MODE (reg);
12830
12831 /* Don't allow SUBREGs that span more than a word. They can
12832 lead to spill failures when the register is one word out
12833 of a two-word structure. */
12834 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12835 return NULL_RTX;
12836
12837 /* Allow only SUBREGs of non-eliminable hard registers. */
12838 if (register_no_elim_operand (reg, mode))
12839 return reg;
12840 }
12841
12842 /* Op is not a register. */
12843 return NULL_RTX;
12844 }
12845
12846 /* Recognizes RTL expressions that are valid memory addresses for an
12847 instruction. The MODE argument is the machine mode for the MEM
12848 expression that wants to use this address.
12849
12850 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12851 convert common non-canonical forms to canonical form so that they will
12852 be recognized. */
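/* Added illustrative examples, not part of the original sources:
   accepted forms include (reg), (plus (reg) (const_int 8)) and
   (plus (reg) (mult (reg) (const_int 4))); rejected forms include a
   scale other than 1, 2, 4 or 8, base and index registers of
   different modes, and symbolic displacements that are not legitimate
   (PIC) constants.  */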
12853
12854 static bool
12855 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12856 rtx addr, bool strict)
12857 {
12858 struct ix86_address parts;
12859 rtx base, index, disp;
12860 HOST_WIDE_INT scale;
12861 enum ix86_address_seg seg;
12862
12863 if (ix86_decompose_address (addr, &parts) <= 0)
12864 /* Decomposition failed. */
12865 return false;
12866
12867 base = parts.base;
12868 index = parts.index;
12869 disp = parts.disp;
12870 scale = parts.scale;
12871 seg = parts.seg;
12872
12873 /* Validate base register. */
12874 if (base)
12875 {
12876 rtx reg = ix86_validate_address_register (base);
12877
12878 if (reg == NULL_RTX)
12879 return false;
12880
12881 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12882 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12883 /* Base is not valid. */
12884 return false;
12885 }
12886
12887 /* Validate index register. */
12888 if (index)
12889 {
12890 rtx reg = ix86_validate_address_register (index);
12891
12892 if (reg == NULL_RTX)
12893 return false;
12894
12895 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12896 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12897 /* Index is not valid. */
12898 return false;
12899 }
12900
12901 /* Index and base should have the same mode. */
12902 if (base && index
12903 && GET_MODE (base) != GET_MODE (index))
12904 return false;
12905
12906 /* Address override works only on the (%reg) part of %fs:(%reg). */
12907 if (seg != SEG_DEFAULT
12908 && ((base && GET_MODE (base) != word_mode)
12909 || (index && GET_MODE (index) != word_mode)))
12910 return false;
12911
12912 /* Validate scale factor. */
12913 if (scale != 1)
12914 {
12915 if (!index)
12916 /* Scale without index. */
12917 return false;
12918
12919 if (scale != 2 && scale != 4 && scale != 8)
12920 /* Scale is not a valid multiplier. */
12921 return false;
12922 }
12923
12924 /* Validate displacement. */
12925 if (disp)
12926 {
12927 if (GET_CODE (disp) == CONST
12928 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12929 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12930 switch (XINT (XEXP (disp, 0), 1))
12931 {
12932 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12933 used. While the ABI also specifies 32bit relocations, we don't produce
12934 them at all and use IP-relative addressing instead. */
12935 case UNSPEC_GOT:
12936 case UNSPEC_GOTOFF:
12937 gcc_assert (flag_pic);
12938 if (!TARGET_64BIT)
12939 goto is_legitimate_pic;
12940
12941 /* 64bit address unspec. */
12942 return false;
12943
12944 case UNSPEC_GOTPCREL:
12945 case UNSPEC_PCREL:
12946 gcc_assert (flag_pic);
12947 goto is_legitimate_pic;
12948
12949 case UNSPEC_GOTTPOFF:
12950 case UNSPEC_GOTNTPOFF:
12951 case UNSPEC_INDNTPOFF:
12952 case UNSPEC_NTPOFF:
12953 case UNSPEC_DTPOFF:
12954 break;
12955
12956 case UNSPEC_STACK_CHECK:
12957 gcc_assert (flag_split_stack);
12958 break;
12959
12960 default:
12961 /* Invalid address unspec. */
12962 return false;
12963 }
12964
12965 else if (SYMBOLIC_CONST (disp)
12966 && (flag_pic
12967 || (TARGET_MACHO
12968 #if TARGET_MACHO
12969 && MACHOPIC_INDIRECT
12970 && !machopic_operand_p (disp)
12971 #endif
12972 )))
12973 {
12974
12975 is_legitimate_pic:
12976 if (TARGET_64BIT && (index || base))
12977 {
12978 /* foo@dtpoff(%rX) is ok. */
12979 if (GET_CODE (disp) != CONST
12980 || GET_CODE (XEXP (disp, 0)) != PLUS
12981 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12982 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12983 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12984 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12985 /* Non-constant pic memory reference. */
12986 return false;
12987 }
12988 else if ((!TARGET_MACHO || flag_pic)
12989 && ! legitimate_pic_address_disp_p (disp))
12990 /* Displacement is an invalid pic construct. */
12991 return false;
12992 #if TARGET_MACHO
12993 else if (MACHO_DYNAMIC_NO_PIC_P
12994 && !ix86_legitimate_constant_p (Pmode, disp))
12995 /* The displacement must be referenced via a non_lazy_pointer. */
12996 return false;
12997 #endif
12998
12999 /* This code used to verify that a symbolic pic displacement
13000 includes the pic_offset_table_rtx register.
13001
13002 While this is a good idea, unfortunately these constructs may
13003 be created by the "adds using lea" optimization for incorrect
13004 code like:
13005
13006 int a;
13007 int foo(int i)
13008 {
13009 return *(&a+i);
13010 }
13011
13012 This code is nonsensical, but results in addressing the
13013 GOT table with a pic_offset_table_rtx base. We can't
13014 just refuse it easily, since it gets matched by the
13015 "addsi3" pattern, which later gets split to lea in the
13016 case where the output register differs from the input. While this
13017 could be handled by a separate addsi pattern for this case
13018 that never results in lea, disabling this test seems to be the
13019 easier and correct fix for the crash. */
13020 }
13021 else if (GET_CODE (disp) != LABEL_REF
13022 && !CONST_INT_P (disp)
13023 && (GET_CODE (disp) != CONST
13024 || !ix86_legitimate_constant_p (Pmode, disp))
13025 && (GET_CODE (disp) != SYMBOL_REF
13026 || !ix86_legitimate_constant_p (Pmode, disp)))
13027 /* Displacement is not constant. */
13028 return false;
13029 else if (TARGET_64BIT
13030 && !x86_64_immediate_operand (disp, VOIDmode))
13031 /* Displacement is out of range. */
13032 return false;
13033 /* In x32 mode, constant addresses are sign extended to 64bit, so
13034 we have to reject addresses in the range 0x80000000 to 0xffffffff. */
13035 else if (TARGET_X32 && !(index || base)
13036 && CONST_INT_P (disp)
13037 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13038 return false;
13039 }
13040
13041 /* Everything looks valid. */
13042 return true;
13043 }
13044
13045 /* Determine if a given RTX is a valid constant address. */
13046
13047 bool
13048 constant_address_p (rtx x)
13049 {
13050 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13051 }
13052 \f
13053 /* Return a unique alias set for the GOT. */
13054
13055 static alias_set_type
13056 ix86_GOT_alias_set (void)
13057 {
13058 static alias_set_type set = -1;
13059 if (set == -1)
13060 set = new_alias_set ();
13061 return set;
13062 }
13063
13064 /* Return a legitimate reference for ORIG (an address) using the
13065 register REG. If REG is 0, a new pseudo is generated.
13066
13067 There are two types of references that must be handled:
13068
13069 1. Global data references must load the address from the GOT, via
13070 the PIC reg. An insn is emitted to do this load, and the reg is
13071 returned.
13072
13073 2. Static data references, constant pool addresses, and code labels
13074 compute the address as an offset from the GOT, whose base is in
13075 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13076 differentiate them from global data objects. The returned
13077 address is the PIC reg + an unspec constant.
13078
13079 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13080 reg also appears in the address. */
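/* Added illustrative 32-bit examples, not part of the original
   sources: a global symbol is loaded through the GOT, roughly
   movl g@GOT(%ebx), %reg (case 1 above), while a local static symbol
   is addressed relative to the GOT base, roughly
   leal s@GOTOFF(%ebx), %reg (case 2 above).  */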
13081
13082 static rtx
13083 legitimize_pic_address (rtx orig, rtx reg)
13084 {
13085 rtx addr = orig;
13086 rtx new_rtx = orig;
13087
13088 #if TARGET_MACHO
13089 if (TARGET_MACHO && !TARGET_64BIT)
13090 {
13091 if (reg == 0)
13092 reg = gen_reg_rtx (Pmode);
13093 /* Use the generic Mach-O PIC machinery. */
13094 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13095 }
13096 #endif
13097
13098 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13099 {
13100 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13101 if (tmp)
13102 return tmp;
13103 }
13104
13105 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13106 new_rtx = addr;
13107 else if (TARGET_64BIT && !TARGET_PECOFF
13108 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13109 {
13110 rtx tmpreg;
13111 /* This symbol may be referenced via a displacement from the PIC
13112 base address (@GOTOFF). */
13113
13114 if (reload_in_progress)
13115 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13116 if (GET_CODE (addr) == CONST)
13117 addr = XEXP (addr, 0);
13118 if (GET_CODE (addr) == PLUS)
13119 {
13120 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13121 UNSPEC_GOTOFF);
13122 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13123 }
13124 else
13125 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13126 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13127 if (!reg)
13128 tmpreg = gen_reg_rtx (Pmode);
13129 else
13130 tmpreg = reg;
13131 emit_move_insn (tmpreg, new_rtx);
13132
13133 if (reg != 0)
13134 {
13135 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13136 tmpreg, 1, OPTAB_DIRECT);
13137 new_rtx = reg;
13138 }
13139 else
13140 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13141 }
13142 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13143 {
13144 /* This symbol may be referenced via a displacement from the PIC
13145 base address (@GOTOFF). */
13146
13147 if (reload_in_progress)
13148 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13149 if (GET_CODE (addr) == CONST)
13150 addr = XEXP (addr, 0);
13151 if (GET_CODE (addr) == PLUS)
13152 {
13153 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13154 UNSPEC_GOTOFF);
13155 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13156 }
13157 else
13158 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13159 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13160 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13161
13162 if (reg != 0)
13163 {
13164 emit_move_insn (reg, new_rtx);
13165 new_rtx = reg;
13166 }
13167 }
13168 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13169 /* We can't use @GOTOFF for text labels on VxWorks;
13170 see gotoff_operand. */
13171 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13172 {
13173 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13174 if (tmp)
13175 return tmp;
13176
13177 /* For x64 PE-COFF there is no GOT table, so we use the address
13178 directly. */
13179 if (TARGET_64BIT && TARGET_PECOFF)
13180 {
13181 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13182 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13183
13184 if (reg == 0)
13185 reg = gen_reg_rtx (Pmode);
13186 emit_move_insn (reg, new_rtx);
13187 new_rtx = reg;
13188 }
13189 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13190 {
13191 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13192 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13193 new_rtx = gen_const_mem (Pmode, new_rtx);
13194 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13195
13196 if (reg == 0)
13197 reg = gen_reg_rtx (Pmode);
13198 /* Use gen_movsi directly, otherwise the address is loaded
13199 into a register for CSE. We don't want to CSE these addresses;
13200 instead we CSE addresses from the GOT table, so skip this. */
13201 emit_insn (gen_movsi (reg, new_rtx));
13202 new_rtx = reg;
13203 }
13204 else
13205 {
13206 /* This symbol must be referenced via a load from the
13207 Global Offset Table (@GOT). */
13208
13209 if (reload_in_progress)
13210 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13211 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13212 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13213 if (TARGET_64BIT)
13214 new_rtx = force_reg (Pmode, new_rtx);
13215 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13216 new_rtx = gen_const_mem (Pmode, new_rtx);
13217 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13218
13219 if (reg == 0)
13220 reg = gen_reg_rtx (Pmode);
13221 emit_move_insn (reg, new_rtx);
13222 new_rtx = reg;
13223 }
13224 }
13225 else
13226 {
13227 if (CONST_INT_P (addr)
13228 && !x86_64_immediate_operand (addr, VOIDmode))
13229 {
13230 if (reg)
13231 {
13232 emit_move_insn (reg, addr);
13233 new_rtx = reg;
13234 }
13235 else
13236 new_rtx = force_reg (Pmode, addr);
13237 }
13238 else if (GET_CODE (addr) == CONST)
13239 {
13240 addr = XEXP (addr, 0);
13241
13242 /* We must match stuff we generated before. Assume the only
13243 unspecs that can get here are ours. Not that we could do
13244 anything with them anyway.... */
13245 if (GET_CODE (addr) == UNSPEC
13246 || (GET_CODE (addr) == PLUS
13247 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13248 return orig;
13249 gcc_assert (GET_CODE (addr) == PLUS);
13250 }
13251 if (GET_CODE (addr) == PLUS)
13252 {
13253 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13254
13255 /* Check first to see if this is a constant offset from a @GOTOFF
13256 symbol reference. */
13257 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13258 && CONST_INT_P (op1))
13259 {
13260 if (!TARGET_64BIT)
13261 {
13262 if (reload_in_progress)
13263 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13264 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13265 UNSPEC_GOTOFF);
13266 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13267 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13268 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13269
13270 if (reg != 0)
13271 {
13272 emit_move_insn (reg, new_rtx);
13273 new_rtx = reg;
13274 }
13275 }
13276 else
13277 {
13278 if (INTVAL (op1) < -16*1024*1024
13279 || INTVAL (op1) >= 16*1024*1024)
13280 {
13281 if (!x86_64_immediate_operand (op1, Pmode))
13282 op1 = force_reg (Pmode, op1);
13283 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13284 }
13285 }
13286 }
13287 else
13288 {
13289 rtx base = legitimize_pic_address (op0, reg);
13290 enum machine_mode mode = GET_MODE (base);
13291 new_rtx
13292 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13293
13294 if (CONST_INT_P (new_rtx))
13295 {
13296 if (INTVAL (new_rtx) < -16*1024*1024
13297 || INTVAL (new_rtx) >= 16*1024*1024)
13298 {
13299 if (!x86_64_immediate_operand (new_rtx, mode))
13300 new_rtx = force_reg (mode, new_rtx);
13301 new_rtx
13302 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13303 }
13304 else
13305 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13306 }
13307 else
13308 {
13309 if (GET_CODE (new_rtx) == PLUS
13310 && CONSTANT_P (XEXP (new_rtx, 1)))
13311 {
13312 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13313 new_rtx = XEXP (new_rtx, 1);
13314 }
13315 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13316 }
13317 }
13318 }
13319 }
13320 return new_rtx;
13321 }
13322 \f
13323 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13324
13325 static rtx
13326 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13327 {
13328 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13329
13330 if (GET_MODE (tp) != tp_mode)
13331 {
13332 gcc_assert (GET_MODE (tp) == SImode);
13333 gcc_assert (tp_mode == DImode);
13334
13335 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13336 }
13337
13338 if (to_reg)
13339 tp = copy_to_mode_reg (tp_mode, tp);
13340
13341 return tp;
13342 }
13343
13344 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13345
13346 static GTY(()) rtx ix86_tls_symbol;
13347
13348 static rtx
13349 ix86_tls_get_addr (void)
13350 {
13351 if (!ix86_tls_symbol)
13352 {
13353 const char *sym
13354 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13355 ? "___tls_get_addr" : "__tls_get_addr");
13356
13357 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13358 }
13359
13360 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13361 {
13362 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13363 UNSPEC_PLTOFF);
13364 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13365 gen_rtx_CONST (Pmode, unspec));
13366 }
13367
13368 return ix86_tls_symbol;
13369 }
13370
13371 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13372
13373 static GTY(()) rtx ix86_tls_module_base_symbol;
13374
13375 rtx
13376 ix86_tls_module_base (void)
13377 {
13378 if (!ix86_tls_module_base_symbol)
13379 {
13380 ix86_tls_module_base_symbol
13381 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13382
13383 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13384 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13385 }
13386
13387 return ix86_tls_module_base_symbol;
13388 }
13389
13390 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13391 false if we expect this to be used for a memory address and true if
13392 we expect to load the address into a register. */
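/* Added illustrative 64-bit examples, not part of the original
   sources; exact sequences depend on the TLS model and TARGET_GNU2_TLS:
   local-exec becomes an %fs-relative access such as
       movq %fs:0, %rax ;  leaq x@tpoff(%rax), %rax
   while initial-exec first loads the offset from the GOT:
       movq x@gottpoff(%rip), %rax ;  movq %fs:(%rax), %rdx  */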
13393
13394 static rtx
13395 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13396 {
13397 rtx dest, base, off;
13398 rtx pic = NULL_RTX, tp = NULL_RTX;
13399 enum machine_mode tp_mode = Pmode;
13400 int type;
13401
13402 switch (model)
13403 {
13404 case TLS_MODEL_GLOBAL_DYNAMIC:
13405 dest = gen_reg_rtx (Pmode);
13406
13407 if (!TARGET_64BIT)
13408 {
13409 if (flag_pic && !TARGET_PECOFF)
13410 pic = pic_offset_table_rtx;
13411 else
13412 {
13413 pic = gen_reg_rtx (Pmode);
13414 emit_insn (gen_set_got (pic));
13415 }
13416 }
13417
13418 if (TARGET_GNU2_TLS)
13419 {
13420 if (TARGET_64BIT)
13421 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13422 else
13423 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13424
13425 tp = get_thread_pointer (Pmode, true);
13426 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13427
13428 if (GET_MODE (x) != Pmode)
13429 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13430
13431 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13432 }
13433 else
13434 {
13435 rtx caddr = ix86_tls_get_addr ();
13436
13437 if (TARGET_64BIT)
13438 {
13439 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13440 rtx insns;
13441
13442 start_sequence ();
13443 emit_call_insn
13444 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13445 insns = get_insns ();
13446 end_sequence ();
13447
13448 if (GET_MODE (x) != Pmode)
13449 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13450
13451 RTL_CONST_CALL_P (insns) = 1;
13452 emit_libcall_block (insns, dest, rax, x);
13453 }
13454 else
13455 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13456 }
13457 break;
13458
13459 case TLS_MODEL_LOCAL_DYNAMIC:
13460 base = gen_reg_rtx (Pmode);
13461
13462 if (!TARGET_64BIT)
13463 {
13464 if (flag_pic)
13465 pic = pic_offset_table_rtx;
13466 else
13467 {
13468 pic = gen_reg_rtx (Pmode);
13469 emit_insn (gen_set_got (pic));
13470 }
13471 }
13472
13473 if (TARGET_GNU2_TLS)
13474 {
13475 rtx tmp = ix86_tls_module_base ();
13476
13477 if (TARGET_64BIT)
13478 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13479 else
13480 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13481
13482 tp = get_thread_pointer (Pmode, true);
13483 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13484 gen_rtx_MINUS (Pmode, tmp, tp));
13485 }
13486 else
13487 {
13488 rtx caddr = ix86_tls_get_addr ();
13489
13490 if (TARGET_64BIT)
13491 {
13492 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13493 rtx insns, eqv;
13494
13495 start_sequence ();
13496 emit_call_insn
13497 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13498 insns = get_insns ();
13499 end_sequence ();
13500
13501 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13502 share the LD_BASE result with other LD model accesses. */
13503 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13504 UNSPEC_TLS_LD_BASE);
13505
13506 RTL_CONST_CALL_P (insns) = 1;
13507 emit_libcall_block (insns, base, rax, eqv);
13508 }
13509 else
13510 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13511 }
13512
13513 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13514 off = gen_rtx_CONST (Pmode, off);
13515
13516 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13517
13518 if (TARGET_GNU2_TLS)
13519 {
13520 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13521
13522 if (GET_MODE (x) != Pmode)
13523 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13524
13525 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13526 }
13527 break;
13528
13529 case TLS_MODEL_INITIAL_EXEC:
13530 if (TARGET_64BIT)
13531 {
13532 if (TARGET_SUN_TLS && !TARGET_X32)
13533 {
13534 /* The Sun linker took the AMD64 TLS spec literally
13535 and can only handle %rax as the destination of the
13536 initial-exec code sequence. */
13537
13538 dest = gen_reg_rtx (DImode);
13539 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13540 return dest;
13541 }
13542
13543 /* Generate DImode references to avoid %fs:(%reg32)
13544 problems and a linker IE->LE relaxation bug. */
13545 tp_mode = DImode;
13546 pic = NULL;
13547 type = UNSPEC_GOTNTPOFF;
13548 }
13549 else if (flag_pic)
13550 {
13551 if (reload_in_progress)
13552 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13553 pic = pic_offset_table_rtx;
13554 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13555 }
13556 else if (!TARGET_ANY_GNU_TLS)
13557 {
13558 pic = gen_reg_rtx (Pmode);
13559 emit_insn (gen_set_got (pic));
13560 type = UNSPEC_GOTTPOFF;
13561 }
13562 else
13563 {
13564 pic = NULL;
13565 type = UNSPEC_INDNTPOFF;
13566 }
13567
13568 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13569 off = gen_rtx_CONST (tp_mode, off);
13570 if (pic)
13571 off = gen_rtx_PLUS (tp_mode, pic, off);
13572 off = gen_const_mem (tp_mode, off);
13573 set_mem_alias_set (off, ix86_GOT_alias_set ());
13574
13575 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13576 {
13577 base = get_thread_pointer (tp_mode,
13578 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13579 off = force_reg (tp_mode, off);
13580 return gen_rtx_PLUS (tp_mode, base, off);
13581 }
13582 else
13583 {
13584 base = get_thread_pointer (Pmode, true);
13585 dest = gen_reg_rtx (Pmode);
13586 emit_insn (ix86_gen_sub3 (dest, base, off));
13587 }
13588 break;
13589
13590 case TLS_MODEL_LOCAL_EXEC:
13591 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13592 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13593 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13594 off = gen_rtx_CONST (Pmode, off);
13595
13596 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13597 {
13598 base = get_thread_pointer (Pmode,
13599 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13600 return gen_rtx_PLUS (Pmode, base, off);
13601 }
13602 else
13603 {
13604 base = get_thread_pointer (Pmode, true);
13605 dest = gen_reg_rtx (Pmode);
13606 emit_insn (ix86_gen_sub3 (dest, base, off));
13607 }
13608 break;
13609
13610 default:
13611 gcc_unreachable ();
13612 }
13613
13614 return dest;
13615 }
13616
13617 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13618 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13619 unique refptr-DECL symbol corresponding to symbol DECL. */
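/* Illustrative note (not part of the original sources): for a
   dllimported symbol "foo" the pointer created here is typically named
   "*__imp_foo" (or "*__imp__foo" when a user label prefix is in use),
   and references to foo become loads through that pointer.  */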
13620
13621 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13622 htab_t dllimport_map;
13623
13624 static tree
13625 get_dllimport_decl (tree decl, bool beimport)
13626 {
13627 struct tree_map *h, in;
13628 void **loc;
13629 const char *name;
13630 const char *prefix;
13631 size_t namelen, prefixlen;
13632 char *imp_name;
13633 tree to;
13634 rtx rtl;
13635
13636 if (!dllimport_map)
13637 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13638
13639 in.hash = htab_hash_pointer (decl);
13640 in.base.from = decl;
13641 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13642 h = (struct tree_map *) *loc;
13643 if (h)
13644 return h->to;
13645
13646 *loc = h = ggc_alloc_tree_map ();
13647 h->hash = in.hash;
13648 h->base.from = decl;
13649 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13650 VAR_DECL, NULL, ptr_type_node);
13651 DECL_ARTIFICIAL (to) = 1;
13652 DECL_IGNORED_P (to) = 1;
13653 DECL_EXTERNAL (to) = 1;
13654 TREE_READONLY (to) = 1;
13655
13656 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13657 name = targetm.strip_name_encoding (name);
13658 if (beimport)
13659 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13660 ? "*__imp_" : "*__imp__";
13661 else
13662 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13663 namelen = strlen (name);
13664 prefixlen = strlen (prefix);
13665 imp_name = (char *) alloca (namelen + prefixlen + 1);
13666 memcpy (imp_name, prefix, prefixlen);
13667 memcpy (imp_name + prefixlen, name, namelen + 1);
13668
13669 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13670 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13671 SET_SYMBOL_REF_DECL (rtl, to);
13672 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13673 if (!beimport)
13674 {
13675 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13676 #ifdef SUB_TARGET_RECORD_STUB
13677 SUB_TARGET_RECORD_STUB (name);
13678 #endif
13679 }
13680
13681 rtl = gen_const_mem (Pmode, rtl);
13682 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13683
13684 SET_DECL_RTL (to, rtl);
13685 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13686
13687 return to;
13688 }
13689
13690 /* Expand SYMBOL into its corresponding far-addressed symbol.
13691 WANT_REG is true if we require the result be a register. */
13692
13693 static rtx
13694 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13695 {
13696 tree imp_decl;
13697 rtx x;
13698
13699 gcc_assert (SYMBOL_REF_DECL (symbol));
13700 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13701
13702 x = DECL_RTL (imp_decl);
13703 if (want_reg)
13704 x = force_reg (Pmode, x);
13705 return x;
13706 }
13707
13708 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13709 true if we require the result be a register. */
13710
13711 static rtx
13712 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13713 {
13714 tree imp_decl;
13715 rtx x;
13716
13717 gcc_assert (SYMBOL_REF_DECL (symbol));
13718 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13719
13720 x = DECL_RTL (imp_decl);
13721 if (want_reg)
13722 x = force_reg (Pmode, x);
13723 return x;
13724 }
13725
13726 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13727 is true if we require the result be a register. */
13728
13729 static rtx
13730 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13731 {
13732 if (!TARGET_PECOFF)
13733 return NULL_RTX;
13734
13735 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13736 {
13737 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13738 return legitimize_dllimport_symbol (addr, inreg);
13739 if (GET_CODE (addr) == CONST
13740 && GET_CODE (XEXP (addr, 0)) == PLUS
13741 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13742 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13743 {
13744 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13745 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13746 }
13747 }
13748
13749 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13750 return NULL_RTX;
13751 if (GET_CODE (addr) == SYMBOL_REF
13752 && !is_imported_p (addr)
13753 && SYMBOL_REF_EXTERNAL_P (addr)
13754 && SYMBOL_REF_DECL (addr))
13755 return legitimize_pe_coff_extern_decl (addr, inreg);
13756
13757 if (GET_CODE (addr) == CONST
13758 && GET_CODE (XEXP (addr, 0)) == PLUS
13759 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13760 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13761 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13762 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13763 {
13764 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13765 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13766 }
13767 return NULL_RTX;
13768 }
13769
13770 /* Try machine-dependent ways of modifying an illegitimate address
13771 to be legitimate. If we find one, return the new, valid address.
13772 This macro is used in only one place: `memory_address' in explow.c.
13773
13774 OLDX is the address as it was before break_out_memory_refs was called.
13775 In some cases it is useful to look at this to decide what needs to be done.
13776
13777 It is always safe for this macro to do nothing. It exists to recognize
13778 opportunities to optimize the output.
13779
13780 For the 80386, we handle X+REG by loading X into a register R and
13781 using R+REG. R will go in a general reg and indexing will be used.
13782 However, if REG is a broken-out memory address or multiplication,
13783 nothing needs to be done because REG can certainly go in a general reg.
13784
13785 When -fpic is used, special handling is needed for symbolic references.
13786 See comments by legitimize_pic_address in i386.c for details. */
13787
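/* A sketch of the common -fpic case (illustrative, not from the original
   sources): an ia32 address such as (plus (reg) (symbol_ref "x")) is not
   directly encodable under PIC; the symbolic part goes through
   legitimize_pic_address, which rewrites it as a GOT or GOTOFF reference
   relative to the PIC register before the sum is rebuilt.  */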
13788 static rtx
13789 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13790 enum machine_mode mode)
13791 {
13792 int changed = 0;
13793 unsigned log;
13794
13795 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13796 if (log)
13797 return legitimize_tls_address (x, (enum tls_model) log, false);
13798 if (GET_CODE (x) == CONST
13799 && GET_CODE (XEXP (x, 0)) == PLUS
13800 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13801 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13802 {
13803 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13804 (enum tls_model) log, false);
13805 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13806 }
13807
13808 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13809 {
13810 rtx tmp = legitimize_pe_coff_symbol (x, true);
13811 if (tmp)
13812 return tmp;
13813 }
13814
13815 if (flag_pic && SYMBOLIC_CONST (x))
13816 return legitimize_pic_address (x, 0);
13817
13818 #if TARGET_MACHO
13819 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13820 return machopic_indirect_data_reference (x, 0);
13821 #endif
13822
13823 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13824 if (GET_CODE (x) == ASHIFT
13825 && CONST_INT_P (XEXP (x, 1))
13826 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13827 {
13828 changed = 1;
13829 log = INTVAL (XEXP (x, 1));
13830 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13831 GEN_INT (1 << log));
13832 }
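/* Example (illustrative): (ashift (reg) (const_int 3)) becomes
   (mult (reg) (const_int 8)), matching the scale field of the x86 SIB
   addressing form that ix86_legitimate_address_p accepts.  */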
13833
13834 if (GET_CODE (x) == PLUS)
13835 {
13836 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13837
13838 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13839 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13840 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13841 {
13842 changed = 1;
13843 log = INTVAL (XEXP (XEXP (x, 0), 1));
13844 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13845 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13846 GEN_INT (1 << log));
13847 }
13848
13849 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13850 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13851 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13852 {
13853 changed = 1;
13854 log = INTVAL (XEXP (XEXP (x, 1), 1));
13855 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13856 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13857 GEN_INT (1 << log));
13858 }
13859
13860 /* Put multiply first if it isn't already. */
13861 if (GET_CODE (XEXP (x, 1)) == MULT)
13862 {
13863 rtx tmp = XEXP (x, 0);
13864 XEXP (x, 0) = XEXP (x, 1);
13865 XEXP (x, 1) = tmp;
13866 changed = 1;
13867 }
13868
13869 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13870 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13871 created by virtual register instantiation, register elimination, and
13872 similar optimizations. */
13873 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13874 {
13875 changed = 1;
13876 x = gen_rtx_PLUS (Pmode,
13877 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13878 XEXP (XEXP (x, 1), 0)),
13879 XEXP (XEXP (x, 1), 1));
13880 }
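/* Example (illustrative): (plus (mult (reg) (const_int 4))
   (plus (reg) (const_int 16))) becomes
   (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 16)),
   i.e. the canonical base + index*scale + displacement shape.  */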
13881
13882 /* Canonicalize
13883 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13884 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13885 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13886 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13887 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13888 && CONSTANT_P (XEXP (x, 1)))
13889 {
13890 rtx constant;
13891 rtx other = NULL_RTX;
13892
13893 if (CONST_INT_P (XEXP (x, 1)))
13894 {
13895 constant = XEXP (x, 1);
13896 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13897 }
13898 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13899 {
13900 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13901 other = XEXP (x, 1);
13902 }
13903 else
13904 constant = 0;
13905
13906 if (constant)
13907 {
13908 changed = 1;
13909 x = gen_rtx_PLUS (Pmode,
13910 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13911 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13912 plus_constant (Pmode, other,
13913 INTVAL (constant)));
13914 }
13915 }
13916
13917 if (changed && ix86_legitimate_address_p (mode, x, false))
13918 return x;
13919
13920 if (GET_CODE (XEXP (x, 0)) == MULT)
13921 {
13922 changed = 1;
13923 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13924 }
13925
13926 if (GET_CODE (XEXP (x, 1)) == MULT)
13927 {
13928 changed = 1;
13929 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13930 }
13931
13932 if (changed
13933 && REG_P (XEXP (x, 1))
13934 && REG_P (XEXP (x, 0)))
13935 return x;
13936
13937 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13938 {
13939 changed = 1;
13940 x = legitimize_pic_address (x, 0);
13941 }
13942
13943 if (changed && ix86_legitimate_address_p (mode, x, false))
13944 return x;
13945
13946 if (REG_P (XEXP (x, 0)))
13947 {
13948 rtx temp = gen_reg_rtx (Pmode);
13949 rtx val = force_operand (XEXP (x, 1), temp);
13950 if (val != temp)
13951 {
13952 val = convert_to_mode (Pmode, val, 1);
13953 emit_move_insn (temp, val);
13954 }
13955
13956 XEXP (x, 1) = temp;
13957 return x;
13958 }
13959
13960 else if (REG_P (XEXP (x, 1)))
13961 {
13962 rtx temp = gen_reg_rtx (Pmode);
13963 rtx val = force_operand (XEXP (x, 0), temp);
13964 if (val != temp)
13965 {
13966 val = convert_to_mode (Pmode, val, 1);
13967 emit_move_insn (temp, val);
13968 }
13969
13970 XEXP (x, 0) = temp;
13971 return x;
13972 }
13973 }
13974
13975 return x;
13976 }
13977 \f
13978 /* Print an integer constant expression in assembler syntax. Addition
13979 and subtraction are the only arithmetic that may appear in these
13980 expressions. FILE is the stdio stream to write to, X is the rtx, and
13981 CODE is the operand print code from the output string. */
13982
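/* For instance (illustrative), a CONST wrapping
   (unspec [(symbol_ref "x")] UNSPEC_GOTOFF) is printed as "x@GOTOFF",
   and UNSPEC_GOTPCREL comes out as "x@GOTPCREL(%rip)" in AT&T syntax.  */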
13983 static void
13984 output_pic_addr_const (FILE *file, rtx x, int code)
13985 {
13986 char buf[256];
13987
13988 switch (GET_CODE (x))
13989 {
13990 case PC:
13991 gcc_assert (flag_pic);
13992 putc ('.', file);
13993 break;
13994
13995 case SYMBOL_REF:
13996 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13997 output_addr_const (file, x);
13998 else
13999 {
14000 const char *name = XSTR (x, 0);
14001
14002 /* Mark the decl as referenced so that cgraph will
14003 output the function. */
14004 if (SYMBOL_REF_DECL (x))
14005 mark_decl_referenced (SYMBOL_REF_DECL (x));
14006
14007 #if TARGET_MACHO
14008 if (MACHOPIC_INDIRECT
14009 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14010 name = machopic_indirection_name (x, /*stub_p=*/true);
14011 #endif
14012 assemble_name (file, name);
14013 }
14014 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14015 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14016 fputs ("@PLT", file);
14017 break;
14018
14019 case LABEL_REF:
14020 x = XEXP (x, 0);
14021 /* FALLTHRU */
14022 case CODE_LABEL:
14023 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14024 assemble_name (asm_out_file, buf);
14025 break;
14026
14027 case CONST_INT:
14028 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14029 break;
14030
14031 case CONST:
14032 /* This used to output parentheses around the expression,
14033 but that does not work on the 386 (either ATT or BSD assembler). */
14034 output_pic_addr_const (file, XEXP (x, 0), code);
14035 break;
14036
14037 case CONST_DOUBLE:
14038 if (GET_MODE (x) == VOIDmode)
14039 {
14040 /* We can use %d if the number is <32 bits and positive. */
14041 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14042 fprintf (file, "0x%lx%08lx",
14043 (unsigned long) CONST_DOUBLE_HIGH (x),
14044 (unsigned long) CONST_DOUBLE_LOW (x));
14045 else
14046 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14047 }
14048 else
14049 /* We can't handle floating point constants;
14050 TARGET_PRINT_OPERAND must handle them. */
14051 output_operand_lossage ("floating constant misused");
14052 break;
14053
14054 case PLUS:
14055 /* Some assemblers need integer constants to appear first. */
14056 if (CONST_INT_P (XEXP (x, 0)))
14057 {
14058 output_pic_addr_const (file, XEXP (x, 0), code);
14059 putc ('+', file);
14060 output_pic_addr_const (file, XEXP (x, 1), code);
14061 }
14062 else
14063 {
14064 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14065 output_pic_addr_const (file, XEXP (x, 1), code);
14066 putc ('+', file);
14067 output_pic_addr_const (file, XEXP (x, 0), code);
14068 }
14069 break;
14070
14071 case MINUS:
14072 if (!TARGET_MACHO)
14073 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14074 output_pic_addr_const (file, XEXP (x, 0), code);
14075 putc ('-', file);
14076 output_pic_addr_const (file, XEXP (x, 1), code);
14077 if (!TARGET_MACHO)
14078 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14079 break;
14080
14081 case UNSPEC:
14082 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14083 {
14084 bool f = i386_asm_output_addr_const_extra (file, x);
14085 gcc_assert (f);
14086 break;
14087 }
14088
14089 gcc_assert (XVECLEN (x, 0) == 1);
14090 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14091 switch (XINT (x, 1))
14092 {
14093 case UNSPEC_GOT:
14094 fputs ("@GOT", file);
14095 break;
14096 case UNSPEC_GOTOFF:
14097 fputs ("@GOTOFF", file);
14098 break;
14099 case UNSPEC_PLTOFF:
14100 fputs ("@PLTOFF", file);
14101 break;
14102 case UNSPEC_PCREL:
14103 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14104 "(%rip)" : "[rip]", file);
14105 break;
14106 case UNSPEC_GOTPCREL:
14107 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14108 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14109 break;
14110 case UNSPEC_GOTTPOFF:
14111 /* FIXME: This might be @TPOFF in Sun ld too. */
14112 fputs ("@gottpoff", file);
14113 break;
14114 case UNSPEC_TPOFF:
14115 fputs ("@tpoff", file);
14116 break;
14117 case UNSPEC_NTPOFF:
14118 if (TARGET_64BIT)
14119 fputs ("@tpoff", file);
14120 else
14121 fputs ("@ntpoff", file);
14122 break;
14123 case UNSPEC_DTPOFF:
14124 fputs ("@dtpoff", file);
14125 break;
14126 case UNSPEC_GOTNTPOFF:
14127 if (TARGET_64BIT)
14128 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14129 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14130 else
14131 fputs ("@gotntpoff", file);
14132 break;
14133 case UNSPEC_INDNTPOFF:
14134 fputs ("@indntpoff", file);
14135 break;
14136 #if TARGET_MACHO
14137 case UNSPEC_MACHOPIC_OFFSET:
14138 putc ('-', file);
14139 machopic_output_function_base_name (file);
14140 break;
14141 #endif
14142 default:
14143 output_operand_lossage ("invalid UNSPEC as operand");
14144 break;
14145 }
14146 break;
14147
14148 default:
14149 output_operand_lossage ("invalid expression as operand");
14150 }
14151 }
14152
14153 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14154 We need to emit DTP-relative relocations. */
14155
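/* E.g. (illustrative) a 4-byte entry is emitted as ".long x@dtpoff",
   while the 8-byte case appends ", 0" for the upper half.  */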
14156 static void ATTRIBUTE_UNUSED
14157 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14158 {
14159 fputs (ASM_LONG, file);
14160 output_addr_const (file, x);
14161 fputs ("@dtpoff", file);
14162 switch (size)
14163 {
14164 case 4:
14165 break;
14166 case 8:
14167 fputs (", 0", file);
14168 break;
14169 default:
14170 gcc_unreachable ();
14171 }
14172 }
14173
14174 /* Return true if X is a representation of the PIC register. This copes
14175 with calls from ix86_find_base_term, where the register might have
14176 been replaced by a cselib value. */
14177
14178 static bool
14179 ix86_pic_register_p (rtx x)
14180 {
14181 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14182 return (pic_offset_table_rtx
14183 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14184 else
14185 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14186 }
14187
14188 /* Helper function for ix86_delegitimize_address.
14189 Attempt to delegitimize TLS local-exec accesses. */
14190
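/* Illustrative: a local-exec access such as %fs:x@ntpoff (possibly with
   base and scaled index terms) is decomposed here and the SYMBOL_REF "x",
   plus any base, index*scale and constant addend, is handed back.  */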
14191 static rtx
14192 ix86_delegitimize_tls_address (rtx orig_x)
14193 {
14194 rtx x = orig_x, unspec;
14195 struct ix86_address addr;
14196
14197 if (!TARGET_TLS_DIRECT_SEG_REFS)
14198 return orig_x;
14199 if (MEM_P (x))
14200 x = XEXP (x, 0);
14201 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14202 return orig_x;
14203 if (ix86_decompose_address (x, &addr) == 0
14204 || addr.seg != DEFAULT_TLS_SEG_REG
14205 || addr.disp == NULL_RTX
14206 || GET_CODE (addr.disp) != CONST)
14207 return orig_x;
14208 unspec = XEXP (addr.disp, 0);
14209 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14210 unspec = XEXP (unspec, 0);
14211 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14212 return orig_x;
14213 x = XVECEXP (unspec, 0, 0);
14214 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14215 if (unspec != XEXP (addr.disp, 0))
14216 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14217 if (addr.index)
14218 {
14219 rtx idx = addr.index;
14220 if (addr.scale != 1)
14221 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14222 x = gen_rtx_PLUS (Pmode, idx, x);
14223 }
14224 if (addr.base)
14225 x = gen_rtx_PLUS (Pmode, addr.base, x);
14226 if (MEM_P (orig_x))
14227 x = replace_equiv_address_nv (orig_x, x);
14228 return x;
14229 }
14230
14231 /* In the name of slightly smaller debug output, and to cater to
14232 general assembler lossage, recognize PIC+GOTOFF and turn it back
14233 into a direct symbol reference.
14234
14235 On Darwin, this is necessary to avoid a crash, because Darwin
14236 has a different PIC label for each routine but the DWARF debugging
14237 information is not associated with any particular routine, so it's
14238 necessary to remove references to the PIC label from RTL stored by
14239 the DWARF output code. */
14240
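/* Illustrative -m32 example: (plus (reg %ebx)
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))) is turned back into
   the plain (symbol_ref "x") for the benefit of the debug output.  */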
14241 static rtx
14242 ix86_delegitimize_address (rtx x)
14243 {
14244 rtx orig_x = delegitimize_mem_from_attrs (x);
14245 /* addend is NULL or some rtx if x is something+GOTOFF where
14246 something doesn't include the PIC register. */
14247 rtx addend = NULL_RTX;
14248 /* reg_addend is NULL or a multiple of some register. */
14249 rtx reg_addend = NULL_RTX;
14250 /* const_addend is NULL or a const_int. */
14251 rtx const_addend = NULL_RTX;
14252 /* This is the result, or NULL. */
14253 rtx result = NULL_RTX;
14254
14255 x = orig_x;
14256
14257 if (MEM_P (x))
14258 x = XEXP (x, 0);
14259
14260 if (TARGET_64BIT)
14261 {
14262 if (GET_CODE (x) == CONST
14263 && GET_CODE (XEXP (x, 0)) == PLUS
14264 && GET_MODE (XEXP (x, 0)) == Pmode
14265 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14266 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14267 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14268 {
14269 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14270 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14271 if (MEM_P (orig_x))
14272 x = replace_equiv_address_nv (orig_x, x);
14273 return x;
14274 }
14275
14276 if (GET_CODE (x) == CONST
14277 && GET_CODE (XEXP (x, 0)) == UNSPEC
14278 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14279 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14280 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14281 {
14282 x = XVECEXP (XEXP (x, 0), 0, 0);
14283 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14284 {
14285 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14286 GET_MODE (x), 0);
14287 if (x == NULL_RTX)
14288 return orig_x;
14289 }
14290 return x;
14291 }
14292
14293 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14294 return ix86_delegitimize_tls_address (orig_x);
14295
14296 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14297 and -mcmodel=medium -fpic. */
14298 }
14299
14300 if (GET_CODE (x) != PLUS
14301 || GET_CODE (XEXP (x, 1)) != CONST)
14302 return ix86_delegitimize_tls_address (orig_x);
14303
14304 if (ix86_pic_register_p (XEXP (x, 0)))
14305 /* %ebx + GOT/GOTOFF */
14306 ;
14307 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14308 {
14309 /* %ebx + %reg * scale + GOT/GOTOFF */
14310 reg_addend = XEXP (x, 0);
14311 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14312 reg_addend = XEXP (reg_addend, 1);
14313 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14314 reg_addend = XEXP (reg_addend, 0);
14315 else
14316 {
14317 reg_addend = NULL_RTX;
14318 addend = XEXP (x, 0);
14319 }
14320 }
14321 else
14322 addend = XEXP (x, 0);
14323
14324 x = XEXP (XEXP (x, 1), 0);
14325 if (GET_CODE (x) == PLUS
14326 && CONST_INT_P (XEXP (x, 1)))
14327 {
14328 const_addend = XEXP (x, 1);
14329 x = XEXP (x, 0);
14330 }
14331
14332 if (GET_CODE (x) == UNSPEC
14333 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14334 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14335 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14336 && !MEM_P (orig_x) && !addend)))
14337 result = XVECEXP (x, 0, 0);
14338
14339 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14340 && !MEM_P (orig_x))
14341 result = XVECEXP (x, 0, 0);
14342
14343 if (! result)
14344 return ix86_delegitimize_tls_address (orig_x);
14345
14346 if (const_addend)
14347 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14348 if (reg_addend)
14349 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14350 if (addend)
14351 {
14352 /* If the rest of original X doesn't involve the PIC register, add
14353 addend and subtract pic_offset_table_rtx. This can happen e.g.
14354 for code like:
14355 leal (%ebx, %ecx, 4), %ecx
14356 ...
14357 movl foo@GOTOFF(%ecx), %edx
14358 in which case we return (%ecx - %ebx) + foo. */
14359 if (pic_offset_table_rtx)
14360 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14361 pic_offset_table_rtx),
14362 result);
14363 else
14364 return orig_x;
14365 }
14366 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14367 {
14368 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14369 if (result == NULL_RTX)
14370 return orig_x;
14371 }
14372 return result;
14373 }
14374
14375 /* If X is a machine specific address (i.e. a symbol or label being
14376 referenced as a displacement from the GOT implemented using an
14377 UNSPEC), then return the base term. Otherwise return X. */
14378
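/* Illustrative 64-bit example: (const (plus (unspec [(symbol_ref "x")]
   UNSPEC_GOTPCREL) (const_int 8))) has the base term (symbol_ref "x").  */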
14379 rtx
14380 ix86_find_base_term (rtx x)
14381 {
14382 rtx term;
14383
14384 if (TARGET_64BIT)
14385 {
14386 if (GET_CODE (x) != CONST)
14387 return x;
14388 term = XEXP (x, 0);
14389 if (GET_CODE (term) == PLUS
14390 && (CONST_INT_P (XEXP (term, 1))
14391 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14392 term = XEXP (term, 0);
14393 if (GET_CODE (term) != UNSPEC
14394 || (XINT (term, 1) != UNSPEC_GOTPCREL
14395 && XINT (term, 1) != UNSPEC_PCREL))
14396 return x;
14397
14398 return XVECEXP (term, 0, 0);
14399 }
14400
14401 return ix86_delegitimize_address (x);
14402 }
14403 \f
14404 static void
14405 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14406 bool fp, FILE *file)
14407 {
14408 const char *suffix;
14409
14410 if (mode == CCFPmode || mode == CCFPUmode)
14411 {
14412 code = ix86_fp_compare_code_to_integer (code);
14413 mode = CCmode;
14414 }
14415 if (reverse)
14416 code = reverse_condition (code);
14417
14418 switch (code)
14419 {
14420 case EQ:
14421 switch (mode)
14422 {
14423 case CCAmode:
14424 suffix = "a";
14425 break;
14426
14427 case CCCmode:
14428 suffix = "c";
14429 break;
14430
14431 case CCOmode:
14432 suffix = "o";
14433 break;
14434
14435 case CCSmode:
14436 suffix = "s";
14437 break;
14438
14439 default:
14440 suffix = "e";
14441 }
14442 break;
14443 case NE:
14444 switch (mode)
14445 {
14446 case CCAmode:
14447 suffix = "na";
14448 break;
14449
14450 case CCCmode:
14451 suffix = "nc";
14452 break;
14453
14454 case CCOmode:
14455 suffix = "no";
14456 break;
14457
14458 case CCSmode:
14459 suffix = "ns";
14460 break;
14461
14462 default:
14463 suffix = "ne";
14464 }
14465 break;
14466 case GT:
14467 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14468 suffix = "g";
14469 break;
14470 case GTU:
14471 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14472 Those same assemblers have the same but opposite lossage on cmov. */
14473 if (mode == CCmode)
14474 suffix = fp ? "nbe" : "a";
14475 else
14476 gcc_unreachable ();
14477 break;
14478 case LT:
14479 switch (mode)
14480 {
14481 case CCNOmode:
14482 case CCGOCmode:
14483 suffix = "s";
14484 break;
14485
14486 case CCmode:
14487 case CCGCmode:
14488 suffix = "l";
14489 break;
14490
14491 default:
14492 gcc_unreachable ();
14493 }
14494 break;
14495 case LTU:
14496 if (mode == CCmode)
14497 suffix = "b";
14498 else if (mode == CCCmode)
14499 suffix = "c";
14500 else
14501 gcc_unreachable ();
14502 break;
14503 case GE:
14504 switch (mode)
14505 {
14506 case CCNOmode:
14507 case CCGOCmode:
14508 suffix = "ns";
14509 break;
14510
14511 case CCmode:
14512 case CCGCmode:
14513 suffix = "ge";
14514 break;
14515
14516 default:
14517 gcc_unreachable ();
14518 }
14519 break;
14520 case GEU:
14521 if (mode == CCmode)
14522 suffix = fp ? "nb" : "ae";
14523 else if (mode == CCCmode)
14524 suffix = "nc";
14525 else
14526 gcc_unreachable ();
14527 break;
14528 case LE:
14529 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14530 suffix = "le";
14531 break;
14532 case LEU:
14533 if (mode == CCmode)
14534 suffix = "be";
14535 else
14536 gcc_unreachable ();
14537 break;
14538 case UNORDERED:
14539 suffix = fp ? "u" : "p";
14540 break;
14541 case ORDERED:
14542 suffix = fp ? "nu" : "np";
14543 break;
14544 default:
14545 gcc_unreachable ();
14546 }
14547 fputs (suffix, file);
14548 }
14549
14550 /* Print the name of register X to FILE based on its machine mode and number.
14551 If CODE is 'w', pretend the mode is HImode.
14552 If CODE is 'b', pretend the mode is QImode.
14553 If CODE is 'k', pretend the mode is SImode.
14554 If CODE is 'q', pretend the mode is DImode.
14555 If CODE is 'x', pretend the mode is V4SFmode.
14556 If CODE is 't', pretend the mode is V8SFmode.
14557 If CODE is 'g', pretend the mode is V16SFmode.
14558 If CODE is 'h', pretend the reg is the 'high' byte register.
14559 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14560 If CODE is 'd', duplicate the operand for AVX instruction.
14561 */
14562
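/* For example (illustrative, AT&T syntax): given the hard register rax,
   code 'k' prints "%eax", code 'b' prints "%al" and code 'w' prints
   "%ax".  */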
14563 void
14564 print_reg (rtx x, int code, FILE *file)
14565 {
14566 const char *reg;
14567 unsigned int regno;
14568 bool duplicated = code == 'd' && TARGET_AVX;
14569
14570 if (ASSEMBLER_DIALECT == ASM_ATT)
14571 putc ('%', file);
14572
14573 if (x == pc_rtx)
14574 {
14575 gcc_assert (TARGET_64BIT);
14576 fputs ("rip", file);
14577 return;
14578 }
14579
14580 regno = true_regnum (x);
14581 gcc_assert (regno != ARG_POINTER_REGNUM
14582 && regno != FRAME_POINTER_REGNUM
14583 && regno != FLAGS_REG
14584 && regno != FPSR_REG
14585 && regno != FPCR_REG);
14586
14587 if (code == 'w' || MMX_REG_P (x))
14588 code = 2;
14589 else if (code == 'b')
14590 code = 1;
14591 else if (code == 'k')
14592 code = 4;
14593 else if (code == 'q')
14594 code = 8;
14595 else if (code == 'y')
14596 code = 3;
14597 else if (code == 'h')
14598 code = 0;
14599 else if (code == 'x')
14600 code = 16;
14601 else if (code == 't')
14602 code = 32;
14603 else if (code == 'g')
14604 code = 64;
14605 else
14606 code = GET_MODE_SIZE (GET_MODE (x));
14607
14608 /* Irritatingly, AMD extended registers use a different naming convention
14609 from the normal registers: "r%d[bwd]" */
14610 if (REX_INT_REGNO_P (regno))
14611 {
14612 gcc_assert (TARGET_64BIT);
14613 putc ('r', file);
14614 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14615 switch (code)
14616 {
14617 case 0:
14618 error ("extended registers have no high halves");
14619 break;
14620 case 1:
14621 putc ('b', file);
14622 break;
14623 case 2:
14624 putc ('w', file);
14625 break;
14626 case 4:
14627 putc ('d', file);
14628 break;
14629 case 8:
14630 /* no suffix */
14631 break;
14632 default:
14633 error ("unsupported operand size for extended register");
14634 break;
14635 }
14636 return;
14637 }
14638
14639 reg = NULL;
14640 switch (code)
14641 {
14642 case 3:
14643 if (STACK_TOP_P (x))
14644 {
14645 reg = "st(0)";
14646 break;
14647 }
14648 /* FALLTHRU */
14649 case 8:
14650 case 4:
14651 case 12:
14652 if (! ANY_FP_REG_P (x))
14653 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14654 /* FALLTHRU */
14655 case 16:
14656 case 2:
14657 normal:
14658 reg = hi_reg_name[regno];
14659 break;
14660 case 1:
14661 if (regno >= ARRAY_SIZE (qi_reg_name))
14662 goto normal;
14663 reg = qi_reg_name[regno];
14664 break;
14665 case 0:
14666 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14667 goto normal;
14668 reg = qi_high_reg_name[regno];
14669 break;
14670 case 32:
14671 if (SSE_REG_P (x))
14672 {
14673 gcc_assert (!duplicated);
14674 putc ('y', file);
14675 fputs (hi_reg_name[regno] + 1, file);
14676 return;
14677 }
14678 case 64:
14679 if (SSE_REG_P (x))
14680 {
14681 gcc_assert (!duplicated);
14682 putc ('z', file);
14683 fputs (hi_reg_name[REGNO (x)] + 1, file);
14684 return;
14685 }
14686 break;
14687 default:
14688 gcc_unreachable ();
14689 }
14690
14691 fputs (reg, file);
14692 if (duplicated)
14693 {
14694 if (ASSEMBLER_DIALECT == ASM_ATT)
14695 fprintf (file, ", %%%s", reg);
14696 else
14697 fprintf (file, ", %s", reg);
14698 }
14699 }
14700
14701 /* Locate some local-dynamic symbol still in use by this function
14702 so that we can print its name in some tls_local_dynamic_base
14703 pattern. */
14704
14705 static int
14706 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14707 {
14708 rtx x = *px;
14709
14710 if (GET_CODE (x) == SYMBOL_REF
14711 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14712 {
14713 cfun->machine->some_ld_name = XSTR (x, 0);
14714 return 1;
14715 }
14716
14717 return 0;
14718 }
14719
14720 static const char *
14721 get_some_local_dynamic_name (void)
14722 {
14723 rtx insn;
14724
14725 if (cfun->machine->some_ld_name)
14726 return cfun->machine->some_ld_name;
14727
14728 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14729 if (NONDEBUG_INSN_P (insn)
14730 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14731 return cfun->machine->some_ld_name;
14732
14733 return NULL;
14734 }
14735
14736 /* Meaning of CODE:
14737 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14738 C -- print opcode suffix for set/cmov insn.
14739 c -- like C, but print reversed condition
14740 F,f -- likewise, but for floating-point.
14741 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14742 otherwise nothing
14743 R -- print embedded rounding and sae.
14744 r -- print only sae.
14745 z -- print the opcode suffix for the size of the current operand.
14746 Z -- likewise, with special suffixes for x87 instructions.
14747 * -- print a star (in certain assembler syntax)
14748 A -- print an absolute memory reference.
14749 E -- print address with DImode register names if TARGET_64BIT.
14750 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14751 s -- print a shift double count, followed by the assembler's argument
14752 delimiter.
14753 b -- print the QImode name of the register for the indicated operand.
14754 %b0 would print %al if operands[0] is reg 0.
14755 w -- likewise, print the HImode name of the register.
14756 k -- likewise, print the SImode name of the register.
14757 q -- likewise, print the DImode name of the register.
14758 x -- likewise, print the V4SFmode name of the register.
14759 t -- likewise, print the V8SFmode name of the register.
14760 g -- likewise, print the V16SFmode name of the register.
14761 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14762 y -- print "st(0)" instead of "st" as a register.
14763 d -- print duplicated register operand for AVX instruction.
14764 D -- print condition for SSE cmp instruction.
14765 P -- if PIC, print an @PLT suffix.
14766 p -- print raw symbol name.
14767 X -- don't print any sort of PIC '@' suffix for a symbol.
14768 & -- print some in-use local-dynamic symbol name.
14769 H -- print a memory address offset by 8; used for sse high-parts
14770 Y -- print condition for XOP pcom* instruction.
14771 + -- print a branch hint as 'cs' or 'ds' prefix
14772 ; -- print a semicolon (after prefixes due to bug in older gas).
14773 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14774 @ -- print a segment register of thread base pointer load
14775 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14776 */
14777
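/* Hypothetical use of the 'z' code (illustrative, not quoted from the md
   files): a template such as "add%z0\t{%1, %0|%0, %1}" emits "addl" for
   an SImode operand 0 and "addq" for a DImode one, AT&T dialect.  */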
14778 void
14779 ix86_print_operand (FILE *file, rtx x, int code)
14780 {
14781 if (code)
14782 {
14783 switch (code)
14784 {
14785 case 'A':
14786 switch (ASSEMBLER_DIALECT)
14787 {
14788 case ASM_ATT:
14789 putc ('*', file);
14790 break;
14791
14792 case ASM_INTEL:
14793 /* Intel syntax. For absolute addresses, registers should not
14794 be surrounded by braces. */
14795 if (!REG_P (x))
14796 {
14797 putc ('[', file);
14798 ix86_print_operand (file, x, 0);
14799 putc (']', file);
14800 return;
14801 }
14802 break;
14803
14804 default:
14805 gcc_unreachable ();
14806 }
14807
14808 ix86_print_operand (file, x, 0);
14809 return;
14810
14811 case 'E':
14812 /* Wrap address in an UNSPEC to declare special handling. */
14813 if (TARGET_64BIT)
14814 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14815
14816 output_address (x);
14817 return;
14818
14819 case 'L':
14820 if (ASSEMBLER_DIALECT == ASM_ATT)
14821 putc ('l', file);
14822 return;
14823
14824 case 'W':
14825 if (ASSEMBLER_DIALECT == ASM_ATT)
14826 putc ('w', file);
14827 return;
14828
14829 case 'B':
14830 if (ASSEMBLER_DIALECT == ASM_ATT)
14831 putc ('b', file);
14832 return;
14833
14834 case 'Q':
14835 if (ASSEMBLER_DIALECT == ASM_ATT)
14836 putc ('l', file);
14837 return;
14838
14839 case 'S':
14840 if (ASSEMBLER_DIALECT == ASM_ATT)
14841 putc ('s', file);
14842 return;
14843
14844 case 'T':
14845 if (ASSEMBLER_DIALECT == ASM_ATT)
14846 putc ('t', file);
14847 return;
14848
14849 case 'O':
14850 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14851 if (ASSEMBLER_DIALECT != ASM_ATT)
14852 return;
14853
14854 switch (GET_MODE_SIZE (GET_MODE (x)))
14855 {
14856 case 2:
14857 putc ('w', file);
14858 break;
14859
14860 case 4:
14861 putc ('l', file);
14862 break;
14863
14864 case 8:
14865 putc ('q', file);
14866 break;
14867
14868 default:
14869 output_operand_lossage
14870 ("invalid operand size for operand code 'O'");
14871 return;
14872 }
14873
14874 putc ('.', file);
14875 #endif
14876 return;
14877
14878 case 'z':
14879 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14880 {
14881 /* Opcodes don't get size suffixes if using Intel opcodes. */
14882 if (ASSEMBLER_DIALECT == ASM_INTEL)
14883 return;
14884
14885 switch (GET_MODE_SIZE (GET_MODE (x)))
14886 {
14887 case 1:
14888 putc ('b', file);
14889 return;
14890
14891 case 2:
14892 putc ('w', file);
14893 return;
14894
14895 case 4:
14896 putc ('l', file);
14897 return;
14898
14899 case 8:
14900 putc ('q', file);
14901 return;
14902
14903 default:
14904 output_operand_lossage
14905 ("invalid operand size for operand code 'z'");
14906 return;
14907 }
14908 }
14909
14910 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14911 warning
14912 (0, "non-integer operand used with operand code 'z'");
14913 /* FALLTHRU */
14914
14915 case 'Z':
14916 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14917 if (ASSEMBLER_DIALECT == ASM_INTEL)
14918 return;
14919
14920 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14921 {
14922 switch (GET_MODE_SIZE (GET_MODE (x)))
14923 {
14924 case 2:
14925 #ifdef HAVE_AS_IX86_FILDS
14926 putc ('s', file);
14927 #endif
14928 return;
14929
14930 case 4:
14931 putc ('l', file);
14932 return;
14933
14934 case 8:
14935 #ifdef HAVE_AS_IX86_FILDQ
14936 putc ('q', file);
14937 #else
14938 fputs ("ll", file);
14939 #endif
14940 return;
14941
14942 default:
14943 break;
14944 }
14945 }
14946 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14947 {
14948 /* 387 opcodes don't get size suffixes
14949 if the operands are registers. */
14950 if (STACK_REG_P (x))
14951 return;
14952
14953 switch (GET_MODE_SIZE (GET_MODE (x)))
14954 {
14955 case 4:
14956 putc ('s', file);
14957 return;
14958
14959 case 8:
14960 putc ('l', file);
14961 return;
14962
14963 case 12:
14964 case 16:
14965 putc ('t', file);
14966 return;
14967
14968 default:
14969 break;
14970 }
14971 }
14972 else
14973 {
14974 output_operand_lossage
14975 ("invalid operand type used with operand code 'Z'");
14976 return;
14977 }
14978
14979 output_operand_lossage
14980 ("invalid operand size for operand code 'Z'");
14981 return;
14982
14983 case 'd':
14984 case 'b':
14985 case 'w':
14986 case 'k':
14987 case 'q':
14988 case 'h':
14989 case 't':
14990 case 'g':
14991 case 'y':
14992 case 'x':
14993 case 'X':
14994 case 'P':
14995 case 'p':
14996 break;
14997
14998 case 's':
14999 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15000 {
15001 ix86_print_operand (file, x, 0);
15002 fputs (", ", file);
15003 }
15004 return;
15005
15006 case 'Y':
15007 switch (GET_CODE (x))
15008 {
15009 case NE:
15010 fputs ("neq", file);
15011 break;
15012 case EQ:
15013 fputs ("eq", file);
15014 break;
15015 case GE:
15016 case GEU:
15017 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15018 break;
15019 case GT:
15020 case GTU:
15021 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15022 break;
15023 case LE:
15024 case LEU:
15025 fputs ("le", file);
15026 break;
15027 case LT:
15028 case LTU:
15029 fputs ("lt", file);
15030 break;
15031 case UNORDERED:
15032 fputs ("unord", file);
15033 break;
15034 case ORDERED:
15035 fputs ("ord", file);
15036 break;
15037 case UNEQ:
15038 fputs ("ueq", file);
15039 break;
15040 case UNGE:
15041 fputs ("nlt", file);
15042 break;
15043 case UNGT:
15044 fputs ("nle", file);
15045 break;
15046 case UNLE:
15047 fputs ("ule", file);
15048 break;
15049 case UNLT:
15050 fputs ("ult", file);
15051 break;
15052 case LTGT:
15053 fputs ("une", file);
15054 break;
15055 default:
15056 output_operand_lossage ("operand is not a condition code, "
15057 "invalid operand code 'Y'");
15058 return;
15059 }
15060 return;
15061
15062 case 'D':
15063 /* A little bit of braindamage here: the SSE compare instructions use
15064 completely different names for the comparisons than the fp
15065 conditional moves do. */
15066 switch (GET_CODE (x))
15067 {
15068 case UNEQ:
15069 if (TARGET_AVX)
15070 {
15071 fputs ("eq_us", file);
15072 break;
15073 }
15074 case EQ:
15075 fputs ("eq", file);
15076 break;
15077 case UNLT:
15078 if (TARGET_AVX)
15079 {
15080 fputs ("nge", file);
15081 break;
15082 }
15083 case LT:
15084 fputs ("lt", file);
15085 break;
15086 case UNLE:
15087 if (TARGET_AVX)
15088 {
15089 fputs ("ngt", file);
15090 break;
15091 }
15092 case LE:
15093 fputs ("le", file);
15094 break;
15095 case UNORDERED:
15096 fputs ("unord", file);
15097 break;
15098 case LTGT:
15099 if (TARGET_AVX)
15100 {
15101 fputs ("neq_oq", file);
15102 break;
15103 }
15104 case NE:
15105 fputs ("neq", file);
15106 break;
15107 case GE:
15108 if (TARGET_AVX)
15109 {
15110 fputs ("ge", file);
15111 break;
15112 }
15113 case UNGE:
15114 fputs ("nlt", file);
15115 break;
15116 case GT:
15117 if (TARGET_AVX)
15118 {
15119 fputs ("gt", file);
15120 break;
15121 }
15122 case UNGT:
15123 fputs ("nle", file);
15124 break;
15125 case ORDERED:
15126 fputs ("ord", file);
15127 break;
15128 default:
15129 output_operand_lossage ("operand is not a condition code, "
15130 "invalid operand code 'D'");
15131 return;
15132 }
15133 return;
15134
15135 case 'F':
15136 case 'f':
15137 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15138 if (ASSEMBLER_DIALECT == ASM_ATT)
15139 putc ('.', file);
15140 #endif
15141
15142 case 'C':
15143 case 'c':
15144 if (!COMPARISON_P (x))
15145 {
15146 output_operand_lossage ("operand is not a condition code, "
15147 "invalid operand code '%c'", code);
15148 return;
15149 }
15150 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15151 code == 'c' || code == 'f',
15152 code == 'F' || code == 'f',
15153 file);
15154 return;
15155
15156 case 'H':
15157 if (!offsettable_memref_p (x))
15158 {
15159 output_operand_lossage ("operand is not an offsettable memory "
15160 "reference, invalid operand code 'H'");
15161 return;
15162 }
15163 /* It doesn't actually matter what mode we use here, as we're
15164 only going to use this for printing. */
15165 x = adjust_address_nv (x, DImode, 8);
15166 /* Output 'qword ptr' for intel assembler dialect. */
15167 if (ASSEMBLER_DIALECT == ASM_INTEL)
15168 code = 'q';
15169 break;
15170
15171 case 'K':
15172 gcc_assert (CONST_INT_P (x));
15173
15174 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15175 #ifdef HAVE_AS_IX86_HLE
15176 fputs ("xacquire ", file);
15177 #else
15178 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15179 #endif
15180 else if (INTVAL (x) & IX86_HLE_RELEASE)
15181 #ifdef HAVE_AS_IX86_HLE
15182 fputs ("xrelease ", file);
15183 #else
15184 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15185 #endif
15186 /* We do not want to print the value of the operand. */
15187 return;
15188
15189 case 'N':
15190 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15191 fputs ("{z}", file);
15192 return;
15193
15194 case 'r':
15195 gcc_assert (CONST_INT_P (x));
15196 gcc_assert (INTVAL (x) == ROUND_SAE);
15197
15198 if (ASSEMBLER_DIALECT == ASM_INTEL)
15199 fputs (", ", file);
15200
15201 fputs ("{sae}", file);
15202
15203 if (ASSEMBLER_DIALECT == ASM_ATT)
15204 fputs (", ", file);
15205
15206 return;
15207
15208 case 'R':
15209 gcc_assert (CONST_INT_P (x));
15210
15211 if (ASSEMBLER_DIALECT == ASM_INTEL)
15212 fputs (", ", file);
15213
15214 switch (INTVAL (x))
15215 {
15216 case ROUND_NEAREST_INT | ROUND_SAE:
15217 fputs ("{rn-sae}", file);
15218 break;
15219 case ROUND_NEG_INF | ROUND_SAE:
15220 fputs ("{rd-sae}", file);
15221 break;
15222 case ROUND_POS_INF | ROUND_SAE:
15223 fputs ("{ru-sae}", file);
15224 break;
15225 case ROUND_ZERO | ROUND_SAE:
15226 fputs ("{rz-sae}", file);
15227 break;
15228 default:
15229 gcc_unreachable ();
15230 }
15231
15232 if (ASSEMBLER_DIALECT == ASM_ATT)
15233 fputs (", ", file);
15234
15235 return;
15236
15237 case '*':
15238 if (ASSEMBLER_DIALECT == ASM_ATT)
15239 putc ('*', file);
15240 return;
15241
15242 case '&':
15243 {
15244 const char *name = get_some_local_dynamic_name ();
15245 if (name == NULL)
15246 output_operand_lossage ("'%%&' used without any "
15247 "local dynamic TLS references");
15248 else
15249 assemble_name (file, name);
15250 return;
15251 }
15252
15253 case '+':
15254 {
15255 rtx x;
15256
15257 if (!optimize
15258 || optimize_function_for_size_p (cfun)
15259 || !TARGET_BRANCH_PREDICTION_HINTS)
15260 return;
15261
15262 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15263 if (x)
15264 {
15265 int pred_val = XINT (x, 0);
15266
15267 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15268 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15269 {
15270 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15271 bool cputaken
15272 = final_forward_branch_p (current_output_insn) == 0;
15273
15274 /* Emit hints only when the default branch prediction
15275 heuristics would fail. */
15276 if (taken != cputaken)
15277 {
15278 /* We use 3e (DS) prefix for taken branches and
15279 2e (CS) prefix for not taken branches. */
15280 if (taken)
15281 fputs ("ds ; ", file);
15282 else
15283 fputs ("cs ; ", file);
15284 }
15285 }
15286 }
15287 return;
15288 }
15289
15290 case ';':
15291 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15292 putc (';', file);
15293 #endif
15294 return;
15295
15296 case '@':
15297 if (ASSEMBLER_DIALECT == ASM_ATT)
15298 putc ('%', file);
15299
15300 /* The kernel uses a different segment register for performance
15301 reasons; this way a system call does not have to trash the
15302 userspace segment register, which would be expensive. */
15303 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15304 fputs ("fs", file);
15305 else
15306 fputs ("gs", file);
15307 return;
15308
15309 case '~':
15310 putc (TARGET_AVX2 ? 'i' : 'f', file);
15311 return;
15312
15313 case '^':
15314 if (TARGET_64BIT && Pmode != word_mode)
15315 fputs ("addr32 ", file);
15316 return;
15317
15318 default:
15319 output_operand_lossage ("invalid operand code '%c'", code);
15320 }
15321 }
15322
15323 if (REG_P (x))
15324 print_reg (x, code, file);
15325
15326 else if (MEM_P (x))
15327 {
15328 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15329 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15330 && GET_MODE (x) != BLKmode)
15331 {
15332 const char * size;
15333 switch (GET_MODE_SIZE (GET_MODE (x)))
15334 {
15335 case 1: size = "BYTE"; break;
15336 case 2: size = "WORD"; break;
15337 case 4: size = "DWORD"; break;
15338 case 8: size = "QWORD"; break;
15339 case 12: size = "TBYTE"; break;
15340 case 16:
15341 if (GET_MODE (x) == XFmode)
15342 size = "TBYTE";
15343 else
15344 size = "XMMWORD";
15345 break;
15346 case 32: size = "YMMWORD"; break;
15347 case 64: size = "ZMMWORD"; break;
15348 default:
15349 gcc_unreachable ();
15350 }
15351
15352 /* Check for explicit size override (codes 'b', 'w', 'k',
15353 'q' and 'x') */
15354 if (code == 'b')
15355 size = "BYTE";
15356 else if (code == 'w')
15357 size = "WORD";
15358 else if (code == 'k')
15359 size = "DWORD";
15360 else if (code == 'q')
15361 size = "QWORD";
15362 else if (code == 'x')
15363 size = "XMMWORD";
15364
15365 fputs (size, file);
15366 fputs (" PTR ", file);
15367 }
15368
15369 x = XEXP (x, 0);
15370 /* Avoid (%rip) for call operands. */
15371 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15372 && !CONST_INT_P (x))
15373 output_addr_const (file, x);
15374 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15375 output_operand_lossage ("invalid constraints for operand");
15376 else
15377 output_address (x);
15378 }
15379
15380 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15381 {
15382 REAL_VALUE_TYPE r;
15383 long l;
15384
15385 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15386 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15387
15388 if (ASSEMBLER_DIALECT == ASM_ATT)
15389 putc ('$', file);
15390 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15391 if (code == 'q')
15392 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15393 (unsigned long long) (int) l);
15394 else
15395 fprintf (file, "0x%08x", (unsigned int) l);
15396 }
15397
15398 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15399 {
15400 REAL_VALUE_TYPE r;
15401 long l[2];
15402
15403 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15404 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15405
15406 if (ASSEMBLER_DIALECT == ASM_ATT)
15407 putc ('$', file);
15408 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15409 }
15410
15411 /* These float cases don't actually occur as immediate operands. */
15412 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15413 {
15414 char dstr[30];
15415
15416 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15417 fputs (dstr, file);
15418 }
15419
15420 else
15421 {
15422 /* We have patterns that allow zero sets of memory, for instance.
15423 In 64-bit mode, we should probably support all 8-byte vectors,
15424 since we can in fact encode that into an immediate. */
15425 if (GET_CODE (x) == CONST_VECTOR)
15426 {
15427 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15428 x = const0_rtx;
15429 }
15430
15431 if (code != 'P' && code != 'p')
15432 {
15433 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15434 {
15435 if (ASSEMBLER_DIALECT == ASM_ATT)
15436 putc ('$', file);
15437 }
15438 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15439 || GET_CODE (x) == LABEL_REF)
15440 {
15441 if (ASSEMBLER_DIALECT == ASM_ATT)
15442 putc ('$', file);
15443 else
15444 fputs ("OFFSET FLAT:", file);
15445 }
15446 }
15447 if (CONST_INT_P (x))
15448 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15449 else if (flag_pic || MACHOPIC_INDIRECT)
15450 output_pic_addr_const (file, x, code);
15451 else
15452 output_addr_const (file, x);
15453 }
15454 }
15455
15456 static bool
15457 ix86_print_operand_punct_valid_p (unsigned char code)
15458 {
15459 return (code == '@' || code == '*' || code == '+' || code == '&'
15460 || code == ';' || code == '~' || code == '^');
15461 }
15462 \f
15463 /* Print a memory operand whose address is ADDR. */
15464
15465 static void
15466 ix86_print_operand_address (FILE *file, rtx addr)
15467 {
15468 struct ix86_address parts;
15469 rtx base, index, disp;
15470 int scale;
15471 int ok;
15472 bool vsib = false;
15473 int code = 0;
15474
15475 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15476 {
15477 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15478 gcc_assert (parts.index == NULL_RTX);
15479 parts.index = XVECEXP (addr, 0, 1);
15480 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15481 addr = XVECEXP (addr, 0, 0);
15482 vsib = true;
15483 }
15484 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15485 {
15486 gcc_assert (TARGET_64BIT);
15487 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15488 code = 'q';
15489 }
15490 else
15491 ok = ix86_decompose_address (addr, &parts);
15492
15493 gcc_assert (ok);
15494
15495 base = parts.base;
15496 index = parts.index;
15497 disp = parts.disp;
15498 scale = parts.scale;
15499
15500 switch (parts.seg)
15501 {
15502 case SEG_DEFAULT:
15503 break;
15504 case SEG_FS:
15505 case SEG_GS:
15506 if (ASSEMBLER_DIALECT == ASM_ATT)
15507 putc ('%', file);
15508 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15509 break;
15510 default:
15511 gcc_unreachable ();
15512 }
15513
15514 /* Use the one byte shorter RIP-relative addressing in 64-bit mode. */
15515 if (TARGET_64BIT && !base && !index)
15516 {
15517 rtx symbol = disp;
15518
15519 if (GET_CODE (disp) == CONST
15520 && GET_CODE (XEXP (disp, 0)) == PLUS
15521 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15522 symbol = XEXP (XEXP (disp, 0), 0);
15523
15524 if (GET_CODE (symbol) == LABEL_REF
15525 || (GET_CODE (symbol) == SYMBOL_REF
15526 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15527 base = pc_rtx;
15528 }
15529 if (!base && !index)
15530 {
15531 /* A displacement-only address requires special attention. */
15532
15533 if (CONST_INT_P (disp))
15534 {
15535 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15536 fputs ("ds:", file);
15537 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15538 }
15539 else if (flag_pic)
15540 output_pic_addr_const (file, disp, 0);
15541 else
15542 output_addr_const (file, disp);
15543 }
15544 else
15545 {
15546 /* Print SImode register names to force addr32 prefix. */
15547 if (SImode_address_operand (addr, VOIDmode))
15548 {
15549 #ifdef ENABLE_CHECKING
15550 gcc_assert (TARGET_64BIT);
15551 switch (GET_CODE (addr))
15552 {
15553 case SUBREG:
15554 gcc_assert (GET_MODE (addr) == SImode);
15555 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15556 break;
15557 case ZERO_EXTEND:
15558 case AND:
15559 gcc_assert (GET_MODE (addr) == DImode);
15560 break;
15561 default:
15562 gcc_unreachable ();
15563 }
15564 #endif
15565 gcc_assert (!code);
15566 code = 'k';
15567 }
15568 else if (code == 0
15569 && TARGET_X32
15570 && disp
15571 && CONST_INT_P (disp)
15572 && INTVAL (disp) < -16*1024*1024)
15573 {
15574 /* X32 runs in 64-bit mode, where displacement, DISP, in
15575 address DISP(%r64), is encoded as 32-bit immediate sign-
15576 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15577 address is %r64 + 0xffffffffbffffd00. When %r64 <
15578 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15579 which is invalid for x32. The correct address is %r64
15580 - 0x40000300 == 0xf7ffdd64. To properly encode
15581 -0x40000300(%r64) for x32, we zero-extend negative
15582 displacement by forcing addr32 prefix which truncates
15583 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15584 zero-extend all negative displacements, including -1(%rsp).
15585 However, for small negative displacements, sign-extension
15586 won't cause overflow. We only zero-extend negative
15587 displacements if they are < -16*1024*1024, which is also used
15588 to check legitimate address displacements for PIC. */
15589 code = 'k';
15590 }
15591
15592 if (ASSEMBLER_DIALECT == ASM_ATT)
15593 {
15594 if (disp)
15595 {
15596 if (flag_pic)
15597 output_pic_addr_const (file, disp, 0);
15598 else if (GET_CODE (disp) == LABEL_REF)
15599 output_asm_label (disp);
15600 else
15601 output_addr_const (file, disp);
15602 }
15603
15604 putc ('(', file);
15605 if (base)
15606 print_reg (base, code, file);
15607 if (index)
15608 {
15609 putc (',', file);
15610 print_reg (index, vsib ? 0 : code, file);
15611 if (scale != 1 || vsib)
15612 fprintf (file, ",%d", scale);
15613 }
15614 putc (')', file);
15615 }
15616 else
15617 {
15618 rtx offset = NULL_RTX;
15619
15620 if (disp)
15621 {
15622 /* Pull out the offset of a symbol; print any symbol itself. */
15623 if (GET_CODE (disp) == CONST
15624 && GET_CODE (XEXP (disp, 0)) == PLUS
15625 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15626 {
15627 offset = XEXP (XEXP (disp, 0), 1);
15628 disp = gen_rtx_CONST (VOIDmode,
15629 XEXP (XEXP (disp, 0), 0));
15630 }
15631
15632 if (flag_pic)
15633 output_pic_addr_const (file, disp, 0);
15634 else if (GET_CODE (disp) == LABEL_REF)
15635 output_asm_label (disp);
15636 else if (CONST_INT_P (disp))
15637 offset = disp;
15638 else
15639 output_addr_const (file, disp);
15640 }
15641
15642 putc ('[', file);
15643 if (base)
15644 {
15645 print_reg (base, code, file);
15646 if (offset)
15647 {
15648 if (INTVAL (offset) >= 0)
15649 putc ('+', file);
15650 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15651 }
15652 }
15653 else if (offset)
15654 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15655 else
15656 putc ('0', file);
15657
15658 if (index)
15659 {
15660 putc ('+', file);
15661 print_reg (index, vsib ? 0 : code, file);
15662 if (scale != 1 || vsib)
15663 fprintf (file, "*%d", scale);
15664 }
15665 putc (']', file);
15666 }
15667 }
15668 }
15669
15670 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15671
15672 static bool
15673 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15674 {
15675 rtx op;
15676
15677 if (GET_CODE (x) != UNSPEC)
15678 return false;
15679
15680 op = XVECEXP (x, 0, 0);
15681 switch (XINT (x, 1))
15682 {
15683 case UNSPEC_GOTTPOFF:
15684 output_addr_const (file, op);
15685 /* FIXME: This might be @TPOFF in Sun ld. */
15686 fputs ("@gottpoff", file);
15687 break;
15688 case UNSPEC_TPOFF:
15689 output_addr_const (file, op);
15690 fputs ("@tpoff", file);
15691 break;
15692 case UNSPEC_NTPOFF:
15693 output_addr_const (file, op);
15694 if (TARGET_64BIT)
15695 fputs ("@tpoff", file);
15696 else
15697 fputs ("@ntpoff", file);
15698 break;
15699 case UNSPEC_DTPOFF:
15700 output_addr_const (file, op);
15701 fputs ("@dtpoff", file);
15702 break;
15703 case UNSPEC_GOTNTPOFF:
15704 output_addr_const (file, op);
15705 if (TARGET_64BIT)
15706 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15707 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15708 else
15709 fputs ("@gotntpoff", file);
15710 break;
15711 case UNSPEC_INDNTPOFF:
15712 output_addr_const (file, op);
15713 fputs ("@indntpoff", file);
15714 break;
15715 #if TARGET_MACHO
15716 case UNSPEC_MACHOPIC_OFFSET:
15717 output_addr_const (file, op);
15718 putc ('-', file);
15719 machopic_output_function_base_name (file);
15720 break;
15721 #endif
15722
15723 case UNSPEC_STACK_CHECK:
15724 {
15725 int offset;
15726
15727 gcc_assert (flag_split_stack);
15728
15729 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15730 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15731 #else
15732 gcc_unreachable ();
15733 #endif
15734
15735 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15736 }
15737 break;
15738
15739 default:
15740 return false;
15741 }
15742
15743 return true;
15744 }
15745 \f
15746 /* Split one or more double-mode RTL references into pairs of half-mode
15747 references. The RTL can be REG, offsettable MEM, integer constant, or
15748 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15749 split and "num" is its length. lo_half and hi_half are output arrays
15750 that parallel "operands". */
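/* For example, splitting a DImode operand yields SImode lo/hi halves at
   byte offsets 0 and 4, and splitting a TImode operand yields DImode halves
   at offsets 0 and 8 (x86 is little-endian, so the low half is always at
   offset 0).  */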
15751
15752 void
15753 split_double_mode (enum machine_mode mode, rtx operands[],
15754 int num, rtx lo_half[], rtx hi_half[])
15755 {
15756 enum machine_mode half_mode;
15757 unsigned int byte;
15758
15759 switch (mode)
15760 {
15761 case TImode:
15762 half_mode = DImode;
15763 break;
15764 case DImode:
15765 half_mode = SImode;
15766 break;
15767 default:
15768 gcc_unreachable ();
15769 }
15770
15771 byte = GET_MODE_SIZE (half_mode);
15772
15773 while (num--)
15774 {
15775 rtx op = operands[num];
15776
15777 /* simplify_subreg refuses to split volatile memory addresses,
15778 but we still have to handle them. */
15779 if (MEM_P (op))
15780 {
15781 lo_half[num] = adjust_address (op, half_mode, 0);
15782 hi_half[num] = adjust_address (op, half_mode, byte);
15783 }
15784 else
15785 {
15786 lo_half[num] = simplify_gen_subreg (half_mode, op,
15787 GET_MODE (op) == VOIDmode
15788 ? mode : GET_MODE (op), 0);
15789 hi_half[num] = simplify_gen_subreg (half_mode, op,
15790 GET_MODE (op) == VOIDmode
15791 ? mode : GET_MODE (op), byte);
15792 }
15793 }
15794 }
15795 \f
15796 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15797 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15798 is the expression of the binary operation. The output may either be
15799 emitted here, or returned to the caller, like all output_* functions.
15800
15801 There is no guarantee that the operands are the same mode, as they
15802 might be within FLOAT or FLOAT_EXTEND expressions. */
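/* The template is assembled in a static buffer: for SSE operands an AVX
   (three-operand) or SSE (two-operand) scalar mnemonic is produced, while
   for x87 operands the base mnemonic is combined with a popping and/or
   reversed form chosen from the operand and liveness patterns below.  */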
15803
15804 #ifndef SYSV386_COMPAT
15805 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15806 wants to fix the assemblers because that causes incompatibility
15807 with gcc. No-one wants to fix gcc because that causes
15808 incompatibility with assemblers... You can use the option of
15809 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15810 #define SYSV386_COMPAT 1
15811 #endif
15812
15813 const char *
15814 output_387_binary_op (rtx insn, rtx *operands)
15815 {
15816 static char buf[40];
15817 const char *p;
15818 const char *ssep;
15819 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15820
15821 #ifdef ENABLE_CHECKING
15822 /* Even if we do not want to check the inputs, this documents the input
15823 constraints, which helps in understanding the following code. */
15824 if (STACK_REG_P (operands[0])
15825 && ((REG_P (operands[1])
15826 && REGNO (operands[0]) == REGNO (operands[1])
15827 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15828 || (REG_P (operands[2])
15829 && REGNO (operands[0]) == REGNO (operands[2])
15830 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15831 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15832 ; /* ok */
15833 else
15834 gcc_assert (is_sse);
15835 #endif
15836
15837 switch (GET_CODE (operands[3]))
15838 {
15839 case PLUS:
15840 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15841 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15842 p = "fiadd";
15843 else
15844 p = "fadd";
15845 ssep = "vadd";
15846 break;
15847
15848 case MINUS:
15849 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15850 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15851 p = "fisub";
15852 else
15853 p = "fsub";
15854 ssep = "vsub";
15855 break;
15856
15857 case MULT:
15858 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15859 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15860 p = "fimul";
15861 else
15862 p = "fmul";
15863 ssep = "vmul";
15864 break;
15865
15866 case DIV:
15867 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15868 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15869 p = "fidiv";
15870 else
15871 p = "fdiv";
15872 ssep = "vdiv";
15873 break;
15874
15875 default:
15876 gcc_unreachable ();
15877 }
15878
15879 if (is_sse)
15880 {
15881 if (TARGET_AVX)
15882 {
15883 strcpy (buf, ssep);
15884 if (GET_MODE (operands[0]) == SFmode)
15885 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15886 else
15887 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15888 }
15889 else
15890 {
15891 strcpy (buf, ssep + 1);
15892 if (GET_MODE (operands[0]) == SFmode)
15893 strcat (buf, "ss\t{%2, %0|%0, %2}");
15894 else
15895 strcat (buf, "sd\t{%2, %0|%0, %2}");
15896 }
15897 return buf;
15898 }
15899 strcpy (buf, p);
15900
15901 switch (GET_CODE (operands[3]))
15902 {
15903 case MULT:
15904 case PLUS:
15905 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15906 {
15907 rtx temp = operands[2];
15908 operands[2] = operands[1];
15909 operands[1] = temp;
15910 }
15911
15912 /* We know operands[0] == operands[1]. */
15913
15914 if (MEM_P (operands[2]))
15915 {
15916 p = "%Z2\t%2";
15917 break;
15918 }
15919
15920 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15921 {
15922 if (STACK_TOP_P (operands[0]))
15923 /* How is it that we are storing to a dead operand[2]?
15924 Well, presumably operands[1] is dead too. We can't
15925 store the result to st(0) as st(0) gets popped on this
15926 instruction. Instead store to operands[2] (which I
15927 think has to be st(1)). st(1) will be popped later.
15928 gcc <= 2.8.1 didn't have this check and generated
15929 assembly code that the Unixware assembler rejected. */
15930 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15931 else
15932 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15933 break;
15934 }
15935
15936 if (STACK_TOP_P (operands[0]))
15937 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15938 else
15939 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15940 break;
15941
15942 case MINUS:
15943 case DIV:
15944 if (MEM_P (operands[1]))
15945 {
15946 p = "r%Z1\t%1";
15947 break;
15948 }
15949
15950 if (MEM_P (operands[2]))
15951 {
15952 p = "%Z2\t%2";
15953 break;
15954 }
15955
15956 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15957 {
15958 #if SYSV386_COMPAT
15959 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15960 derived assemblers, confusingly reverse the direction of
15961 the operation for fsub{r} and fdiv{r} when the
15962 destination register is not st(0). The Intel assembler
15963 doesn't have this brain damage. Read !SYSV386_COMPAT to
15964 figure out what the hardware really does. */
15965 if (STACK_TOP_P (operands[0]))
15966 p = "{p\t%0, %2|rp\t%2, %0}";
15967 else
15968 p = "{rp\t%2, %0|p\t%0, %2}";
15969 #else
15970 if (STACK_TOP_P (operands[0]))
15971 /* As above for fmul/fadd, we can't store to st(0). */
15972 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15973 else
15974 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15975 #endif
15976 break;
15977 }
15978
15979 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15980 {
15981 #if SYSV386_COMPAT
15982 if (STACK_TOP_P (operands[0]))
15983 p = "{rp\t%0, %1|p\t%1, %0}";
15984 else
15985 p = "{p\t%1, %0|rp\t%0, %1}";
15986 #else
15987 if (STACK_TOP_P (operands[0]))
15988 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15989 else
15990 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15991 #endif
15992 break;
15993 }
15994
15995 if (STACK_TOP_P (operands[0]))
15996 {
15997 if (STACK_TOP_P (operands[1]))
15998 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15999 else
16000 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16001 break;
16002 }
16003 else if (STACK_TOP_P (operands[1]))
16004 {
16005 #if SYSV386_COMPAT
16006 p = "{\t%1, %0|r\t%0, %1}";
16007 #else
16008 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16009 #endif
16010 }
16011 else
16012 {
16013 #if SYSV386_COMPAT
16014 p = "{r\t%2, %0|\t%0, %2}";
16015 #else
16016 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16017 #endif
16018 }
16019 break;
16020
16021 default:
16022 gcc_unreachable ();
16023 }
16024
16025 strcat (buf, p);
16026 return buf;
16027 }
16028
16029 /* Check if a 256bit AVX register is referenced inside of EXP. */
16030
16031 static int
16032 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16033 {
16034 rtx exp = *pexp;
16035
16036 if (GET_CODE (exp) == SUBREG)
16037 exp = SUBREG_REG (exp);
16038
16039 if (REG_P (exp)
16040 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16041 return 1;
16042
16043 return 0;
16044 }
16045
16046 /* Return needed mode for entity in optimize_mode_switching pass. */
16047
16048 static int
16049 ix86_avx_u128_mode_needed (rtx insn)
16050 {
16051 if (CALL_P (insn))
16052 {
16053 rtx link;
16054
16055 /* Needed mode is set to AVX_U128_CLEAN if there are
16056 no 256bit modes used in function arguments. */
16057 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16058 link;
16059 link = XEXP (link, 1))
16060 {
16061 if (GET_CODE (XEXP (link, 0)) == USE)
16062 {
16063 rtx arg = XEXP (XEXP (link, 0), 0);
16064
16065 if (ix86_check_avx256_register (&arg, NULL))
16066 return AVX_U128_DIRTY;
16067 }
16068 }
16069
16070 return AVX_U128_CLEAN;
16071 }
16072
16073 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
16074 changes state only when a 256bit register is written to, but we need
16075 to prevent the compiler from moving the optimal insertion point above
16076 an eventual read from a 256bit register. */
16077 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16078 return AVX_U128_DIRTY;
16079
16080 return AVX_U128_ANY;
16081 }
16082
16083 /* Return mode that i387 must be switched into
16084 prior to the execution of insn. */
16085
16086 static int
16087 ix86_i387_mode_needed (int entity, rtx insn)
16088 {
16089 enum attr_i387_cw mode;
16090
16091 /* The mode UNINITIALIZED is used to store the control word after a
16092 function call or ASM pattern. The mode ANY specifies that the function
16093 has no requirements on the control word and makes no changes to the
16094 bits we are interested in. */
16095
16096 if (CALL_P (insn)
16097 || (NONJUMP_INSN_P (insn)
16098 && (asm_noperands (PATTERN (insn)) >= 0
16099 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16100 return I387_CW_UNINITIALIZED;
16101
16102 if (recog_memoized (insn) < 0)
16103 return I387_CW_ANY;
16104
16105 mode = get_attr_i387_cw (insn);
16106
16107 switch (entity)
16108 {
16109 case I387_TRUNC:
16110 if (mode == I387_CW_TRUNC)
16111 return mode;
16112 break;
16113
16114 case I387_FLOOR:
16115 if (mode == I387_CW_FLOOR)
16116 return mode;
16117 break;
16118
16119 case I387_CEIL:
16120 if (mode == I387_CW_CEIL)
16121 return mode;
16122 break;
16123
16124 case I387_MASK_PM:
16125 if (mode == I387_CW_MASK_PM)
16126 return mode;
16127 break;
16128
16129 default:
16130 gcc_unreachable ();
16131 }
16132
16133 return I387_CW_ANY;
16134 }
16135
16136 /* Return mode that entity must be switched into
16137 prior to the execution of insn. */
16138
16139 int
16140 ix86_mode_needed (int entity, rtx insn)
16141 {
16142 switch (entity)
16143 {
16144 case AVX_U128:
16145 return ix86_avx_u128_mode_needed (insn);
16146 case I387_TRUNC:
16147 case I387_FLOOR:
16148 case I387_CEIL:
16149 case I387_MASK_PM:
16150 return ix86_i387_mode_needed (entity, insn);
16151 default:
16152 gcc_unreachable ();
16153 }
16154 return 0;
16155 }
16156
16157 /* Check if a 256bit AVX register is referenced in stores. */
16158
16159 static void
16160 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16161 {
16162 if (ix86_check_avx256_register (&dest, NULL))
16163 {
16164 bool *used = (bool *) data;
16165 *used = true;
16166 }
16167 }
16168
16169 /* Calculate mode of upper 128bit AVX registers after the insn. */
16170
16171 static int
16172 ix86_avx_u128_mode_after (int mode, rtx insn)
16173 {
16174 rtx pat = PATTERN (insn);
16175
16176 if (vzeroupper_operation (pat, VOIDmode)
16177 || vzeroall_operation (pat, VOIDmode))
16178 return AVX_U128_CLEAN;
16179
16180 /* We know that the state is clean after a CALL insn if no 256bit
16181 register is used for the function return value. */
16182 if (CALL_P (insn))
16183 {
16184 bool avx_reg256_found = false;
16185 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16186
16187 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16188 }
16189
16190 /* Otherwise, return current mode. Remember that if insn
16191 references AVX 256bit registers, the mode was already changed
16192 to DIRTY from MODE_NEEDED. */
16193 return mode;
16194 }
16195
16196 /* Return the mode that an insn results in. */
16197
16198 int
16199 ix86_mode_after (int entity, int mode, rtx insn)
16200 {
16201 switch (entity)
16202 {
16203 case AVX_U128:
16204 return ix86_avx_u128_mode_after (mode, insn);
16205 case I387_TRUNC:
16206 case I387_FLOOR:
16207 case I387_CEIL:
16208 case I387_MASK_PM:
16209 return mode;
16210 default:
16211 gcc_unreachable ();
16212 }
16213 }
16214
16215 static int
16216 ix86_avx_u128_mode_entry (void)
16217 {
16218 tree arg;
16219
16220 /* Entry mode is set to AVX_U128_DIRTY if there are
16221 256bit modes used in function arguments. */
16222 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16223 arg = TREE_CHAIN (arg))
16224 {
16225 rtx incoming = DECL_INCOMING_RTL (arg);
16226
16227 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16228 return AVX_U128_DIRTY;
16229 }
16230
16231 return AVX_U128_CLEAN;
16232 }
16233
16234 /* Return a mode that ENTITY is assumed to be
16235 switched to at function entry. */
16236
16237 int
16238 ix86_mode_entry (int entity)
16239 {
16240 switch (entity)
16241 {
16242 case AVX_U128:
16243 return ix86_avx_u128_mode_entry ();
16244 case I387_TRUNC:
16245 case I387_FLOOR:
16246 case I387_CEIL:
16247 case I387_MASK_PM:
16248 return I387_CW_ANY;
16249 default:
16250 gcc_unreachable ();
16251 }
16252 }
16253
16254 static int
16255 ix86_avx_u128_mode_exit (void)
16256 {
16257 rtx reg = crtl->return_rtx;
16258
16259 /* Exit mode is set to AVX_U128_DIRTY if there are
16260 256bit modes used in the function return register. */
16261 if (reg && ix86_check_avx256_register (&reg, NULL))
16262 return AVX_U128_DIRTY;
16263
16264 return AVX_U128_CLEAN;
16265 }
16266
16267 /* Return a mode that ENTITY is assumed to be
16268 switched to at function exit. */
16269
16270 int
16271 ix86_mode_exit (int entity)
16272 {
16273 switch (entity)
16274 {
16275 case AVX_U128:
16276 return ix86_avx_u128_mode_exit ();
16277 case I387_TRUNC:
16278 case I387_FLOOR:
16279 case I387_CEIL:
16280 case I387_MASK_PM:
16281 return I387_CW_ANY;
16282 default:
16283 gcc_unreachable ();
16284 }
16285 }
16286
16287 /* Output code to initialize control word copies used by trunc?f?i and
16288 rounding patterns. The current control word is saved in the SLOT_CW_STORED
16289 stack slot, while a copy modified for MODE is stored in that mode's slot. */
16290
16291 static void
16292 emit_i387_cw_initialization (int mode)
16293 {
16294 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16295 rtx new_mode;
16296
16297 enum ix86_stack_slot slot;
16298
16299 rtx reg = gen_reg_rtx (HImode);
16300
16301 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16302 emit_move_insn (reg, copy_rtx (stored_mode));
16303
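  /* The x87 control word encodes the rounding mode in bits 10-11
     (00 = to nearest, 01 = down, 10 = up, 11 = toward zero) and the
     precision exception mask in bit 5, which is why the sequences below
     use the masks 0x0c00, 0x0400, 0x0800 and 0x0020.  */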
16304 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16305 || optimize_insn_for_size_p ())
16306 {
16307 switch (mode)
16308 {
16309 case I387_CW_TRUNC:
16310 /* round toward zero (truncate) */
16311 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16312 slot = SLOT_CW_TRUNC;
16313 break;
16314
16315 case I387_CW_FLOOR:
16316 /* round down toward -oo */
16317 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16318 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16319 slot = SLOT_CW_FLOOR;
16320 break;
16321
16322 case I387_CW_CEIL:
16323 /* round up toward +oo */
16324 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16325 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16326 slot = SLOT_CW_CEIL;
16327 break;
16328
16329 case I387_CW_MASK_PM:
16330 /* mask precision exception for nearbyint() */
16331 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16332 slot = SLOT_CW_MASK_PM;
16333 break;
16334
16335 default:
16336 gcc_unreachable ();
16337 }
16338 }
16339 else
16340 {
16341 switch (mode)
16342 {
16343 case I387_CW_TRUNC:
16344 /* round toward zero (truncate) */
16345 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16346 slot = SLOT_CW_TRUNC;
16347 break;
16348
16349 case I387_CW_FLOOR:
16350 /* round down toward -oo */
16351 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16352 slot = SLOT_CW_FLOOR;
16353 break;
16354
16355 case I387_CW_CEIL:
16356 /* round up toward +oo */
16357 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16358 slot = SLOT_CW_CEIL;
16359 break;
16360
16361 case I387_CW_MASK_PM:
16362 /* mask precision exception for nearbyint() */
16363 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16364 slot = SLOT_CW_MASK_PM;
16365 break;
16366
16367 default:
16368 gcc_unreachable ();
16369 }
16370 }
16371
16372 gcc_assert (slot < MAX_386_STACK_LOCALS);
16373
16374 new_mode = assign_386_stack_local (HImode, slot);
16375 emit_move_insn (new_mode, reg);
16376 }
16377
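/* On AVX hardware, executing legacy (non-VEX) SSE instructions while the
   upper halves of the ymm registers are dirty incurs a transition penalty.
   The mode-switching entities above track where those upper halves may be
   dirty so that vzeroupper can be inserted before such transitions.  */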
16378 /* Emit vzeroupper. */
16379
16380 void
16381 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16382 {
16383 int i;
16384
16385 /* Cancel automatic vzeroupper insertion if there are
16386 live call-saved SSE registers at the insertion point. */
16387
16388 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16389 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16390 return;
16391
16392 if (TARGET_64BIT)
16393 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16394 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16395 return;
16396
16397 emit_insn (gen_avx_vzeroupper ());
16398 }
16399
16400 /* Generate one or more insns to set ENTITY to MODE. */
16401
16402 void
16403 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16404 {
16405 switch (entity)
16406 {
16407 case AVX_U128:
16408 if (mode == AVX_U128_CLEAN)
16409 ix86_avx_emit_vzeroupper (regs_live);
16410 break;
16411 case I387_TRUNC:
16412 case I387_FLOOR:
16413 case I387_CEIL:
16414 case I387_MASK_PM:
16415 if (mode != I387_CW_ANY
16416 && mode != I387_CW_UNINITIALIZED)
16417 emit_i387_cw_initialization (mode);
16418 break;
16419 default:
16420 gcc_unreachable ();
16421 }
16422 }
16423
16424 /* Output code for INSN to convert a float to a signed int. OPERANDS
16425 are the insn operands. The output may be [HSD]Imode and the input
16426 operand may be [SDX]Fmode. */
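/* When the current rounding mode does not match the one required by the
   pattern, the control word prepared by emit_i387_cw_initialization is
   loaded with fldcw before the fist/fistp and the saved original word is
   restored afterwards; fisttp (SSE3) truncates regardless of the rounding
   mode and needs no such switch.  */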
16427
16428 const char *
16429 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16430 {
16431 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16432 int dimode_p = GET_MODE (operands[0]) == DImode;
16433 int round_mode = get_attr_i387_cw (insn);
16434
16435 /* Jump through a hoop or two for DImode, since the hardware has no
16436 non-popping instruction. We used to do this a different way, but
16437 that was somewhat fragile and broke with post-reload splitters. */
16438 if ((dimode_p || fisttp) && !stack_top_dies)
16439 output_asm_insn ("fld\t%y1", operands);
16440
16441 gcc_assert (STACK_TOP_P (operands[1]));
16442 gcc_assert (MEM_P (operands[0]));
16443 gcc_assert (GET_MODE (operands[1]) != TFmode);
16444
16445 if (fisttp)
16446 output_asm_insn ("fisttp%Z0\t%0", operands);
16447 else
16448 {
16449 if (round_mode != I387_CW_ANY)
16450 output_asm_insn ("fldcw\t%3", operands);
16451 if (stack_top_dies || dimode_p)
16452 output_asm_insn ("fistp%Z0\t%0", operands);
16453 else
16454 output_asm_insn ("fist%Z0\t%0", operands);
16455 if (round_mode != I387_CW_ANY)
16456 output_asm_insn ("fldcw\t%2", operands);
16457 }
16458
16459 return "";
16460 }
16461
16462 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16463 have the values zero or one, indicates the ffreep insn's operand
16464 from the OPERANDS array. */
16465
16466 static const char *
16467 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16468 {
16469 if (TARGET_USE_FFREEP)
16470 #ifdef HAVE_AS_IX86_FFREEP
16471 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16472 #else
16473 {
16474 static char retval[32];
16475 int regno = REGNO (operands[opno]);
16476
16477 gcc_assert (STACK_REGNO_P (regno));
16478
16479 regno -= FIRST_STACK_REG;
16480
16481 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16482 return retval;
16483 }
16484 #endif
16485
16486 return opno ? "fstp\t%y1" : "fstp\t%y0";
16487 }
16488
16489
16490 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16491 should be used. UNORDERED_P is true when fucom should be used. */
16492
16493 const char *
16494 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16495 {
16496 int stack_top_dies;
16497 rtx cmp_op0, cmp_op1;
16498 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16499
16500 if (eflags_p)
16501 {
16502 cmp_op0 = operands[0];
16503 cmp_op1 = operands[1];
16504 }
16505 else
16506 {
16507 cmp_op0 = operands[1];
16508 cmp_op1 = operands[2];
16509 }
16510
16511 if (is_sse)
16512 {
16513 if (GET_MODE (operands[0]) == SFmode)
16514 if (unordered_p)
16515 return "%vucomiss\t{%1, %0|%0, %1}";
16516 else
16517 return "%vcomiss\t{%1, %0|%0, %1}";
16518 else
16519 if (unordered_p)
16520 return "%vucomisd\t{%1, %0|%0, %1}";
16521 else
16522 return "%vcomisd\t{%1, %0|%0, %1}";
16523 }
16524
16525 gcc_assert (STACK_TOP_P (cmp_op0));
16526
16527 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16528
16529 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16530 {
16531 if (stack_top_dies)
16532 {
16533 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16534 return output_387_ffreep (operands, 1);
16535 }
16536 else
16537 return "ftst\n\tfnstsw\t%0";
16538 }
16539
16540 if (STACK_REG_P (cmp_op1)
16541 && stack_top_dies
16542 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16543 && REGNO (cmp_op1) != FIRST_STACK_REG)
16544 {
16545 /* If both the top of the 387 stack and the other operand (also a
16546 stack register) die, then this must be a
16547 `fcompp' float compare. */
16548
16549 if (eflags_p)
16550 {
16551 /* There is no double popping fcomi variant. Fortunately,
16552 eflags is immune from the fstp's cc clobbering. */
16553 if (unordered_p)
16554 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16555 else
16556 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16557 return output_387_ffreep (operands, 0);
16558 }
16559 else
16560 {
16561 if (unordered_p)
16562 return "fucompp\n\tfnstsw\t%0";
16563 else
16564 return "fcompp\n\tfnstsw\t%0";
16565 }
16566 }
16567 else
16568 {
16569 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16570
16571 static const char * const alt[16] =
16572 {
16573 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16574 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16575 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16576 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16577
16578 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16579 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16580 NULL,
16581 NULL,
16582
16583 "fcomi\t{%y1, %0|%0, %y1}",
16584 "fcomip\t{%y1, %0|%0, %y1}",
16585 "fucomi\t{%y1, %0|%0, %y1}",
16586 "fucomip\t{%y1, %0|%0, %y1}",
16587
16588 NULL,
16589 NULL,
16590 NULL,
16591 NULL
16592 };
16593
16594 int mask;
16595 const char *ret;
16596
16597 mask = eflags_p << 3;
16598 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16599 mask |= unordered_p << 1;
16600 mask |= stack_top_dies;
16601
16602 gcc_assert (mask < 16);
16603 ret = alt[mask];
16604 gcc_assert (ret);
16605
16606 return ret;
16607 }
16608 }
16609
16610 void
16611 ix86_output_addr_vec_elt (FILE *file, int value)
16612 {
16613 const char *directive = ASM_LONG;
16614
16615 #ifdef ASM_QUAD
16616 if (TARGET_LP64)
16617 directive = ASM_QUAD;
16618 #else
16619 gcc_assert (!TARGET_64BIT);
16620 #endif
16621
16622 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16623 }
16624
16625 void
16626 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16627 {
16628 const char *directive = ASM_LONG;
16629
16630 #ifdef ASM_QUAD
16631 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16632 directive = ASM_QUAD;
16633 #else
16634 gcc_assert (!TARGET_64BIT);
16635 #endif
16636 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16637 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16638 fprintf (file, "%s%s%d-%s%d\n",
16639 directive, LPREFIX, value, LPREFIX, rel);
16640 else if (HAVE_AS_GOTOFF_IN_DATA)
16641 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16642 #if TARGET_MACHO
16643 else if (TARGET_MACHO)
16644 {
16645 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16646 machopic_output_function_base_name (file);
16647 putc ('\n', file);
16648 }
16649 #endif
16650 else
16651 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16652 GOT_SYMBOL_NAME, LPREFIX, value);
16653 }
16654 \f
16655 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16656 for the target. */
16657
16658 void
16659 ix86_expand_clear (rtx dest)
16660 {
16661 rtx tmp;
16662
16663 /* We play register width games, which are only valid after reload. */
16664 gcc_assert (reload_completed);
16665
16666 /* Avoid HImode and its attendant prefix byte. */
16667 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16668 dest = gen_rtx_REG (SImode, REGNO (dest));
16669 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16670
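  /* xor reg,reg clobbers the flags register, so that form needs an
     explicit CLOBBER in the pattern; mov $0,reg leaves the flags
     untouched.  */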
16671 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16672 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
16673 {
16674 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16675 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16676 }
16677
16678 emit_insn (tmp);
16679 }
16680
16681 /* X is an unchanging MEM. If it is a constant pool reference, return
16682 the constant pool rtx, else NULL. */
16683
16684 rtx
16685 maybe_get_pool_constant (rtx x)
16686 {
16687 x = ix86_delegitimize_address (XEXP (x, 0));
16688
16689 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16690 return get_pool_constant (x);
16691
16692 return NULL_RTX;
16693 }
16694
16695 void
16696 ix86_expand_move (enum machine_mode mode, rtx operands[])
16697 {
16698 rtx op0, op1;
16699 enum tls_model model;
16700
16701 op0 = operands[0];
16702 op1 = operands[1];
16703
16704 if (GET_CODE (op1) == SYMBOL_REF)
16705 {
16706 rtx tmp;
16707
16708 model = SYMBOL_REF_TLS_MODEL (op1);
16709 if (model)
16710 {
16711 op1 = legitimize_tls_address (op1, model, true);
16712 op1 = force_operand (op1, op0);
16713 if (op1 == op0)
16714 return;
16715 op1 = convert_to_mode (mode, op1, 1);
16716 }
16717 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16718 op1 = tmp;
16719 }
16720 else if (GET_CODE (op1) == CONST
16721 && GET_CODE (XEXP (op1, 0)) == PLUS
16722 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16723 {
16724 rtx addend = XEXP (XEXP (op1, 0), 1);
16725 rtx symbol = XEXP (XEXP (op1, 0), 0);
16726 rtx tmp;
16727
16728 model = SYMBOL_REF_TLS_MODEL (symbol);
16729 if (model)
16730 tmp = legitimize_tls_address (symbol, model, true);
16731 else
16732 tmp = legitimize_pe_coff_symbol (symbol, true);
16733
16734 if (tmp)
16735 {
16736 tmp = force_operand (tmp, NULL);
16737 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16738 op0, 1, OPTAB_DIRECT);
16739 if (tmp == op0)
16740 return;
16741 op1 = convert_to_mode (mode, tmp, 1);
16742 }
16743 }
16744
16745 if ((flag_pic || MACHOPIC_INDIRECT)
16746 && symbolic_operand (op1, mode))
16747 {
16748 if (TARGET_MACHO && !TARGET_64BIT)
16749 {
16750 #if TARGET_MACHO
16751 /* dynamic-no-pic */
16752 if (MACHOPIC_INDIRECT)
16753 {
16754 rtx temp = ((reload_in_progress
16755 || ((op0 && REG_P (op0))
16756 && mode == Pmode))
16757 ? op0 : gen_reg_rtx (Pmode));
16758 op1 = machopic_indirect_data_reference (op1, temp);
16759 if (MACHOPIC_PURE)
16760 op1 = machopic_legitimize_pic_address (op1, mode,
16761 temp == op1 ? 0 : temp);
16762 }
16763 if (op0 != op1 && GET_CODE (op0) != MEM)
16764 {
16765 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16766 emit_insn (insn);
16767 return;
16768 }
16769 if (GET_CODE (op0) == MEM)
16770 op1 = force_reg (Pmode, op1);
16771 else
16772 {
16773 rtx temp = op0;
16774 if (GET_CODE (temp) != REG)
16775 temp = gen_reg_rtx (Pmode);
16776 temp = legitimize_pic_address (op1, temp);
16777 if (temp == op0)
16778 return;
16779 op1 = temp;
16780 }
16781 /* dynamic-no-pic */
16782 #endif
16783 }
16784 else
16785 {
16786 if (MEM_P (op0))
16787 op1 = force_reg (mode, op1);
16788 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16789 {
16790 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16791 op1 = legitimize_pic_address (op1, reg);
16792 if (op0 == op1)
16793 return;
16794 op1 = convert_to_mode (mode, op1, 1);
16795 }
16796 }
16797 }
16798 else
16799 {
16800 if (MEM_P (op0)
16801 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16802 || !push_operand (op0, mode))
16803 && MEM_P (op1))
16804 op1 = force_reg (mode, op1);
16805
16806 if (push_operand (op0, mode)
16807 && ! general_no_elim_operand (op1, mode))
16808 op1 = copy_to_mode_reg (mode, op1);
16809
16810 /* Force large constants in 64bit compilation into a register
16811 so that they get CSEd. */
16812 if (can_create_pseudo_p ()
16813 && (mode == DImode) && TARGET_64BIT
16814 && immediate_operand (op1, mode)
16815 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16816 && !register_operand (op0, mode)
16817 && optimize)
16818 op1 = copy_to_mode_reg (mode, op1);
16819
16820 if (can_create_pseudo_p ()
16821 && FLOAT_MODE_P (mode)
16822 && GET_CODE (op1) == CONST_DOUBLE)
16823 {
16824 /* If we are loading a floating point constant to a register,
16825 force the value to memory now, since we'll get better code
16826 out of the back end. */
16827
16828 op1 = validize_mem (force_const_mem (mode, op1));
16829 if (!register_operand (op0, mode))
16830 {
16831 rtx temp = gen_reg_rtx (mode);
16832 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16833 emit_move_insn (op0, temp);
16834 return;
16835 }
16836 }
16837 }
16838
16839 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16840 }
16841
16842 void
16843 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16844 {
16845 rtx op0 = operands[0], op1 = operands[1];
16846 unsigned int align = GET_MODE_ALIGNMENT (mode);
16847
16848 if (push_operand (op0, VOIDmode))
16849 op0 = emit_move_resolve_push (mode, op0);
16850
16851 /* Force constants other than zero into memory. We do not know how
16852 the instructions used to build constants modify the upper 64 bits
16853 of the register; once we have that information, we may be able
16854 to handle some of them more efficiently. */
16855 if (can_create_pseudo_p ()
16856 && register_operand (op0, mode)
16857 && (CONSTANT_P (op1)
16858 || (GET_CODE (op1) == SUBREG
16859 && CONSTANT_P (SUBREG_REG (op1))))
16860 && !standard_sse_constant_p (op1))
16861 op1 = validize_mem (force_const_mem (mode, op1));
16862
16863 /* We need to check memory alignment for SSE mode since an attribute
16864 can make operands unaligned. */
16865 if (can_create_pseudo_p ()
16866 && SSE_REG_MODE_P (mode)
16867 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16868 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16869 {
16870 rtx tmp[2];
16871
16872 /* ix86_expand_vector_move_misalign() does not like constants ... */
16873 if (CONSTANT_P (op1)
16874 || (GET_CODE (op1) == SUBREG
16875 && CONSTANT_P (SUBREG_REG (op1))))
16876 op1 = validize_mem (force_const_mem (mode, op1));
16877
16878 /* ... nor both arguments in memory. */
16879 if (!register_operand (op0, mode)
16880 && !register_operand (op1, mode))
16881 op1 = force_reg (mode, op1);
16882
16883 tmp[0] = op0; tmp[1] = op1;
16884 ix86_expand_vector_move_misalign (mode, tmp);
16885 return;
16886 }
16887
16888 /* Make operand1 a register if it isn't already. */
16889 if (can_create_pseudo_p ()
16890 && !register_operand (op0, mode)
16891 && !register_operand (op1, mode))
16892 {
16893 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16894 return;
16895 }
16896
16897 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16898 }
16899
16900 /* Split 32-byte AVX unaligned load and store if needed. */
16901
16902 static void
16903 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16904 {
16905 rtx m;
16906 rtx (*extract) (rtx, rtx, rtx);
16907 rtx (*load_unaligned) (rtx, rtx);
16908 rtx (*store_unaligned) (rtx, rtx);
16909 enum machine_mode mode;
16910
16911 switch (GET_MODE (op0))
16912 {
16913 default:
16914 gcc_unreachable ();
16915 case V32QImode:
16916 extract = gen_avx_vextractf128v32qi;
16917 load_unaligned = gen_avx_loaddquv32qi;
16918 store_unaligned = gen_avx_storedquv32qi;
16919 mode = V16QImode;
16920 break;
16921 case V8SFmode:
16922 extract = gen_avx_vextractf128v8sf;
16923 load_unaligned = gen_avx_loadups256;
16924 store_unaligned = gen_avx_storeups256;
16925 mode = V4SFmode;
16926 break;
16927 case V4DFmode:
16928 extract = gen_avx_vextractf128v4df;
16929 load_unaligned = gen_avx_loadupd256;
16930 store_unaligned = gen_avx_storeupd256;
16931 mode = V2DFmode;
16932 break;
16933 }
16934
16935 if (MEM_P (op1))
16936 {
16937 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16938 {
16939 rtx r = gen_reg_rtx (mode);
16940 m = adjust_address (op1, mode, 0);
16941 emit_move_insn (r, m);
16942 m = adjust_address (op1, mode, 16);
16943 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16944 emit_move_insn (op0, r);
16945 }
16946 /* Normal *mov<mode>_internal pattern will handle
16947 unaligned loads just fine if misaligned_operand
16948 is true, and without the UNSPEC it can be combined
16949 with arithmetic instructions. */
16950 else if (misaligned_operand (op1, GET_MODE (op1)))
16951 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16952 else
16953 emit_insn (load_unaligned (op0, op1));
16954 }
16955 else if (MEM_P (op0))
16956 {
16957 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16958 {
16959 m = adjust_address (op0, mode, 0);
16960 emit_insn (extract (m, op1, const0_rtx));
16961 m = adjust_address (op0, mode, 16);
16962 emit_insn (extract (m, op1, const1_rtx));
16963 }
16964 else
16965 emit_insn (store_unaligned (op0, op1));
16966 }
16967 else
16968 gcc_unreachable ();
16969 }
16970
16971 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16972 straight to ix86_expand_vector_move. */
16973 /* Code generation for scalar reg-reg moves of single and double precision data:
16974 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16975 movaps reg, reg
16976 else
16977 movss reg, reg
16978 if (x86_sse_partial_reg_dependency == true)
16979 movapd reg, reg
16980 else
16981 movsd reg, reg
16982
16983 Code generation for scalar loads of double precision data:
16984 if (x86_sse_split_regs == true)
16985 movlpd mem, reg (gas syntax)
16986 else
16987 movsd mem, reg
16988
16989 Code generation for unaligned packed loads of single precision data
16990 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16991 if (x86_sse_unaligned_move_optimal)
16992 movups mem, reg
16993
16994 if (x86_sse_partial_reg_dependency == true)
16995 {
16996 xorps reg, reg
16997 movlps mem, reg
16998 movhps mem+8, reg
16999 }
17000 else
17001 {
17002 movlps mem, reg
17003 movhps mem+8, reg
17004 }
17005
17006 Code generation for unaligned packed loads of double precision data
17007 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17008 if (x86_sse_unaligned_move_optimal)
17009 movupd mem, reg
17010
17011 if (x86_sse_split_regs == true)
17012 {
17013 movlpd mem, reg
17014 movhpd mem+8, reg
17015 }
17016 else
17017 {
17018 movsd mem, reg
17019 movhpd mem+8, reg
17020 }
17021 */
17022
17023 void
17024 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17025 {
17026 rtx op0, op1, orig_op0 = NULL_RTX, m;
17027 rtx (*load_unaligned) (rtx, rtx);
17028 rtx (*store_unaligned) (rtx, rtx);
17029
17030 op0 = operands[0];
17031 op1 = operands[1];
17032
17033 if (GET_MODE_SIZE (mode) == 64)
17034 {
17035 switch (GET_MODE_CLASS (mode))
17036 {
17037 case MODE_VECTOR_INT:
17038 case MODE_INT:
17039 if (GET_MODE (op0) != V16SImode)
17040 {
17041 if (!MEM_P (op0))
17042 {
17043 orig_op0 = op0;
17044 op0 = gen_reg_rtx (V16SImode);
17045 }
17046 else
17047 op0 = gen_lowpart (V16SImode, op0);
17048 }
17049 op1 = gen_lowpart (V16SImode, op1);
17050 /* FALLTHRU */
17051
17052 case MODE_VECTOR_FLOAT:
17053 switch (GET_MODE (op0))
17054 {
17055 default:
17056 gcc_unreachable ();
17057 case V16SImode:
17058 load_unaligned = gen_avx512f_loaddquv16si;
17059 store_unaligned = gen_avx512f_storedquv16si;
17060 break;
17061 case V16SFmode:
17062 load_unaligned = gen_avx512f_loadups512;
17063 store_unaligned = gen_avx512f_storeups512;
17064 break;
17065 case V8DFmode:
17066 load_unaligned = gen_avx512f_loadupd512;
17067 store_unaligned = gen_avx512f_storeupd512;
17068 break;
17069 }
17070
17071 if (MEM_P (op1))
17072 emit_insn (load_unaligned (op0, op1));
17073 else if (MEM_P (op0))
17074 emit_insn (store_unaligned (op0, op1));
17075 else
17076 gcc_unreachable ();
17077 if (orig_op0)
17078 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17079 break;
17080
17081 default:
17082 gcc_unreachable ();
17083 }
17084
17085 return;
17086 }
17087
17088 if (TARGET_AVX
17089 && GET_MODE_SIZE (mode) == 32)
17090 {
17091 switch (GET_MODE_CLASS (mode))
17092 {
17093 case MODE_VECTOR_INT:
17094 case MODE_INT:
17095 if (GET_MODE (op0) != V32QImode)
17096 {
17097 if (!MEM_P (op0))
17098 {
17099 orig_op0 = op0;
17100 op0 = gen_reg_rtx (V32QImode);
17101 }
17102 else
17103 op0 = gen_lowpart (V32QImode, op0);
17104 }
17105 op1 = gen_lowpart (V32QImode, op1);
17106 /* FALLTHRU */
17107
17108 case MODE_VECTOR_FLOAT:
17109 ix86_avx256_split_vector_move_misalign (op0, op1);
17110 if (orig_op0)
17111 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17112 break;
17113
17114 default:
17115 gcc_unreachable ();
17116 }
17117
17118 return;
17119 }
17120
17121 if (MEM_P (op1))
17122 {
17123 /* Normal *mov<mode>_internal pattern will handle
17124 unaligned loads just fine if misaligned_operand
17125 is true, and without the UNSPEC it can be combined
17126 with arithmetic instructions. */
17127 if (TARGET_AVX
17128 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17129 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17130 && misaligned_operand (op1, GET_MODE (op1)))
17131 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17132 /* ??? If we have typed data, then it would appear that using
17133 movdqu is the only way to get unaligned data loaded with
17134 integer type. */
17135 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17136 {
17137 if (GET_MODE (op0) != V16QImode)
17138 {
17139 orig_op0 = op0;
17140 op0 = gen_reg_rtx (V16QImode);
17141 }
17142 op1 = gen_lowpart (V16QImode, op1);
17143 /* We will eventually emit movups based on insn attributes. */
17144 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17145 if (orig_op0)
17146 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17147 }
17148 else if (TARGET_SSE2 && mode == V2DFmode)
17149 {
17150 rtx zero;
17151
17152 if (TARGET_AVX
17153 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17154 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17155 || optimize_insn_for_size_p ())
17156 {
17157 /* We will eventually emit movups based on insn attributes. */
17158 emit_insn (gen_sse2_loadupd (op0, op1));
17159 return;
17160 }
17161
17162 /* When SSE registers are split into halves, we can avoid
17163 writing to the top half twice. */
17164 if (TARGET_SSE_SPLIT_REGS)
17165 {
17166 emit_clobber (op0);
17167 zero = op0;
17168 }
17169 else
17170 {
17171 /* ??? Not sure about the best option for the Intel chips.
17172 The following would seem to satisfy; the register is
17173 entirely cleared, breaking the dependency chain. We
17174 then store to the upper half, with a dependency depth
17175 of one. A rumor has it that Intel recommends two movsd
17176 followed by an unpacklpd, but this is unconfirmed. And
17177 given that the dependency depth of the unpacklpd would
17178 still be one, I'm not sure why this would be better. */
17179 zero = CONST0_RTX (V2DFmode);
17180 }
17181
17182 m = adjust_address (op1, DFmode, 0);
17183 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17184 m = adjust_address (op1, DFmode, 8);
17185 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17186 }
17187 else
17188 {
17189 rtx t;
17190
17191 if (TARGET_AVX
17192 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17193 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17194 || optimize_insn_for_size_p ())
17195 {
17196 if (GET_MODE (op0) != V4SFmode)
17197 {
17198 orig_op0 = op0;
17199 op0 = gen_reg_rtx (V4SFmode);
17200 }
17201 op1 = gen_lowpart (V4SFmode, op1);
17202 emit_insn (gen_sse_loadups (op0, op1));
17203 if (orig_op0)
17204 emit_move_insn (orig_op0,
17205 gen_lowpart (GET_MODE (orig_op0), op0));
17206 return;
17207 }
17208
17209 if (mode != V4SFmode)
17210 t = gen_reg_rtx (V4SFmode);
17211 else
17212 t = op0;
17213
17214 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17215 emit_move_insn (t, CONST0_RTX (V4SFmode));
17216 else
17217 emit_clobber (t);
17218
17219 m = adjust_address (op1, V2SFmode, 0);
17220 emit_insn (gen_sse_loadlps (t, t, m));
17221 m = adjust_address (op1, V2SFmode, 8);
17222 emit_insn (gen_sse_loadhps (t, t, m));
17223 if (mode != V4SFmode)
17224 emit_move_insn (op0, gen_lowpart (mode, t));
17225 }
17226 }
17227 else if (MEM_P (op0))
17228 {
17229 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17230 {
17231 op0 = gen_lowpart (V16QImode, op0);
17232 op1 = gen_lowpart (V16QImode, op1);
17233 /* We will eventually emit movups based on insn attributes. */
17234 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17235 }
17236 else if (TARGET_SSE2 && mode == V2DFmode)
17237 {
17238 if (TARGET_AVX
17239 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17240 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17241 || optimize_insn_for_size_p ())
17242 /* We will eventually emit movups based on insn attributes. */
17243 emit_insn (gen_sse2_storeupd (op0, op1));
17244 else
17245 {
17246 m = adjust_address (op0, DFmode, 0);
17247 emit_insn (gen_sse2_storelpd (m, op1));
17248 m = adjust_address (op0, DFmode, 8);
17249 emit_insn (gen_sse2_storehpd (m, op1));
17250 }
17251 }
17252 else
17253 {
17254 if (mode != V4SFmode)
17255 op1 = gen_lowpart (V4SFmode, op1);
17256
17257 if (TARGET_AVX
17258 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17259 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17260 || optimize_insn_for_size_p ())
17261 {
17262 op0 = gen_lowpart (V4SFmode, op0);
17263 emit_insn (gen_sse_storeups (op0, op1));
17264 }
17265 else
17266 {
17267 m = adjust_address (op0, V2SFmode, 0);
17268 emit_insn (gen_sse_storelps (m, op1));
17269 m = adjust_address (op0, V2SFmode, 8);
17270 emit_insn (gen_sse_storehps (m, op1));
17271 }
17272 }
17273 }
17274 else
17275 gcc_unreachable ();
17276 }
17277
17278 /* Helper function of ix86_fixup_binary_operands to canonicalize
17279 operand order. Returns true if the operands should be swapped. */
17280
17281 static bool
17282 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17283 rtx operands[])
17284 {
17285 rtx dst = operands[0];
17286 rtx src1 = operands[1];
17287 rtx src2 = operands[2];
17288
17289 /* If the operation is not commutative, we can't do anything. */
17290 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17291 return false;
17292
17293 /* Highest priority is that src1 should match dst. */
17294 if (rtx_equal_p (dst, src1))
17295 return false;
17296 if (rtx_equal_p (dst, src2))
17297 return true;
17298
17299 /* Next highest priority is that immediate constants come second. */
17300 if (immediate_operand (src2, mode))
17301 return false;
17302 if (immediate_operand (src1, mode))
17303 return true;
17304
17305 /* Lowest priority is that memory references should come second. */
17306 if (MEM_P (src2))
17307 return false;
17308 if (MEM_P (src1))
17309 return true;
17310
17311 return false;
17312 }
17313
17314
17315 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17316 destination to use for the operation. If different from the true
17317 destination in operands[0], a copy operation will be required. */
17318
17319 rtx
17320 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17321 rtx operands[])
17322 {
17323 rtx dst = operands[0];
17324 rtx src1 = operands[1];
17325 rtx src2 = operands[2];
17326
17327 /* Canonicalize operand order. */
17328 if (ix86_swap_binary_operands_p (code, mode, operands))
17329 {
17330 rtx temp;
17331
17332 /* It is invalid to swap operands of different modes. */
17333 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17334
17335 temp = src1;
17336 src1 = src2;
17337 src2 = temp;
17338 }
17339
17340 /* Both source operands cannot be in memory. */
17341 if (MEM_P (src1) && MEM_P (src2))
17342 {
17343 /* Optimization: Only read from memory once. */
17344 if (rtx_equal_p (src1, src2))
17345 {
17346 src2 = force_reg (mode, src2);
17347 src1 = src2;
17348 }
17349 else if (rtx_equal_p (dst, src1))
17350 src2 = force_reg (mode, src2);
17351 else
17352 src1 = force_reg (mode, src1);
17353 }
17354
17355 /* If the destination is memory, and we do not have matching source
17356 operands, do things in registers. */
17357 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17358 dst = gen_reg_rtx (mode);
17359
17360 /* Source 1 cannot be a constant. */
17361 if (CONSTANT_P (src1))
17362 src1 = force_reg (mode, src1);
17363
17364 /* Source 1 cannot be a non-matching memory. */
17365 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17366 src1 = force_reg (mode, src1);
17367
17368 /* Improve address combine. */
17369 if (code == PLUS
17370 && GET_MODE_CLASS (mode) == MODE_INT
17371 && MEM_P (src2))
17372 src2 = force_reg (mode, src2);
17373
17374 operands[1] = src1;
17375 operands[2] = src2;
17376 return dst;
17377 }
17378
17379 /* Similarly, but assume that the destination has already been
17380 set up properly. */
17381
17382 void
17383 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17384 enum machine_mode mode, rtx operands[])
17385 {
17386 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17387 gcc_assert (dst == operands[0]);
17388 }
17389
17390 /* Attempt to expand a binary operator. Make the expansion closer to the
17391 actual machine, than just general_operand, which would allow 3 separate
17392 memory references (one output, two input) in a single insn. */
17393
17394 void
17395 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17396 rtx operands[])
17397 {
17398 rtx src1, src2, dst, op, clob;
17399
17400 dst = ix86_fixup_binary_operands (code, mode, operands);
17401 src1 = operands[1];
17402 src2 = operands[2];
17403
17404 /* Emit the instruction. */
17405
17406 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17407 if (reload_in_progress)
17408 {
17409 /* Reload doesn't know about the flags register, and doesn't know that
17410 it doesn't want to clobber it. We can only do this with PLUS. */
17411 gcc_assert (code == PLUS);
17412 emit_insn (op);
17413 }
17414 else if (reload_completed
17415 && code == PLUS
17416 && !rtx_equal_p (dst, src1))
17417 {
17418 /* This is going to be an LEA; avoid splitting it later. */
17419 emit_insn (op);
17420 }
17421 else
17422 {
17423 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17424 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17425 }
17426
17427 /* Fix up the destination if needed. */
17428 if (dst != operands[0])
17429 emit_move_insn (operands[0], dst);
17430 }
17431
17432 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17433 the given OPERANDS. */
17434
17435 void
17436 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17437 rtx operands[])
17438 {
17439 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17440 if (GET_CODE (operands[1]) == SUBREG)
17441 {
17442 op1 = operands[1];
17443 op2 = operands[2];
17444 }
17445 else if (GET_CODE (operands[2]) == SUBREG)
17446 {
17447 op1 = operands[2];
17448 op2 = operands[1];
17449 }
17450 /* Optimize (__m128i) d | (__m128i) e and similar code, when d and e
17451 are float vectors, into a float vector logical
17452 insn. In C/C++, without using intrinsics, there is no other way
17453 to express a vector logical operation on float vectors than
17454 to cast them temporarily to integer vectors. */
17455 if (op1
17456 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17457 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17458 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17459 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17460 && SUBREG_BYTE (op1) == 0
17461 && (GET_CODE (op2) == CONST_VECTOR
17462 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17463 && SUBREG_BYTE (op2) == 0))
17464 && can_create_pseudo_p ())
17465 {
17466 rtx dst;
17467 switch (GET_MODE (SUBREG_REG (op1)))
17468 {
17469 case V4SFmode:
17470 case V8SFmode:
17471 case V2DFmode:
17472 case V4DFmode:
17473 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17474 if (GET_CODE (op2) == CONST_VECTOR)
17475 {
17476 op2 = gen_lowpart (GET_MODE (dst), op2);
17477 op2 = force_reg (GET_MODE (dst), op2);
17478 }
17479 else
17480 {
17481 op1 = operands[1];
17482 op2 = SUBREG_REG (operands[2]);
17483 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17484 op2 = force_reg (GET_MODE (dst), op2);
17485 }
17486 op1 = SUBREG_REG (op1);
17487 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17488 op1 = force_reg (GET_MODE (dst), op1);
17489 emit_insn (gen_rtx_SET (VOIDmode, dst,
17490 gen_rtx_fmt_ee (code, GET_MODE (dst),
17491 op1, op2)));
17492 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17493 return;
17494 default:
17495 break;
17496 }
17497 }
17498 if (!nonimmediate_operand (operands[1], mode))
17499 operands[1] = force_reg (mode, operands[1]);
17500 if (!nonimmediate_operand (operands[2], mode))
17501 operands[2] = force_reg (mode, operands[2]);
17502 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17503 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17504 gen_rtx_fmt_ee (code, mode, operands[1],
17505 operands[2])));
17506 }
17507
17508 /* Return TRUE or FALSE depending on whether the binary operator meets the
17509 appropriate constraints. */
17510
17511 bool
17512 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17513 rtx operands[3])
17514 {
17515 rtx dst = operands[0];
17516 rtx src1 = operands[1];
17517 rtx src2 = operands[2];
17518
17519 /* Both source operands cannot be in memory. */
17520 if (MEM_P (src1) && MEM_P (src2))
17521 return false;
17522
17523 /* Canonicalize operand order for commutative operators. */
17524 if (ix86_swap_binary_operands_p (code, mode, operands))
17525 {
17526 rtx temp = src1;
17527 src1 = src2;
17528 src2 = temp;
17529 }
17530
17531 /* If the destination is memory, we must have a matching source operand. */
17532 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17533 return false;
17534
17535 /* Source 1 cannot be a constant. */
17536 if (CONSTANT_P (src1))
17537 return false;
17538
17539 /* Source 1 cannot be a non-matching memory. */
17540 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17541 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17542 return (code == AND
17543 && (mode == HImode
17544 || mode == SImode
17545 || (TARGET_64BIT && mode == DImode))
17546 && satisfies_constraint_L (src2));
17547
17548 return true;
17549 }
17550
17551 /* Attempt to expand a unary operator. Make the expansion closer to the
17552 actual machine, than just general_operand, which would allow 2 separate
17553 memory references (one output, one input) in a single insn. */
17554
17555 void
17556 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17557 rtx operands[])
17558 {
17559 int matching_memory;
17560 rtx src, dst, op, clob;
17561
17562 dst = operands[0];
17563 src = operands[1];
17564
17565 /* If the destination is memory, and we do not have matching source
17566 operands, do things in registers. */
17567 matching_memory = 0;
17568 if (MEM_P (dst))
17569 {
17570 if (rtx_equal_p (dst, src))
17571 matching_memory = 1;
17572 else
17573 dst = gen_reg_rtx (mode);
17574 }
17575
17576 /* When source operand is memory, destination must match. */
17577 if (MEM_P (src) && !matching_memory)
17578 src = force_reg (mode, src);
17579
17580 /* Emit the instruction. */
17581
17582 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17583 if (reload_in_progress || code == NOT)
17584 {
17585 /* Reload doesn't know about the flags register, and doesn't know that
17586 it doesn't want to clobber it. */
17587 gcc_assert (code == NOT);
17588 emit_insn (op);
17589 }
17590 else
17591 {
17592 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17593 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17594 }
17595
17596 /* Fix up the destination if needed. */
17597 if (dst != operands[0])
17598 emit_move_insn (operands[0], dst);
17599 }
17600
17601 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17602 divisor are within the range [0-255]. */
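/* The emitted sequence tests whether (dividend | divisor) fits in 8 bits;
   if so it branches to a block that performs a HImode/QImode divide (the
   quotient ends up in AL and the remainder in AH), otherwise it falls
   through to the full-width signed/unsigned divide.  */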
17603
17604 void
17605 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17606 bool signed_p)
17607 {
17608 rtx end_label, qimode_label;
17609 rtx insn, div, mod;
17610 rtx scratch, tmp0, tmp1, tmp2;
17611 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17612 rtx (*gen_zero_extend) (rtx, rtx);
17613 rtx (*gen_test_ccno_1) (rtx, rtx);
17614
17615 switch (mode)
17616 {
17617 case SImode:
17618 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17619 gen_test_ccno_1 = gen_testsi_ccno_1;
17620 gen_zero_extend = gen_zero_extendqisi2;
17621 break;
17622 case DImode:
17623 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17624 gen_test_ccno_1 = gen_testdi_ccno_1;
17625 gen_zero_extend = gen_zero_extendqidi2;
17626 break;
17627 default:
17628 gcc_unreachable ();
17629 }
17630
17631 end_label = gen_label_rtx ();
17632 qimode_label = gen_label_rtx ();
17633
17634 scratch = gen_reg_rtx (mode);
17635
17636 /* Use 8bit unsigned divmod if dividend and divisor are within
17637 the range [0-255]. */
17638 emit_move_insn (scratch, operands[2]);
17639 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17640 scratch, 1, OPTAB_DIRECT);
17641 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17642 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17643 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17644 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17645 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17646 pc_rtx);
17647 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17648 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17649 JUMP_LABEL (insn) = qimode_label;
17650
17651 /* Generate original signed/unsigned divmod. */
17652 div = gen_divmod4_1 (operands[0], operands[1],
17653 operands[2], operands[3]);
17654 emit_insn (div);
17655
17656 /* Branch to the end. */
17657 emit_jump_insn (gen_jump (end_label));
17658 emit_barrier ();
17659
17660 /* Generate 8bit unsigned divide. */
17661 emit_label (qimode_label);
17662 /* Don't use operands[0] for result of 8bit divide since not all
17663 registers support QImode ZERO_EXTRACT. */
17664 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17665 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17666 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17667 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17668
17669 if (signed_p)
17670 {
17671 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17672 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17673 }
17674 else
17675 {
17676 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17677 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17678 }
17679
17680 /* Extract remainder from AH. */
17681 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17682 if (REG_P (operands[1]))
17683 insn = emit_move_insn (operands[1], tmp1);
17684 else
17685 {
17686 /* Need a new scratch register since the old one has result
17687 of 8bit divide. */
17688 scratch = gen_reg_rtx (mode);
17689 emit_move_insn (scratch, tmp1);
17690 insn = emit_move_insn (operands[1], scratch);
17691 }
17692 set_unique_reg_note (insn, REG_EQUAL, mod);
17693
17694 /* Zero extend quotient from AL. */
17695 tmp1 = gen_lowpart (QImode, tmp0);
17696 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17697 set_unique_reg_note (insn, REG_EQUAL, div);
17698
17699 emit_label (end_label);
17700 }
17701
17702 /* Whether it is OK to emit CFI directives when emitting asm code. */
17703
17704 bool
17705 ix86_emit_cfi ()
17706 {
17707 return dwarf2out_do_cfi_asm ();
17708 }
17709
17710 #define LEA_MAX_STALL (3)
17711 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17712
17713 /* Increase given DISTANCE in half-cycles according to
17714 dependencies between PREV and NEXT instructions.
17715 Add 1 half-cycle if there is no dependency and
17716 go to the next cycle if there is some dependency. */
17717
17718 static unsigned int
17719 increase_distance (rtx prev, rtx next, unsigned int distance)
17720 {
17721 df_ref *use_rec;
17722 df_ref *def_rec;
17723
17724 if (!prev || !next)
17725 return distance + (distance & 1) + 2;
17726
17727 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17728 return distance + 1;
17729
17730 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17731 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17732 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17733 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17734 return distance + (distance & 1) + 2;
17735
17736 return distance + 1;
17737 }
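/* For example (a sketch, not derived from any particular schedule): if PREV
   and NEXT are independent ALU instructions, a DISTANCE of 3 half-cycles
   becomes 4; if NEXT depends on a register defined by PREV, the same
   DISTANCE is first rounded up to a full cycle and then bumped by one more
   cycle, i.e. 3 -> 3 + 1 + 2 = 6 half-cycles. */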
17738
17739 /* Return true if instruction INSN defines register number
17740 REGNO1 or REGNO2. */
17741
17742 static bool
17743 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17744 rtx insn)
17745 {
17746 df_ref *def_rec;
17747
17748 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17749 if (DF_REF_REG_DEF_P (*def_rec)
17750 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17751 && (regno1 == DF_REF_REGNO (*def_rec)
17752 || regno2 == DF_REF_REGNO (*def_rec)))
17753 {
17754 return true;
17755 }
17756
17757 return false;
17758 }
17759
17760 /* Return true if instruction INSN uses register number
17761 REGNO as part of an address expression. */
17762
17763 static bool
17764 insn_uses_reg_mem (unsigned int regno, rtx insn)
17765 {
17766 df_ref *use_rec;
17767
17768 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17769 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17770 return true;
17771
17772 return false;
17773 }
17774
17775 /* Search backward for non-agu definition of register number REGNO1
17776 or register number REGNO2 in basic block starting from instruction
17777 START up to head of basic block or instruction INSN.
17778
17779 Put true into *FOUND if a definition was found
17780 and false otherwise.
17781
17782 The distance in half-cycles between START and the found instruction or
17783 the head of the BB is added to DISTANCE and returned. */
17784
17785 static int
17786 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17787 rtx insn, int distance,
17788 rtx start, bool *found)
17789 {
17790 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17791 rtx prev = start;
17792 rtx next = NULL;
17793
17794 *found = false;
17795
17796 while (prev
17797 && prev != insn
17798 && distance < LEA_SEARCH_THRESHOLD)
17799 {
17800 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17801 {
17802 distance = increase_distance (prev, next, distance);
17803 if (insn_defines_reg (regno1, regno2, prev))
17804 {
17805 if (recog_memoized (prev) < 0
17806 || get_attr_type (prev) != TYPE_LEA)
17807 {
17808 *found = true;
17809 return distance;
17810 }
17811 }
17812
17813 next = prev;
17814 }
17815 if (prev == BB_HEAD (bb))
17816 break;
17817
17818 prev = PREV_INSN (prev);
17819 }
17820
17821 return distance;
17822 }
17823
17824 /* Search backward for non-agu definition of register number REGNO1
17825 or register number REGNO2 in INSN's basic block until
17826 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17827 2. Reach the boundary of a neighbouring BB, or
17828 3. Reach agu definition.
17829 Returns the distance between the non-agu definition point and INSN.
17830 If no definition point, returns -1. */
17831
17832 static int
17833 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17834 rtx insn)
17835 {
17836 basic_block bb = BLOCK_FOR_INSN (insn);
17837 int distance = 0;
17838 bool found = false;
17839
17840 if (insn != BB_HEAD (bb))
17841 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17842 distance, PREV_INSN (insn),
17843 &found);
17844
17845 if (!found && distance < LEA_SEARCH_THRESHOLD)
17846 {
17847 edge e;
17848 edge_iterator ei;
17849 bool simple_loop = false;
17850
17851 FOR_EACH_EDGE (e, ei, bb->preds)
17852 if (e->src == bb)
17853 {
17854 simple_loop = true;
17855 break;
17856 }
17857
17858 if (simple_loop)
17859 distance = distance_non_agu_define_in_bb (regno1, regno2,
17860 insn, distance,
17861 BB_END (bb), &found);
17862 else
17863 {
17864 int shortest_dist = -1;
17865 bool found_in_bb = false;
17866
17867 FOR_EACH_EDGE (e, ei, bb->preds)
17868 {
17869 int bb_dist
17870 = distance_non_agu_define_in_bb (regno1, regno2,
17871 insn, distance,
17872 BB_END (e->src),
17873 &found_in_bb);
17874 if (found_in_bb)
17875 {
17876 if (shortest_dist < 0)
17877 shortest_dist = bb_dist;
17878 else if (bb_dist > 0)
17879 shortest_dist = MIN (bb_dist, shortest_dist);
17880
17881 found = true;
17882 }
17883 }
17884
17885 distance = shortest_dist;
17886 }
17887 }
17888
17889 /* get_attr_type may modify recog data. We want to make sure
17890 that recog data is valid for instruction INSN, on which
17891 distance_non_agu_define is called. INSN is unchanged here. */
17892 extract_insn_cached (insn);
17893
17894 if (!found)
17895 return -1;
17896
17897 return distance >> 1;
17898 }
17899
17900 /* Return the distance in half-cycles between INSN and the next
17901 insn that uses register number REGNO in a memory address, added
17902 to DISTANCE. Return -1 if REGNO is set.
17903
17904 Put true value into *FOUND if register usage was found and
17905 false otherwise.
17906 Put true value into *REDEFINED if register redefinition was
17907 found and false otherwise. */
17908
17909 static int
17910 distance_agu_use_in_bb (unsigned int regno,
17911 rtx insn, int distance, rtx start,
17912 bool *found, bool *redefined)
17913 {
17914 basic_block bb = NULL;
17915 rtx next = start;
17916 rtx prev = NULL;
17917
17918 *found = false;
17919 *redefined = false;
17920
17921 if (start != NULL_RTX)
17922 {
17923 bb = BLOCK_FOR_INSN (start);
17924 if (start != BB_HEAD (bb))
17925 /* If insn and start belong to the same bb, set prev to insn,
17926 so the call to increase_distance will increase the distance
17927 between insns by 1. */
17928 prev = insn;
17929 }
17930
17931 while (next
17932 && next != insn
17933 && distance < LEA_SEARCH_THRESHOLD)
17934 {
17935 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17936 {
17937 distance = increase_distance(prev, next, distance);
17938 if (insn_uses_reg_mem (regno, next))
17939 {
17940 /* Return DISTANCE if OP0 is used in memory
17941 address in NEXT. */
17942 *found = true;
17943 return distance;
17944 }
17945
17946 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17947 {
17948 /* Return -1 if OP0 is set in NEXT. */
17949 *redefined = true;
17950 return -1;
17951 }
17952
17953 prev = next;
17954 }
17955
17956 if (next == BB_END (bb))
17957 break;
17958
17959 next = NEXT_INSN (next);
17960 }
17961
17962 return distance;
17963 }
17964
17965 /* Return the distance between INSN and the next insn that uses
17966 register number REGNO0 in a memory address. Return -1 if no such
17967 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
17968
17969 static int
17970 distance_agu_use (unsigned int regno0, rtx insn)
17971 {
17972 basic_block bb = BLOCK_FOR_INSN (insn);
17973 int distance = 0;
17974 bool found = false;
17975 bool redefined = false;
17976
17977 if (insn != BB_END (bb))
17978 distance = distance_agu_use_in_bb (regno0, insn, distance,
17979 NEXT_INSN (insn),
17980 &found, &redefined);
17981
17982 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
17983 {
17984 edge e;
17985 edge_iterator ei;
17986 bool simple_loop = false;
17987
17988 FOR_EACH_EDGE (e, ei, bb->succs)
17989 if (e->dest == bb)
17990 {
17991 simple_loop = true;
17992 break;
17993 }
17994
17995 if (simple_loop)
17996 distance = distance_agu_use_in_bb (regno0, insn,
17997 distance, BB_HEAD (bb),
17998 &found, &redefined);
17999 else
18000 {
18001 int shortest_dist = -1;
18002 bool found_in_bb = false;
18003 bool redefined_in_bb = false;
18004
18005 FOR_EACH_EDGE (e, ei, bb->succs)
18006 {
18007 int bb_dist
18008 = distance_agu_use_in_bb (regno0, insn,
18009 distance, BB_HEAD (e->dest),
18010 &found_in_bb, &redefined_in_bb);
18011 if (found_in_bb)
18012 {
18013 if (shortest_dist < 0)
18014 shortest_dist = bb_dist;
18015 else if (bb_dist > 0)
18016 shortest_dist = MIN (bb_dist, shortest_dist);
18017
18018 found = true;
18019 }
18020 }
18021
18022 distance = shortest_dist;
18023 }
18024 }
18025
18026 if (!found || redefined)
18027 return -1;
18028
18029 return distance >> 1;
18030 }
18031
18032 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
18033 there is a choice between LEA and ADD:
18034 Negative value: ADD is preferred over LEA
18035 Zero: Neutral
18036 Positive value: LEA is preferred over ADD. */
18037 #define IX86_LEA_PRIORITY 0
18038
18039 /* Return true if using the lea INSN has a performance advantage
18040 over a sequence of instructions. The instruction sequence has
18041 SPLIT_COST cycles higher latency than the lea itself. */
18042
18043 static bool
18044 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18045 unsigned int regno2, int split_cost, bool has_scale)
18046 {
18047 int dist_define, dist_use;
18048
18049 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18050 non-destructive destination, or for the ability to use SCALE,
18051 then the use of LEA is justified. */
18052 if (TARGET_SILVERMONT || TARGET_INTEL)
18053 {
18054 if (has_scale)
18055 return true;
18056 if (split_cost < 1)
18057 return false;
18058 if (regno0 == regno1 || regno0 == regno2)
18059 return false;
18060 return true;
18061 }
18062
18063 dist_define = distance_non_agu_define (regno1, regno2, insn);
18064 dist_use = distance_agu_use (regno0, insn);
18065
18066 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18067 {
18068 /* If there is no non-AGU operand definition, no AGU
18069 operand usage and the split cost is 0, then both the lea
18070 and non-lea variants have the same priority. Currently
18071 we prefer lea for 64-bit code and non-lea for 32-bit
18072 code. */
18073 if (dist_use < 0 && split_cost == 0)
18074 return TARGET_64BIT || IX86_LEA_PRIORITY;
18075 else
18076 return true;
18077 }
18078
18079 /* The longer the definition distance, the more preferable lea is.
18080 Adjust it here to take into account the splitting cost and
18081 the lea priority. */
18082 dist_define += split_cost + IX86_LEA_PRIORITY;
18083
18084 /* If there is no use in a memory address then we just check
18085 that the split cost exceeds the AGU stall. */
18086 if (dist_use < 0)
18087 return dist_define > LEA_MAX_STALL;
18088
18089 /* If this insn has both a backward non-agu dependence and a forward
18090 agu dependence, the one with the shorter distance takes effect. */
18091 return dist_define >= dist_use;
18092 }
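/* A worked example of the heuristic above (the numbers are illustrative):
   suppose the operand of the lea is produced 1 cycle earlier
   (dist_define == 1), its result feeds an address 3 cycles later
   (dist_use == 3), SPLIT_COST is 1 and IX86_LEA_PRIORITY is 0.  Then
   dist_define becomes 1 + 1 + 0 = 2, which is less than dist_use, so the
   function returns false and the lea is split into ALU instructions to
   avoid the AGU stall. */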
18093
18094 /* Return true if it is legal to clobber flags by INSN and
18095 false otherwise. */
18096
18097 static bool
18098 ix86_ok_to_clobber_flags (rtx insn)
18099 {
18100 basic_block bb = BLOCK_FOR_INSN (insn);
18101 df_ref *use;
18102 bitmap live;
18103
18104 while (insn)
18105 {
18106 if (NONDEBUG_INSN_P (insn))
18107 {
18108 for (use = DF_INSN_USES (insn); *use; use++)
18109 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18110 return false;
18111
18112 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18113 return true;
18114 }
18115
18116 if (insn == BB_END (bb))
18117 break;
18118
18119 insn = NEXT_INSN (insn);
18120 }
18121
18122 live = df_get_live_out(bb);
18123 return !REGNO_REG_SET_P (live, FLAGS_REG);
18124 }
18125
18126 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18127 a move and an add to avoid AGU stalls. */
18128
18129 bool
18130 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18131 {
18132 unsigned int regno0, regno1, regno2;
18133
18134 /* Check if we need to optimize. */
18135 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18136 return false;
18137
18138 /* Check it is correct to split here. */
18139 if (!ix86_ok_to_clobber_flags(insn))
18140 return false;
18141
18142 regno0 = true_regnum (operands[0]);
18143 regno1 = true_regnum (operands[1]);
18144 regno2 = true_regnum (operands[2]);
18145
18146 /* We need to split only adds with a non-destructive
18147 destination operand. */
18148 if (regno0 == regno1 || regno0 == regno2)
18149 return false;
18150 else
18151 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18152 }
18153
18154 /* Return true if we should emit an lea instruction instead of a mov
18155 instruction. */
18156
18157 bool
18158 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18159 {
18160 unsigned int regno0, regno1;
18161
18162 /* Check if we need to optimize. */
18163 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18164 return false;
18165
18166 /* Use lea for reg to reg moves only. */
18167 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18168 return false;
18169
18170 regno0 = true_regnum (operands[0]);
18171 regno1 = true_regnum (operands[1]);
18172
18173 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18174 }
18175
18176 /* Return true if we need to split lea into a sequence of
18177 instructions to avoid AGU stalls. */
18178
18179 bool
18180 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18181 {
18182 unsigned int regno0, regno1, regno2;
18183 int split_cost;
18184 struct ix86_address parts;
18185 int ok;
18186
18187 /* Check we need to optimize. */
18188 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18189 return false;
18190
18191 /* The "at least two components" test below might not catch simple
18192 move or zero extension insns if parts.base is non-NULL and parts.disp
18193 is const0_rtx as the only components in the address, e.g. if the
18194 register is %rbp or %r13. As this test is much cheaper and moves or
18195 zero extensions are the common case, do this check first. */
18196 if (REG_P (operands[1])
18197 || (SImode_address_operand (operands[1], VOIDmode)
18198 && REG_P (XEXP (operands[1], 0))))
18199 return false;
18200
18201 /* Check if it is OK to split here. */
18202 if (!ix86_ok_to_clobber_flags (insn))
18203 return false;
18204
18205 ok = ix86_decompose_address (operands[1], &parts);
18206 gcc_assert (ok);
18207
18208 /* There should be at least two components in the address. */
18209 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18210 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18211 return false;
18212
18213 /* We should not split into an add if a non-legitimate pic
18214 operand is used as the displacement. */
18215 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18216 return false;
18217
18218 regno0 = true_regnum (operands[0]);
18219 regno1 = INVALID_REGNUM;
18220 regno2 = INVALID_REGNUM;
18221
18222 if (parts.base)
18223 regno1 = true_regnum (parts.base);
18224 if (parts.index)
18225 regno2 = true_regnum (parts.index);
18226
18227 split_cost = 0;
18228
18229 /* Compute how many cycles we will add to the execution time
18230 if we split the lea into a sequence of instructions. */
18231 if (parts.base || parts.index)
18232 {
18233 /* Have to use a mov instruction if the non-destructive
18234 destination form is used. */
18235 if (regno1 != regno0 && regno2 != regno0)
18236 split_cost += 1;
18237
18238 /* Have to add index to base if both exist. */
18239 if (parts.base && parts.index)
18240 split_cost += 1;
18241
18242 /* Have to use shift and adds if scale is 2 or greater. */
18243 if (parts.scale > 1)
18244 {
18245 if (regno0 != regno1)
18246 split_cost += 1;
18247 else if (regno2 == regno0)
18248 split_cost += 4;
18249 else
18250 split_cost += parts.scale;
18251 }
18252
18253 /* Have to use an add instruction with an immediate if
18254 disp is nonzero. */
18255 if (parts.disp && parts.disp != const0_rtx)
18256 split_cost += 1;
18257
18258 /* Subtract the price of lea. */
18259 split_cost -= 1;
18260 }
18261
18262 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18263 parts.scale > 1);
18264 }
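/* For instance, splitting an address like 0x4(%rbx,%rcx,2) into %rax
   (registers purely illustrative) would cost: 1 mov (the destination
   differs from both sources) + 1 add of index to base + 1 for the scale
   of 2 + 1 add of the displacement - 1 for the dropped lea = 3, and that
   SPLIT_COST is what ix86_lea_outperforms weighs against the expected
   AGU stall. */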
18265
18266 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18267 matches the destination. The RTX includes a clobber of FLAGS_REG. */
18268
18269 static void
18270 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18271 rtx dst, rtx src)
18272 {
18273 rtx op, clob;
18274
18275 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18276 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18277
18278 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18279 }
18280
18281 /* Return true if the definition of REGNO1 is nearer to INSN than that of REGNO2. */
18282
18283 static bool
18284 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18285 {
18286 rtx prev = insn;
18287 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18288
18289 if (insn == start)
18290 return false;
18291 while (prev && prev != start)
18292 {
18293 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18294 {
18295 prev = PREV_INSN (prev);
18296 continue;
18297 }
18298 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18299 return true;
18300 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18301 return false;
18302 prev = PREV_INSN (prev);
18303 }
18304
18305 /* None of the regs is defined in the bb. */
18306 return false;
18307 }
18308
18309 /* Split the lea instruction into a sequence of instructions
18310 which are executed on the ALU to avoid AGU stalls.
18311 It is assumed that it is allowed to clobber the flags register
18312 at the lea position. */
18313
18314 void
18315 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18316 {
18317 unsigned int regno0, regno1, regno2;
18318 struct ix86_address parts;
18319 rtx target, tmp;
18320 int ok, adds;
18321
18322 ok = ix86_decompose_address (operands[1], &parts);
18323 gcc_assert (ok);
18324
18325 target = gen_lowpart (mode, operands[0]);
18326
18327 regno0 = true_regnum (target);
18328 regno1 = INVALID_REGNUM;
18329 regno2 = INVALID_REGNUM;
18330
18331 if (parts.base)
18332 {
18333 parts.base = gen_lowpart (mode, parts.base);
18334 regno1 = true_regnum (parts.base);
18335 }
18336
18337 if (parts.index)
18338 {
18339 parts.index = gen_lowpart (mode, parts.index);
18340 regno2 = true_regnum (parts.index);
18341 }
18342
18343 if (parts.disp)
18344 parts.disp = gen_lowpart (mode, parts.disp);
18345
18346 if (parts.scale > 1)
18347 {
18348 /* Case r1 = r1 + ... */
18349 if (regno1 == regno0)
18350 {
18351 /* If we have the case r1 = r1 + C * r2 then we
18352 would have to use multiplication, which is very
18353 expensive. Assume the cost model is wrong if we
18354 see such a case here. */
18355 gcc_assert (regno2 != regno0);
18356
18357 for (adds = parts.scale; adds > 0; adds--)
18358 ix86_emit_binop (PLUS, mode, target, parts.index);
18359 }
18360 else
18361 {
18362 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18363 if (regno0 != regno2)
18364 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18365
18366 /* Use shift for scaling. */
18367 ix86_emit_binop (ASHIFT, mode, target,
18368 GEN_INT (exact_log2 (parts.scale)));
18369
18370 if (parts.base)
18371 ix86_emit_binop (PLUS, mode, target, parts.base);
18372
18373 if (parts.disp && parts.disp != const0_rtx)
18374 ix86_emit_binop (PLUS, mode, target, parts.disp);
18375 }
18376 }
18377 else if (!parts.base && !parts.index)
18378 {
18379 gcc_assert(parts.disp);
18380 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18381 }
18382 else
18383 {
18384 if (!parts.base)
18385 {
18386 if (regno0 != regno2)
18387 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18388 }
18389 else if (!parts.index)
18390 {
18391 if (regno0 != regno1)
18392 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18393 }
18394 else
18395 {
18396 if (regno0 == regno1)
18397 tmp = parts.index;
18398 else if (regno0 == regno2)
18399 tmp = parts.base;
18400 else
18401 {
18402 rtx tmp1;
18403
18404 /* Find the better operand for the SET instruction, depending
18405 on which definition is farther from the insn. */
18406 if (find_nearest_reg_def (insn, regno1, regno2))
18407 tmp = parts.index, tmp1 = parts.base;
18408 else
18409 tmp = parts.base, tmp1 = parts.index;
18410
18411 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18412
18413 if (parts.disp && parts.disp != const0_rtx)
18414 ix86_emit_binop (PLUS, mode, target, parts.disp);
18415
18416 ix86_emit_binop (PLUS, mode, target, tmp1);
18417 return;
18418 }
18419
18420 ix86_emit_binop (PLUS, mode, target, tmp);
18421 }
18422
18423 if (parts.disp && parts.disp != const0_rtx)
18424 ix86_emit_binop (PLUS, mode, target, parts.disp);
18425 }
18426 }
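/* As a sketch of the splitting above, a 3-component address such as
   lea (%rbx,%rcx,4), %rax (illustrative registers) is replaced by

       mov  %rcx, %rax      ; copy the index into the destination
       shl  $2, %rax        ; scale by shifting (log2 (4) == 2)
       add  %rbx, %rax      ; add the base

   with a final add of the displacement when it is nonzero. */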
18427
18428 /* Return true if it is ok to optimize an ADD operation to an LEA
18429 operation to avoid flag register consumption. For most processors,
18430 ADD is faster than LEA. For processors like BONNELL, if the
18431 destination register of the LEA holds an actual address which will be
18432 used soon, LEA is better, otherwise ADD is better. */
18433
18434 bool
18435 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18436 {
18437 unsigned int regno0 = true_regnum (operands[0]);
18438 unsigned int regno1 = true_regnum (operands[1]);
18439 unsigned int regno2 = true_regnum (operands[2]);
18440
18441 /* If a = b + c, (a != b && a != c), we must use the lea form. */
18442 if (regno0 != regno1 && regno0 != regno2)
18443 return true;
18444
18445 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18446 return false;
18447
18448 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18449 }
18450
18451 /* Return true if destination reg of SET_BODY is shift count of
18452 USE_BODY. */
18453
18454 static bool
18455 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18456 {
18457 rtx set_dest;
18458 rtx shift_rtx;
18459 int i;
18460
18461 /* Retrieve destination of SET_BODY. */
18462 switch (GET_CODE (set_body))
18463 {
18464 case SET:
18465 set_dest = SET_DEST (set_body);
18466 if (!set_dest || !REG_P (set_dest))
18467 return false;
18468 break;
18469 case PARALLEL:
18470 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18471 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18472 use_body))
18473 return true;
18474 default:
18475 return false;
18476 break;
18477 }
18478
18479 /* Retrieve shift count of USE_BODY. */
18480 switch (GET_CODE (use_body))
18481 {
18482 case SET:
18483 shift_rtx = XEXP (use_body, 1);
18484 break;
18485 case PARALLEL:
18486 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18487 if (ix86_dep_by_shift_count_body (set_body,
18488 XVECEXP (use_body, 0, i)))
18489 return true;
18490 default:
18491 return false;
18492 break;
18493 }
18494
18495 if (shift_rtx
18496 && (GET_CODE (shift_rtx) == ASHIFT
18497 || GET_CODE (shift_rtx) == LSHIFTRT
18498 || GET_CODE (shift_rtx) == ASHIFTRT
18499 || GET_CODE (shift_rtx) == ROTATE
18500 || GET_CODE (shift_rtx) == ROTATERT))
18501 {
18502 rtx shift_count = XEXP (shift_rtx, 1);
18503
18504 /* Return true if shift count is dest of SET_BODY. */
18505 if (REG_P (shift_count))
18506 {
18507 /* Add this check since it can be invoked before register
18508 allocation in the pre-reload scheduler. */
18509 if (reload_completed
18510 && true_regnum (set_dest) == true_regnum (shift_count))
18511 return true;
18512 else if (REGNO (set_dest) == REGNO (shift_count))
18513 return true;
18514 }
18515 }
18516
18517 return false;
18518 }
18519
18520 /* Return true if destination reg of SET_INSN is shift count of
18521 USE_INSN. */
18522
18523 bool
18524 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18525 {
18526 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18527 PATTERN (use_insn));
18528 }
18529
18530 /* Return TRUE or FALSE depending on whether the unary operator meets the
18531 appropriate constraints. */
18532
18533 bool
18534 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18535 enum machine_mode mode ATTRIBUTE_UNUSED,
18536 rtx operands[2])
18537 {
18538 /* If one of operands is memory, source and destination must match. */
18539 if ((MEM_P (operands[0])
18540 || MEM_P (operands[1]))
18541 && ! rtx_equal_p (operands[0], operands[1]))
18542 return false;
18543 return true;
18544 }
18545
18546 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18547 are ok, keeping in mind the possible movddup alternative. */
18548
18549 bool
18550 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18551 {
18552 if (MEM_P (operands[0]))
18553 return rtx_equal_p (operands[0], operands[1 + high]);
18554 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18555 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18556 return true;
18557 }
18558
18559 /* Post-reload splitter for converting an SFmode or DFmode value in an
18560 SSE register into an unsigned SImode value. */
18561
18562 void
18563 ix86_split_convert_uns_si_sse (rtx operands[])
18564 {
18565 enum machine_mode vecmode;
18566 rtx value, large, zero_or_two31, input, two31, x;
18567
18568 large = operands[1];
18569 zero_or_two31 = operands[2];
18570 input = operands[3];
18571 two31 = operands[4];
18572 vecmode = GET_MODE (large);
18573 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18574
18575 /* Load up the value into the low element. We must ensure that the other
18576 elements are valid floats -- zero is the easiest such value. */
18577 if (MEM_P (input))
18578 {
18579 if (vecmode == V4SFmode)
18580 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18581 else
18582 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18583 }
18584 else
18585 {
18586 input = gen_rtx_REG (vecmode, REGNO (input));
18587 emit_move_insn (value, CONST0_RTX (vecmode));
18588 if (vecmode == V4SFmode)
18589 emit_insn (gen_sse_movss (value, value, input));
18590 else
18591 emit_insn (gen_sse2_movsd (value, value, input));
18592 }
18593
18594 emit_move_insn (large, two31);
18595 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18596
18597 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18598 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18599
18600 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18601 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18602
18603 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18604 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18605
18606 large = gen_rtx_REG (V4SImode, REGNO (large));
18607 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18608
18609 x = gen_rtx_REG (V4SImode, REGNO (value));
18610 if (vecmode == V4SFmode)
18611 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18612 else
18613 emit_insn (gen_sse2_cvttpd2dq (x, value));
18614 value = x;
18615
18616 emit_insn (gen_xorv4si3 (value, value, large));
18617 }
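/* The split above implements the usual unsigned-from-float trick: values
   below 2^31 are converted directly, while larger values have 2^31
   subtracted before the signed conversion and the sign bit is then xored
   back in.  As a worked example (one lane, value chosen for illustration):
   converting 3e9 computes 3000000000 - 2147483648 = 852516352, which the
   signed truncating conversion handles exactly, and the final xor with
   0x80000000 restores 3000000000. */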
18618
18619 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18620 Expects the 64-bit DImode to be supplied in a pair of integral
18621 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18622 -mfpmath=sse, !optimize_size only. */
18623
18624 void
18625 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18626 {
18627 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18628 rtx int_xmm, fp_xmm;
18629 rtx biases, exponents;
18630 rtx x;
18631
18632 int_xmm = gen_reg_rtx (V4SImode);
18633 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18634 emit_insn (gen_movdi_to_sse (int_xmm, input));
18635 else if (TARGET_SSE_SPLIT_REGS)
18636 {
18637 emit_clobber (int_xmm);
18638 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18639 }
18640 else
18641 {
18642 x = gen_reg_rtx (V2DImode);
18643 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18644 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18645 }
18646
18647 x = gen_rtx_CONST_VECTOR (V4SImode,
18648 gen_rtvec (4, GEN_INT (0x43300000UL),
18649 GEN_INT (0x45300000UL),
18650 const0_rtx, const0_rtx));
18651 exponents = validize_mem (force_const_mem (V4SImode, x));
18652
18653 /* int_xmm = { 0x45300000UL, fp_xmm/hi, 0x43300000UL, fp_xmm/lo } */
18654 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18655
18656 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18657 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18658 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18659 (0x1.0p84 + double(fp_value_hi_xmm)).
18660 Note these exponents differ by 32. */
18661
18662 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18663
18664 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18665 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18666 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18667 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18668 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18669 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18670 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18671 biases = validize_mem (force_const_mem (V2DFmode, biases));
18672 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18673
18674 /* Add the upper and lower DFmode values together. */
18675 if (TARGET_SSE3)
18676 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18677 else
18678 {
18679 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18680 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18681 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18682 }
18683
18684 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18685 }
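/* A worked instance of the bias trick described above (values purely for
   illustration): for the input 2^32 + 5 the low half is 5 and the high
   half is 1, so the two doubles read back as 0x1.0p52 + 5 and
   0x1.0p84 + 1 * 2^32; subtracting the 0x1.0p52 and 0x1.0p84 biases
   leaves 5.0 and 4294967296.0, and the final add produces 4294967301.0,
   the exact DFmode value of the input. */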
18686
18687 /* Not used, but eases macroization of patterns. */
18688 void
18689 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18690 rtx input ATTRIBUTE_UNUSED)
18691 {
18692 gcc_unreachable ();
18693 }
18694
18695 /* Convert an unsigned SImode value into a DFmode value. Currently only
18696 used for SSE, but applicable anywhere. */
18697
18698 void
18699 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18700 {
18701 REAL_VALUE_TYPE TWO31r;
18702 rtx x, fp;
18703
18704 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18705 NULL, 1, OPTAB_DIRECT);
18706
18707 fp = gen_reg_rtx (DFmode);
18708 emit_insn (gen_floatsidf2 (fp, x));
18709
18710 real_ldexp (&TWO31r, &dconst1, 31);
18711 x = const_double_from_real_value (TWO31r, DFmode);
18712
18713 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18714 if (x != target)
18715 emit_move_insn (target, x);
18716 }
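/* For example (a sketch of the sequence above): converting 0xffffffff
   first adds -2^31 with wrap-around, giving 0x7fffffff = 2147483647,
   which the signed floatsidf converts exactly; adding the 0x1.0p31
   constant back then yields 4294967295.0. */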
18717
18718 /* Convert a signed DImode value into a DFmode value. Only used for SSE
18719 in 32-bit mode; otherwise we have a direct convert instruction. */
18720
18721 void
18722 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18723 {
18724 REAL_VALUE_TYPE TWO32r;
18725 rtx fp_lo, fp_hi, x;
18726
18727 fp_lo = gen_reg_rtx (DFmode);
18728 fp_hi = gen_reg_rtx (DFmode);
18729
18730 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18731
18732 real_ldexp (&TWO32r, &dconst1, 32);
18733 x = const_double_from_real_value (TWO32r, DFmode);
18734 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18735
18736 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18737
18738 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18739 0, OPTAB_DIRECT);
18740 if (x != target)
18741 emit_move_insn (target, x);
18742 }
18743
18744 /* Convert an unsigned SImode value into an SFmode value, using only SSE.
18745 For x86_32, -mfpmath=sse, !optimize_size only. */
18746 void
18747 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18748 {
18749 REAL_VALUE_TYPE ONE16r;
18750 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18751
18752 real_ldexp (&ONE16r, &dconst1, 16);
18753 x = const_double_from_real_value (ONE16r, SFmode);
18754 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18755 NULL, 0, OPTAB_DIRECT);
18756 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18757 NULL, 0, OPTAB_DIRECT);
18758 fp_hi = gen_reg_rtx (SFmode);
18759 fp_lo = gen_reg_rtx (SFmode);
18760 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18761 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18762 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18763 0, OPTAB_DIRECT);
18764 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18765 0, OPTAB_DIRECT);
18766 if (!rtx_equal_p (target, fp_hi))
18767 emit_move_insn (target, fp_hi);
18768 }
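/* A worked example of the 16/16 split above (input chosen only for
   illustration): for 0x12345678 the low half is 0x5678 = 22136 and the
   high half is 0x1234 = 4660; both convert exactly to SFmode, and
   4660 * 65536.0 + 22136.0 = 305419896.0 recovers the input value
   (up to the usual single-precision rounding of the final sum). */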
18769
18770 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18771 a vector of unsigned ints VAL to a vector of floats TARGET. */
18772
18773 void
18774 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18775 {
18776 rtx tmp[8];
18777 REAL_VALUE_TYPE TWO16r;
18778 enum machine_mode intmode = GET_MODE (val);
18779 enum machine_mode fltmode = GET_MODE (target);
18780 rtx (*cvt) (rtx, rtx);
18781
18782 if (intmode == V4SImode)
18783 cvt = gen_floatv4siv4sf2;
18784 else
18785 cvt = gen_floatv8siv8sf2;
18786 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18787 tmp[0] = force_reg (intmode, tmp[0]);
18788 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18789 OPTAB_DIRECT);
18790 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18791 NULL_RTX, 1, OPTAB_DIRECT);
18792 tmp[3] = gen_reg_rtx (fltmode);
18793 emit_insn (cvt (tmp[3], tmp[1]));
18794 tmp[4] = gen_reg_rtx (fltmode);
18795 emit_insn (cvt (tmp[4], tmp[2]));
18796 real_ldexp (&TWO16r, &dconst1, 16);
18797 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18798 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18799 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18800 OPTAB_DIRECT);
18801 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18802 OPTAB_DIRECT);
18803 if (tmp[7] != target)
18804 emit_move_insn (target, tmp[7]);
18805 }
18806
18807 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18808 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18809 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18810 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18811
18812 rtx
18813 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18814 {
18815 REAL_VALUE_TYPE TWO31r;
18816 rtx two31r, tmp[4];
18817 enum machine_mode mode = GET_MODE (val);
18818 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18819 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18820 rtx (*cmp) (rtx, rtx, rtx, rtx);
18821 int i;
18822
18823 for (i = 0; i < 3; i++)
18824 tmp[i] = gen_reg_rtx (mode);
18825 real_ldexp (&TWO31r, &dconst1, 31);
18826 two31r = const_double_from_real_value (TWO31r, scalarmode);
18827 two31r = ix86_build_const_vector (mode, 1, two31r);
18828 two31r = force_reg (mode, two31r);
18829 switch (mode)
18830 {
18831 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18832 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18833 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18834 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18835 default: gcc_unreachable ();
18836 }
18837 tmp[3] = gen_rtx_LE (mode, two31r, val);
18838 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18839 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18840 0, OPTAB_DIRECT);
18841 if (intmode == V4SImode || TARGET_AVX2)
18842 *xorp = expand_simple_binop (intmode, ASHIFT,
18843 gen_lowpart (intmode, tmp[0]),
18844 GEN_INT (31), NULL_RTX, 0,
18845 OPTAB_DIRECT);
18846 else
18847 {
18848 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18849 two31 = ix86_build_const_vector (intmode, 1, two31);
18850 *xorp = expand_simple_binop (intmode, AND,
18851 gen_lowpart (intmode, tmp[0]),
18852 two31, NULL_RTX, 0,
18853 OPTAB_DIRECT);
18854 }
18855 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18856 0, OPTAB_DIRECT);
18857 }
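/* As a sketch of the adjustment above on a single lane: for the value 3e9
   the compare sets the mask, 0x1.0p31 is subtracted to give 852516352.0,
   and *XORP receives 0x80000000 in that lane, so after the caller's signed
   fix_trunc the xor restores the unsigned result 3000000000; lanes below
   0x1p31 get a zero mask and pass through the signed conversion
   unchanged. */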
18858
18859 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18860 then replicate the value for all elements of the vector
18861 register. */
18862
18863 rtx
18864 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18865 {
18866 int i, n_elt;
18867 rtvec v;
18868 enum machine_mode scalar_mode;
18869
18870 switch (mode)
18871 {
18872 case V64QImode:
18873 case V32QImode:
18874 case V16QImode:
18875 case V32HImode:
18876 case V16HImode:
18877 case V8HImode:
18878 case V16SImode:
18879 case V8SImode:
18880 case V4SImode:
18881 case V8DImode:
18882 case V4DImode:
18883 case V2DImode:
18884 gcc_assert (vect);
18885 case V16SFmode:
18886 case V8SFmode:
18887 case V4SFmode:
18888 case V8DFmode:
18889 case V4DFmode:
18890 case V2DFmode:
18891 n_elt = GET_MODE_NUNITS (mode);
18892 v = rtvec_alloc (n_elt);
18893 scalar_mode = GET_MODE_INNER (mode);
18894
18895 RTVEC_ELT (v, 0) = value;
18896
18897 for (i = 1; i < n_elt; ++i)
18898 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18899
18900 return gen_rtx_CONST_VECTOR (mode, v);
18901
18902 default:
18903 gcc_unreachable ();
18904 }
18905 }
18906
18907 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18908 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18909 for an SSE register. If VECT is true, then replicate the mask for
18910 all elements of the vector register. If INVERT is true, then create
18911 a mask excluding the sign bit. */
18912
18913 rtx
18914 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18915 {
18916 enum machine_mode vec_mode, imode;
18917 HOST_WIDE_INT hi, lo;
18918 int shift = 63;
18919 rtx v;
18920 rtx mask;
18921
18922 /* Find the sign bit, sign extended to 2*HWI. */
18923 switch (mode)
18924 {
18925 case V16SImode:
18926 case V16SFmode:
18927 case V8SImode:
18928 case V4SImode:
18929 case V8SFmode:
18930 case V4SFmode:
18931 vec_mode = mode;
18932 mode = GET_MODE_INNER (mode);
18933 imode = SImode;
18934 lo = 0x80000000, hi = lo < 0;
18935 break;
18936
18937 case V8DImode:
18938 case V4DImode:
18939 case V2DImode:
18940 case V8DFmode:
18941 case V4DFmode:
18942 case V2DFmode:
18943 vec_mode = mode;
18944 mode = GET_MODE_INNER (mode);
18945 imode = DImode;
18946 if (HOST_BITS_PER_WIDE_INT >= 64)
18947 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18948 else
18949 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18950 break;
18951
18952 case TImode:
18953 case TFmode:
18954 vec_mode = VOIDmode;
18955 if (HOST_BITS_PER_WIDE_INT >= 64)
18956 {
18957 imode = TImode;
18958 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18959 }
18960 else
18961 {
18962 rtvec vec;
18963
18964 imode = DImode;
18965 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18966
18967 if (invert)
18968 {
18969 lo = ~lo, hi = ~hi;
18970 v = constm1_rtx;
18971 }
18972 else
18973 v = const0_rtx;
18974
18975 mask = immed_double_const (lo, hi, imode);
18976
18977 vec = gen_rtvec (2, v, mask);
18978 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
18979 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
18980
18981 return v;
18982 }
18983 break;
18984
18985 default:
18986 gcc_unreachable ();
18987 }
18988
18989 if (invert)
18990 lo = ~lo, hi = ~hi;
18991
18992 /* Force this value into the low part of a fp vector constant. */
18993 mask = immed_double_const (lo, hi, imode);
18994 mask = gen_lowpart (mode, mask);
18995
18996 if (vec_mode == VOIDmode)
18997 return force_reg (mode, mask);
18998
18999 v = ix86_build_const_vector (vec_mode, vect, mask);
19000 return force_reg (vec_mode, v);
19001 }
19002
19003 /* Generate code for floating point ABS or NEG. */
19004
19005 void
19006 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19007 rtx operands[])
19008 {
19009 rtx mask, set, dst, src;
19010 bool use_sse = false;
19011 bool vector_mode = VECTOR_MODE_P (mode);
19012 enum machine_mode vmode = mode;
19013
19014 if (vector_mode)
19015 use_sse = true;
19016 else if (mode == TFmode)
19017 use_sse = true;
19018 else if (TARGET_SSE_MATH)
19019 {
19020 use_sse = SSE_FLOAT_MODE_P (mode);
19021 if (mode == SFmode)
19022 vmode = V4SFmode;
19023 else if (mode == DFmode)
19024 vmode = V2DFmode;
19025 }
19026
19027 /* NEG and ABS performed with SSE use bitwise mask operations.
19028 Create the appropriate mask now. */
19029 if (use_sse)
19030 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19031 else
19032 mask = NULL_RTX;
19033
19034 dst = operands[0];
19035 src = operands[1];
19036
19037 set = gen_rtx_fmt_e (code, mode, src);
19038 set = gen_rtx_SET (VOIDmode, dst, set);
19039
19040 if (mask)
19041 {
19042 rtx use, clob;
19043 rtvec par;
19044
19045 use = gen_rtx_USE (VOIDmode, mask);
19046 if (vector_mode)
19047 par = gen_rtvec (2, set, use);
19048 else
19049 {
19050 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19051 par = gen_rtvec (3, set, use, clob);
19052 }
19053 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19054 }
19055 else
19056 emit_insn (set);
19057 }
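/* As an illustration of the masks used above (the exact instructions
   depend on the mode and register allocation): for DFmode NEG with SSE
   math the mask is a V2DF constant with only bit 63 set in each element,
   and the operation is ultimately implemented as an xor with that mask;
   for ABS the inverted mask (all bits except bit 63) is used with an and,
   clearing the sign bit. */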
19058
19059 /* Expand a copysign operation. Special case operand 0 being a constant. */
19060
19061 void
19062 ix86_expand_copysign (rtx operands[])
19063 {
19064 enum machine_mode mode, vmode;
19065 rtx dest, op0, op1, mask, nmask;
19066
19067 dest = operands[0];
19068 op0 = operands[1];
19069 op1 = operands[2];
19070
19071 mode = GET_MODE (dest);
19072
19073 if (mode == SFmode)
19074 vmode = V4SFmode;
19075 else if (mode == DFmode)
19076 vmode = V2DFmode;
19077 else
19078 vmode = mode;
19079
19080 if (GET_CODE (op0) == CONST_DOUBLE)
19081 {
19082 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19083
19084 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19085 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19086
19087 if (mode == SFmode || mode == DFmode)
19088 {
19089 if (op0 == CONST0_RTX (mode))
19090 op0 = CONST0_RTX (vmode);
19091 else
19092 {
19093 rtx v = ix86_build_const_vector (vmode, false, op0);
19094
19095 op0 = force_reg (vmode, v);
19096 }
19097 }
19098 else if (op0 != CONST0_RTX (mode))
19099 op0 = force_reg (mode, op0);
19100
19101 mask = ix86_build_signbit_mask (vmode, 0, 0);
19102
19103 if (mode == SFmode)
19104 copysign_insn = gen_copysignsf3_const;
19105 else if (mode == DFmode)
19106 copysign_insn = gen_copysigndf3_const;
19107 else
19108 copysign_insn = gen_copysigntf3_const;
19109
19110 emit_insn (copysign_insn (dest, op0, op1, mask));
19111 }
19112 else
19113 {
19114 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19115
19116 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19117 mask = ix86_build_signbit_mask (vmode, 0, 0);
19118
19119 if (mode == SFmode)
19120 copysign_insn = gen_copysignsf3_var;
19121 else if (mode == DFmode)
19122 copysign_insn = gen_copysigndf3_var;
19123 else
19124 copysign_insn = gen_copysigntf3_var;
19125
19126 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19127 }
19128 }
19129
19130 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19131 be a constant, and so has already been expanded into a vector constant. */
19132
19133 void
19134 ix86_split_copysign_const (rtx operands[])
19135 {
19136 enum machine_mode mode, vmode;
19137 rtx dest, op0, mask, x;
19138
19139 dest = operands[0];
19140 op0 = operands[1];
19141 mask = operands[3];
19142
19143 mode = GET_MODE (dest);
19144 vmode = GET_MODE (mask);
19145
19146 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19147 x = gen_rtx_AND (vmode, dest, mask);
19148 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19149
19150 if (op0 != CONST0_RTX (vmode))
19151 {
19152 x = gen_rtx_IOR (vmode, dest, op0);
19153 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19154 }
19155 }
19156
19157 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19158 so we have to do two masks. */
19159
19160 void
19161 ix86_split_copysign_var (rtx operands[])
19162 {
19163 enum machine_mode mode, vmode;
19164 rtx dest, scratch, op0, op1, mask, nmask, x;
19165
19166 dest = operands[0];
19167 scratch = operands[1];
19168 op0 = operands[2];
19169 op1 = operands[3];
19170 nmask = operands[4];
19171 mask = operands[5];
19172
19173 mode = GET_MODE (dest);
19174 vmode = GET_MODE (mask);
19175
19176 if (rtx_equal_p (op0, op1))
19177 {
19178 /* Shouldn't happen often (it's useless, obviously), but when it does
19179 we'd generate incorrect code if we continue below. */
19180 emit_move_insn (dest, op0);
19181 return;
19182 }
19183
19184 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19185 {
19186 gcc_assert (REGNO (op1) == REGNO (scratch));
19187
19188 x = gen_rtx_AND (vmode, scratch, mask);
19189 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19190
19191 dest = mask;
19192 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19193 x = gen_rtx_NOT (vmode, dest);
19194 x = gen_rtx_AND (vmode, x, op0);
19195 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19196 }
19197 else
19198 {
19199 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19200 {
19201 x = gen_rtx_AND (vmode, scratch, mask);
19202 }
19203 else /* alternative 2,4 */
19204 {
19205 gcc_assert (REGNO (mask) == REGNO (scratch));
19206 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19207 x = gen_rtx_AND (vmode, scratch, op1);
19208 }
19209 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19210
19211 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19212 {
19213 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19214 x = gen_rtx_AND (vmode, dest, nmask);
19215 }
19216 else /* alternative 3,4 */
19217 {
19218 gcc_assert (REGNO (nmask) == REGNO (dest));
19219 dest = nmask;
19220 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19221 x = gen_rtx_AND (vmode, dest, op0);
19222 }
19223 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19224 }
19225
19226 x = gen_rtx_IOR (vmode, dest, scratch);
19227 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19228 }
19229
19230 /* Return TRUE or FALSE depending on whether the first SET in INSN
19231 has source and destination with matching CC modes, and that the
19232 CC mode is at least as constrained as REQ_MODE. */
19233
19234 bool
19235 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19236 {
19237 rtx set;
19238 enum machine_mode set_mode;
19239
19240 set = PATTERN (insn);
19241 if (GET_CODE (set) == PARALLEL)
19242 set = XVECEXP (set, 0, 0);
19243 gcc_assert (GET_CODE (set) == SET);
19244 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19245
19246 set_mode = GET_MODE (SET_DEST (set));
19247 switch (set_mode)
19248 {
19249 case CCNOmode:
19250 if (req_mode != CCNOmode
19251 && (req_mode != CCmode
19252 || XEXP (SET_SRC (set), 1) != const0_rtx))
19253 return false;
19254 break;
19255 case CCmode:
19256 if (req_mode == CCGCmode)
19257 return false;
19258 /* FALLTHRU */
19259 case CCGCmode:
19260 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19261 return false;
19262 /* FALLTHRU */
19263 case CCGOCmode:
19264 if (req_mode == CCZmode)
19265 return false;
19266 /* FALLTHRU */
19267 case CCZmode:
19268 break;
19269
19270 case CCAmode:
19271 case CCCmode:
19272 case CCOmode:
19273 case CCSmode:
19274 if (set_mode != req_mode)
19275 return false;
19276 break;
19277
19278 default:
19279 gcc_unreachable ();
19280 }
19281
19282 return GET_MODE (SET_SRC (set)) == set_mode;
19283 }
19284
19285 /* Generate insn patterns to do an integer compare of OPERANDS. */
19286
19287 static rtx
19288 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19289 {
19290 enum machine_mode cmpmode;
19291 rtx tmp, flags;
19292
19293 cmpmode = SELECT_CC_MODE (code, op0, op1);
19294 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19295
19296 /* This is very simple, but making the interface the same as in the
19297 FP case makes the rest of the code easier. */
19298 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19299 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19300
19301 /* Return the test that should be put into the flags user, i.e.
19302 the bcc, scc, or cmov instruction. */
19303 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19304 }
19305
19306 /* Figure out whether to use ordered or unordered fp comparisons.
19307 Return the appropriate mode to use. */
19308
19309 enum machine_mode
19310 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19311 {
19312 /* ??? In order to make all comparisons reversible, we do all comparisons
19313 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19314 all forms of trapping and nontrapping comparisons, we can make inequality
19315 comparisons trapping again, since it results in better code when using
19316 FCOM based compares. */
19317 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19318 }
19319
19320 enum machine_mode
19321 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19322 {
19323 enum machine_mode mode = GET_MODE (op0);
19324
19325 if (SCALAR_FLOAT_MODE_P (mode))
19326 {
19327 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19328 return ix86_fp_compare_mode (code);
19329 }
19330
19331 switch (code)
19332 {
19333 /* Only zero flag is needed. */
19334 case EQ: /* ZF=0 */
19335 case NE: /* ZF!=0 */
19336 return CCZmode;
19337 /* Codes needing carry flag. */
19338 case GEU: /* CF=0 */
19339 case LTU: /* CF=1 */
19340 /* Detect overflow checks. They need just the carry flag. */
19341 if (GET_CODE (op0) == PLUS
19342 && rtx_equal_p (op1, XEXP (op0, 0)))
19343 return CCCmode;
19344 else
19345 return CCmode;
19346 case GTU: /* CF=0 & ZF=0 */
19347 case LEU: /* CF=1 | ZF=1 */
19348 return CCmode;
19349 /* Codes possibly doable only with sign flag when
19350 comparing against zero. */
19351 case GE: /* SF=OF or SF=0 */
19352 case LT: /* SF<>OF or SF=1 */
19353 if (op1 == const0_rtx)
19354 return CCGOCmode;
19355 else
19356 /* For other cases Carry flag is not required. */
19357 return CCGCmode;
19358 /* Codes doable only with the sign flag when comparing
19359 against zero, but for which we miss the jump instruction,
19360 so we need to use relational tests against overflow,
19361 which thus needs to be zero. */
19362 case GT: /* ZF=0 & SF=OF */
19363 case LE: /* ZF=1 | SF<>OF */
19364 if (op1 == const0_rtx)
19365 return CCNOmode;
19366 else
19367 return CCGCmode;
19368 /* The strcmp pattern does (use flags) and combine may ask us for a
19369 proper mode. */
19370 case USE:
19371 return CCmode;
19372 default:
19373 gcc_unreachable ();
19374 }
19375 }
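/* A few examples of the mode selection above: a pure equality test such
   as x == y only needs CCZmode; an unsigned overflow check of the form
   (a + b) < a maps GEU/LTU to CCCmode because only the carry flag is
   needed; x > 0 can use CCNOmode, while x >= y against a nonzero operand
   falls back to CCGCmode. */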
19376
19377 /* Return the fixed registers used for condition codes. */
19378
19379 static bool
19380 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19381 {
19382 *p1 = FLAGS_REG;
19383 *p2 = FPSR_REG;
19384 return true;
19385 }
19386
19387 /* If two condition code modes are compatible, return a condition code
19388 mode which is compatible with both. Otherwise, return
19389 VOIDmode. */
19390
19391 static enum machine_mode
19392 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19393 {
19394 if (m1 == m2)
19395 return m1;
19396
19397 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19398 return VOIDmode;
19399
19400 if ((m1 == CCGCmode && m2 == CCGOCmode)
19401 || (m1 == CCGOCmode && m2 == CCGCmode))
19402 return CCGCmode;
19403
19404 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19405 return m2;
19406 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19407 return m1;
19408
19409 switch (m1)
19410 {
19411 default:
19412 gcc_unreachable ();
19413
19414 case CCmode:
19415 case CCGCmode:
19416 case CCGOCmode:
19417 case CCNOmode:
19418 case CCAmode:
19419 case CCCmode:
19420 case CCOmode:
19421 case CCSmode:
19422 case CCZmode:
19423 switch (m2)
19424 {
19425 default:
19426 return VOIDmode;
19427
19428 case CCmode:
19429 case CCGCmode:
19430 case CCGOCmode:
19431 case CCNOmode:
19432 case CCAmode:
19433 case CCCmode:
19434 case CCOmode:
19435 case CCSmode:
19436 case CCZmode:
19437 return CCmode;
19438 }
19439
19440 case CCFPmode:
19441 case CCFPUmode:
19442 /* These are only compatible with themselves, which we already
19443 checked above. */
19444 return VOIDmode;
19445 }
19446 }
19447
19448
19449 /* Return a comparison we can do that is equivalent to
19450 swap_condition (code), except possibly for orderedness.
19451 But never change orderedness if TARGET_IEEE_FP, returning
19452 UNKNOWN in that case if necessary. */
19453
19454 static enum rtx_code
19455 ix86_fp_swap_condition (enum rtx_code code)
19456 {
19457 switch (code)
19458 {
19459 case GT: /* GTU - CF=0 & ZF=0 */
19460 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19461 case GE: /* GEU - CF=0 */
19462 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19463 case UNLT: /* LTU - CF=1 */
19464 return TARGET_IEEE_FP ? UNKNOWN : GT;
19465 case UNLE: /* LEU - CF=1 | ZF=1 */
19466 return TARGET_IEEE_FP ? UNKNOWN : GE;
19467 default:
19468 return swap_condition (code);
19469 }
19470 }
19471
19472 /* Return the cost of comparison CODE using the best strategy for performance.
19473 All of the following functions use the number of instructions as the cost metric.
19474 In the future this should be tweaked to compute bytes for optimize_size and
19475 to take into account the performance of various instructions on various CPUs. */
19476
19477 static int
19478 ix86_fp_comparison_cost (enum rtx_code code)
19479 {
19480 int arith_cost;
19481
19482 /* The cost of code using bit-twiddling on %ah. */
19483 switch (code)
19484 {
19485 case UNLE:
19486 case UNLT:
19487 case LTGT:
19488 case GT:
19489 case GE:
19490 case UNORDERED:
19491 case ORDERED:
19492 case UNEQ:
19493 arith_cost = 4;
19494 break;
19495 case LT:
19496 case NE:
19497 case EQ:
19498 case UNGE:
19499 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19500 break;
19501 case LE:
19502 case UNGT:
19503 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19504 break;
19505 default:
19506 gcc_unreachable ();
19507 }
19508
19509 switch (ix86_fp_comparison_strategy (code))
19510 {
19511 case IX86_FPCMP_COMI:
19512 return arith_cost > 4 ? 3 : 2;
19513 case IX86_FPCMP_SAHF:
19514 return arith_cost > 4 ? 4 : 3;
19515 default:
19516 return arith_cost;
19517 }
19518 }
19519
19520 /* Return the strategy to use for a floating-point compare. We assume that
19521 fcomi is always preferable where available, since that is also true when
19522 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19523
19524 enum ix86_fpcmp_strategy
19525 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19526 {
19527 /* Do fcomi/sahf based test when profitable. */
19528
19529 if (TARGET_CMOVE)
19530 return IX86_FPCMP_COMI;
19531
19532 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19533 return IX86_FPCMP_SAHF;
19534
19535 return IX86_FPCMP_ARITH;
19536 }
19537
19538 /* Swap, force into registers, or otherwise massage the two operands
19539 to a fp comparison. The operands are updated in place; the new
19540 comparison code is returned. */
19541
19542 static enum rtx_code
19543 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19544 {
19545 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19546 rtx op0 = *pop0, op1 = *pop1;
19547 enum machine_mode op_mode = GET_MODE (op0);
19548 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19549
19550 /* All of the unordered compare instructions only work on registers.
19551 The same is true of the fcomi compare instructions. The XFmode
19552 compare instructions require registers except when comparing
19553 against zero or when converting operand 1 from fixed point to
19554 floating point. */
19555
19556 if (!is_sse
19557 && (fpcmp_mode == CCFPUmode
19558 || (op_mode == XFmode
19559 && ! (standard_80387_constant_p (op0) == 1
19560 || standard_80387_constant_p (op1) == 1)
19561 && GET_CODE (op1) != FLOAT)
19562 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19563 {
19564 op0 = force_reg (op_mode, op0);
19565 op1 = force_reg (op_mode, op1);
19566 }
19567 else
19568 {
19569       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
19570	 things around if it appears profitable, otherwise force op0
19571	 into a register.  */
19572
19573 if (standard_80387_constant_p (op0) == 0
19574 || (MEM_P (op0)
19575 && ! (standard_80387_constant_p (op1) == 0
19576 || MEM_P (op1))))
19577 {
19578 enum rtx_code new_code = ix86_fp_swap_condition (code);
19579 if (new_code != UNKNOWN)
19580 {
19581 rtx tmp;
19582 tmp = op0, op0 = op1, op1 = tmp;
19583 code = new_code;
19584 }
19585 }
19586
19587 if (!REG_P (op0))
19588 op0 = force_reg (op_mode, op0);
19589
19590 if (CONSTANT_P (op1))
19591 {
19592 int tmp = standard_80387_constant_p (op1);
19593 if (tmp == 0)
19594 op1 = validize_mem (force_const_mem (op_mode, op1));
19595 else if (tmp == 1)
19596 {
19597 if (TARGET_CMOVE)
19598 op1 = force_reg (op_mode, op1);
19599 }
19600 else
19601 op1 = force_reg (op_mode, op1);
19602 }
19603 }
19604
19605 /* Try to rearrange the comparison to make it cheaper. */
19606 if (ix86_fp_comparison_cost (code)
19607 > ix86_fp_comparison_cost (swap_condition (code))
19608 && (REG_P (op1) || can_create_pseudo_p ()))
19609 {
19610 rtx tmp;
19611 tmp = op0, op0 = op1, op1 = tmp;
19612 code = swap_condition (code);
19613 if (!REG_P (op0))
19614 op0 = force_reg (op_mode, op0);
19615 }
19616
19617 *pop0 = op0;
19618 *pop1 = op1;
19619 return code;
19620 }
19621
19622 /* Convert comparison codes we use to represent FP comparison to integer
19623 code that will result in proper branch. Return UNKNOWN if no such code
19624 is available. */
19625
19626 enum rtx_code
19627 ix86_fp_compare_code_to_integer (enum rtx_code code)
19628 {
19629 switch (code)
19630 {
19631 case GT:
19632 return GTU;
19633 case GE:
19634 return GEU;
19635     case ORDERED:
19636     case UNORDERED:
19637       return code;
19639     case UNEQ:
19640       return EQ;
19642     case UNLT:
19643       return LTU;
19645     case UNLE:
19646       return LEU;
19648     case LTGT:
19649       return NE;
19651 default:
19652 return UNKNOWN;
19653 }
19654 }
19655
19656 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19657
19658 static rtx
19659 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19660 {
19661 enum machine_mode fpcmp_mode, intcmp_mode;
19662 rtx tmp, tmp2;
19663
19664 fpcmp_mode = ix86_fp_compare_mode (code);
19665 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19666
19667 /* Do fcomi/sahf based test when profitable. */
19668 switch (ix86_fp_comparison_strategy (code))
19669 {
19670 case IX86_FPCMP_COMI:
19671 intcmp_mode = fpcmp_mode;
19672 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19673 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19674 tmp);
19675 emit_insn (tmp);
19676 break;
19677
19678 case IX86_FPCMP_SAHF:
19679 intcmp_mode = fpcmp_mode;
19680 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19681 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19682 tmp);
19683
19684 if (!scratch)
19685 scratch = gen_reg_rtx (HImode);
19686 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19687 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19688 break;
19689
19690 case IX86_FPCMP_ARITH:
19691 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19692 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19693 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19694 if (!scratch)
19695 scratch = gen_reg_rtx (HImode);
19696 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19697
19698       /* In the unordered case, we have to check C2 for NaNs, which
19699	 doesn't happen to work out to anything nice combination-wise.
19700	 So do some bit twiddling on the value we've got in AH to come
19701	 up with an appropriate set of condition codes.  */
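      /* After the fnstsw, the x87 condition bits sit in AH as C0 = 0x01,
	 C2 = 0x04 and C3 = 0x40, so a mask such as 0x45 below tests
	 C3|C2|C0.  */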
19702
19703 intcmp_mode = CCNOmode;
19704 switch (code)
19705 {
19706 case GT:
19707 case UNGT:
19708 if (code == GT || !TARGET_IEEE_FP)
19709 {
19710 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19711 code = EQ;
19712 }
19713 else
19714 {
19715 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19716 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19717 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19718 intcmp_mode = CCmode;
19719 code = GEU;
19720 }
19721 break;
19722 case LT:
19723 case UNLT:
19724 if (code == LT && TARGET_IEEE_FP)
19725 {
19726 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19727 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19728 intcmp_mode = CCmode;
19729 code = EQ;
19730 }
19731 else
19732 {
19733 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19734 code = NE;
19735 }
19736 break;
19737 case GE:
19738 case UNGE:
19739 if (code == GE || !TARGET_IEEE_FP)
19740 {
19741 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19742 code = EQ;
19743 }
19744 else
19745 {
19746 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19747 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19748 code = NE;
19749 }
19750 break;
19751 case LE:
19752 case UNLE:
19753 if (code == LE && TARGET_IEEE_FP)
19754 {
19755 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19756 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19757 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19758 intcmp_mode = CCmode;
19759 code = LTU;
19760 }
19761 else
19762 {
19763 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19764 code = NE;
19765 }
19766 break;
19767 case EQ:
19768 case UNEQ:
19769 if (code == EQ && TARGET_IEEE_FP)
19770 {
19771 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19772 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19773 intcmp_mode = CCmode;
19774 code = EQ;
19775 }
19776 else
19777 {
19778 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19779 code = NE;
19780 }
19781 break;
19782 case NE:
19783 case LTGT:
19784 if (code == NE && TARGET_IEEE_FP)
19785 {
19786 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19787 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19788 GEN_INT (0x40)));
19789 code = NE;
19790 }
19791 else
19792 {
19793 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19794 code = EQ;
19795 }
19796 break;
19797
19798 case UNORDERED:
19799 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19800 code = NE;
19801 break;
19802 case ORDERED:
19803 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19804 code = EQ;
19805 break;
19806
19807 default:
19808 gcc_unreachable ();
19809 }
19810 break;
19811
19812 default:
19813       gcc_unreachable ();
19814 }
19815
19816 /* Return the test that should be put into the flags user, i.e.
19817 the bcc, scc, or cmov instruction. */
19818 return gen_rtx_fmt_ee (code, VOIDmode,
19819 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19820 const0_rtx);
19821 }
19822
19823 static rtx
19824 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19825 {
19826 rtx ret;
19827
19828 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19829 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19830
19831 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19832 {
19833 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19834 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19835 }
19836 else
19837 ret = ix86_expand_int_compare (code, op0, op1);
19838
19839 return ret;
19840 }
19841
19842 void
19843 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19844 {
19845 enum machine_mode mode = GET_MODE (op0);
19846 rtx tmp;
19847
19848 switch (mode)
19849 {
19850 case SFmode:
19851 case DFmode:
19852 case XFmode:
19853 case QImode:
19854 case HImode:
19855 case SImode:
19856 simple:
19857 tmp = ix86_expand_compare (code, op0, op1);
19858 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19859 gen_rtx_LABEL_REF (VOIDmode, label),
19860 pc_rtx);
19861 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19862 return;
19863
19864 case DImode:
19865 if (TARGET_64BIT)
19866 goto simple;
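      /* FALLTHRU */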
19867 case TImode:
19868 /* Expand DImode branch into multiple compare+branch. */
19869 {
19870 rtx lo[2], hi[2], label2;
19871 enum rtx_code code1, code2, code3;
19872 enum machine_mode submode;
19873
19874 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19875 {
19876 tmp = op0, op0 = op1, op1 = tmp;
19877 code = swap_condition (code);
19878 }
19879
19880 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19881 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19882
19883 submode = mode == DImode ? SImode : DImode;
19884
19885 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19886 avoid two branches. This costs one extra insn, so disable when
19887 optimizing for size. */
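	/* The OR of the two XORs is zero iff both halves compare equal, so a
	   single compare of the combined value against zero decides EQ/NE.  */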
19888
19889 if ((code == EQ || code == NE)
19890 && (!optimize_insn_for_size_p ()
19891 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19892 {
19893 rtx xor0, xor1;
19894
19895 xor1 = hi[0];
19896 if (hi[1] != const0_rtx)
19897 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19898 NULL_RTX, 0, OPTAB_WIDEN);
19899
19900 xor0 = lo[0];
19901 if (lo[1] != const0_rtx)
19902 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19903 NULL_RTX, 0, OPTAB_WIDEN);
19904
19905 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19906 NULL_RTX, 0, OPTAB_WIDEN);
19907
19908 ix86_expand_branch (code, tmp, const0_rtx, label);
19909 return;
19910 }
19911
19912	/* Otherwise, if we are doing a less-than or greater-or-equal comparison,
19913	   op1 is a constant and its low word is zero, then we can just
19914	   examine the high word.  Similarly for a low word of -1 and
19915	   less-or-equal or greater-than.  */
19916
19917 if (CONST_INT_P (hi[1]))
19918 switch (code)
19919 {
19920 case LT: case LTU: case GE: case GEU:
19921 if (lo[1] == const0_rtx)
19922 {
19923 ix86_expand_branch (code, hi[0], hi[1], label);
19924 return;
19925 }
19926 break;
19927 case LE: case LEU: case GT: case GTU:
19928 if (lo[1] == constm1_rtx)
19929 {
19930 ix86_expand_branch (code, hi[0], hi[1], label);
19931 return;
19932 }
19933 break;
19934 default:
19935 break;
19936 }
19937
19938 /* Otherwise, we need two or three jumps. */
19939
19940 label2 = gen_label_rtx ();
19941
19942 code1 = code;
19943 code2 = swap_condition (code);
19944 code3 = unsigned_condition (code);
19945
19946 switch (code)
19947 {
19948 case LT: case GT: case LTU: case GTU:
19949 break;
19950
19951 case LE: code1 = LT; code2 = GT; break;
19952 case GE: code1 = GT; code2 = LT; break;
19953 case LEU: code1 = LTU; code2 = GTU; break;
19954 case GEU: code1 = GTU; code2 = LTU; break;
19955
19956 case EQ: code1 = UNKNOWN; code2 = NE; break;
19957 case NE: code2 = UNKNOWN; break;
19958
19959 default:
19960 gcc_unreachable ();
19961 }
19962
19963 /*
19964 * a < b =>
19965 * if (hi(a) < hi(b)) goto true;
19966 * if (hi(a) > hi(b)) goto false;
19967 * if (lo(a) < lo(b)) goto true;
19968 * false:
19969 */
19970
19971 if (code1 != UNKNOWN)
19972 ix86_expand_branch (code1, hi[0], hi[1], label);
19973 if (code2 != UNKNOWN)
19974 ix86_expand_branch (code2, hi[0], hi[1], label2);
19975
19976 ix86_expand_branch (code3, lo[0], lo[1], label);
19977
19978 if (code2 != UNKNOWN)
19979 emit_label (label2);
19980 return;
19981 }
19982
19983 default:
19984 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
19985 goto simple;
19986 }
19987 }
19988
19989 /* Split branch based on floating point condition. */
19990 void
19991 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
19992 rtx target1, rtx target2, rtx tmp, rtx pushed)
19993 {
19994 rtx condition;
19995 rtx i;
19996
19997 if (target2 != pc_rtx)
19998 {
19999 rtx tmp = target2;
20000 code = reverse_condition_maybe_unordered (code);
20001 target2 = target1;
20002 target1 = tmp;
20003 }
20004
20005 condition = ix86_expand_fp_compare (code, op1, op2,
20006 tmp);
20007
20008 /* Remove pushed operand from stack. */
20009 if (pushed)
20010 ix86_free_from_memory (GET_MODE (pushed));
20011
20012 i = emit_jump_insn (gen_rtx_SET
20013 (VOIDmode, pc_rtx,
20014 gen_rtx_IF_THEN_ELSE (VOIDmode,
20015 condition, target1, target2)));
20016 if (split_branch_probability >= 0)
20017 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20018 }
20019
20020 void
20021 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20022 {
20023 rtx ret;
20024
20025 gcc_assert (GET_MODE (dest) == QImode);
20026
20027 ret = ix86_expand_compare (code, op0, op1);
20028 PUT_MODE (ret, QImode);
20029 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20030 }
20031
20032 /* Expand a comparison setting or clearing the carry flag.  Return true when
20033    successful and set *POP to the resulting comparison.  */
20034 static bool
20035 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20036 {
20037 enum machine_mode mode =
20038 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20039
20040   /* Do not handle double-mode compares that go through the special path.  */
20041 if (mode == (TARGET_64BIT ? TImode : DImode))
20042 return false;
20043
20044 if (SCALAR_FLOAT_MODE_P (mode))
20045 {
20046 rtx compare_op, compare_seq;
20047
20048 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20049
20050       /* Shortcut: the following common codes never translate
20051	 into carry flag compares.  */
20052 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20053 || code == ORDERED || code == UNORDERED)
20054 return false;
20055
20056       /* These comparisons require the zero flag; swap operands so they don't.  */
20057 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20058 && !TARGET_IEEE_FP)
20059 {
20060 rtx tmp = op0;
20061 op0 = op1;
20062 op1 = tmp;
20063 code = swap_condition (code);
20064 }
20065
20066       /* Try to expand the comparison and verify that we end up with a
20067	 carry flag based comparison.  This fails only when we decide to
20068	 expand the comparison using arithmetic, which is not a common
20069	 scenario.  */
20070 start_sequence ();
20071 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20072 compare_seq = get_insns ();
20073 end_sequence ();
20074
20075 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20076 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20077 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20078 else
20079 code = GET_CODE (compare_op);
20080
20081 if (code != LTU && code != GEU)
20082 return false;
20083
20084 emit_insn (compare_seq);
20085 *pop = compare_op;
20086 return true;
20087 }
20088
20089 if (!INTEGRAL_MODE_P (mode))
20090 return false;
20091
20092 switch (code)
20093 {
20094 case LTU:
20095 case GEU:
20096 break;
20097
20098 /* Convert a==0 into (unsigned)a<1. */
20099 case EQ:
20100 case NE:
20101 if (op1 != const0_rtx)
20102 return false;
20103 op1 = const1_rtx;
20104 code = (code == EQ ? LTU : GEU);
20105 break;
20106
20107     /* Convert a>b into b<a or a>=b+1.  */
20108 case GTU:
20109 case LEU:
20110 if (CONST_INT_P (op1))
20111 {
20112 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20113	  /* Bail out on overflow.  We could still swap the operands, but
20114	     that would force loading the constant into a register.  */
20115 if (op1 == const0_rtx
20116 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20117 return false;
20118 code = (code == GTU ? GEU : LTU);
20119 }
20120 else
20121 {
20122 rtx tmp = op1;
20123 op1 = op0;
20124 op0 = tmp;
20125 code = (code == GTU ? LTU : GEU);
20126 }
20127 break;
20128
20129 /* Convert a>=0 into (unsigned)a<0x80000000. */
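    /* Likewise a<0 becomes (unsigned)a>=0x80000000.  */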
20130 case LT:
20131 case GE:
20132 if (mode == DImode || op1 != const0_rtx)
20133 return false;
20134 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20135 code = (code == LT ? GEU : LTU);
20136 break;
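    /* Convert a<=-1 into (unsigned)a>=0x80000000 and a>-1 into
       (unsigned)a<0x80000000.  */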
20137 case LE:
20138 case GT:
20139 if (mode == DImode || op1 != constm1_rtx)
20140 return false;
20141 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20142 code = (code == LE ? GEU : LTU);
20143 break;
20144
20145 default:
20146 return false;
20147 }
20148   /* Swapping operands may cause a constant to appear as the first operand.  */
20149 if (!nonimmediate_operand (op0, VOIDmode))
20150 {
20151 if (!can_create_pseudo_p ())
20152 return false;
20153 op0 = force_reg (mode, op0);
20154 }
20155 *pop = ix86_expand_compare (code, op0, op1);
20156 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20157 return true;
20158 }
20159
20160 bool
20161 ix86_expand_int_movcc (rtx operands[])
20162 {
20163 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20164 rtx compare_seq, compare_op;
20165 enum machine_mode mode = GET_MODE (operands[0]);
20166 bool sign_bit_compare_p = false;
20167 rtx op0 = XEXP (operands[1], 0);
20168 rtx op1 = XEXP (operands[1], 1);
20169
20170 if (GET_MODE (op0) == TImode
20171 || (GET_MODE (op0) == DImode
20172 && !TARGET_64BIT))
20173 return false;
20174
20175 start_sequence ();
20176 compare_op = ix86_expand_compare (code, op0, op1);
20177 compare_seq = get_insns ();
20178 end_sequence ();
20179
20180 compare_code = GET_CODE (compare_op);
20181
20182 if ((op1 == const0_rtx && (code == GE || code == LT))
20183 || (op1 == constm1_rtx && (code == GT || code == LE)))
20184 sign_bit_compare_p = true;
20185
20186 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20187 HImode insns, we'd be swallowed in word prefix ops. */
20188
20189 if ((mode != HImode || TARGET_FAST_PREFIX)
20190 && (mode != (TARGET_64BIT ? TImode : DImode))
20191 && CONST_INT_P (operands[2])
20192 && CONST_INT_P (operands[3]))
20193 {
20194 rtx out = operands[0];
20195 HOST_WIDE_INT ct = INTVAL (operands[2]);
20196 HOST_WIDE_INT cf = INTVAL (operands[3]);
20197 HOST_WIDE_INT diff;
20198
20199 diff = ct - cf;
20200       /* Sign bit compares are better done using shifts than by
20201	 using sbb.  */
20202 if (sign_bit_compare_p
20203 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20204 {
20205 /* Detect overlap between destination and compare sources. */
20206 rtx tmp = out;
20207
20208 if (!sign_bit_compare_p)
20209 {
20210 rtx flags;
20211 bool fpcmp = false;
20212
20213 compare_code = GET_CODE (compare_op);
20214
20215 flags = XEXP (compare_op, 0);
20216
20217 if (GET_MODE (flags) == CCFPmode
20218 || GET_MODE (flags) == CCFPUmode)
20219 {
20220 fpcmp = true;
20221 compare_code
20222 = ix86_fp_compare_code_to_integer (compare_code);
20223 }
20224
20225	      /* To simplify the rest of the code, restrict to the GEU case.  */
20226 if (compare_code == LTU)
20227 {
20228 HOST_WIDE_INT tmp = ct;
20229 ct = cf;
20230 cf = tmp;
20231 compare_code = reverse_condition (compare_code);
20232 code = reverse_condition (code);
20233 }
20234 else
20235 {
20236 if (fpcmp)
20237 PUT_CODE (compare_op,
20238 reverse_condition_maybe_unordered
20239 (GET_CODE (compare_op)));
20240 else
20241 PUT_CODE (compare_op,
20242 reverse_condition (GET_CODE (compare_op)));
20243 }
20244 diff = ct - cf;
20245
20246 if (reg_overlap_mentioned_p (out, op0)
20247 || reg_overlap_mentioned_p (out, op1))
20248 tmp = gen_reg_rtx (mode);
20249
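	      /* Materialize the carry flag into TMP: the movdicc_0_m1 /
		 movsicc_0_m1 patterns set it to -1 when the carry is set
		 and to 0 otherwise (the sbb dest,dest idiom).  */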
20250 if (mode == DImode)
20251 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20252 else
20253 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20254 flags, compare_op));
20255 }
20256 else
20257 {
20258 if (code == GT || code == GE)
20259 code = reverse_condition (code);
20260 else
20261 {
20262 HOST_WIDE_INT tmp = ct;
20263 ct = cf;
20264 cf = tmp;
20265 diff = ct - cf;
20266 }
20267 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20268 }
20269
20270 if (diff == 1)
20271 {
20272 /*
20273 * cmpl op0,op1
20274 * sbbl dest,dest
20275 * [addl dest, ct]
20276 *
20277 * Size 5 - 8.
20278 */
20279 if (ct)
20280 tmp = expand_simple_binop (mode, PLUS,
20281 tmp, GEN_INT (ct),
20282 copy_rtx (tmp), 1, OPTAB_DIRECT);
20283 }
20284 else if (cf == -1)
20285 {
20286 /*
20287 * cmpl op0,op1
20288 * sbbl dest,dest
20289 * orl $ct, dest
20290 *
20291 * Size 8.
20292 */
20293 tmp = expand_simple_binop (mode, IOR,
20294 tmp, GEN_INT (ct),
20295 copy_rtx (tmp), 1, OPTAB_DIRECT);
20296 }
20297 else if (diff == -1 && ct)
20298 {
20299 /*
20300 * cmpl op0,op1
20301 * sbbl dest,dest
20302 * notl dest
20303 * [addl dest, cf]
20304 *
20305 * Size 8 - 11.
20306 */
20307 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20308 if (cf)
20309 tmp = expand_simple_binop (mode, PLUS,
20310 copy_rtx (tmp), GEN_INT (cf),
20311 copy_rtx (tmp), 1, OPTAB_DIRECT);
20312 }
20313 else
20314 {
20315 /*
20316 * cmpl op0,op1
20317 * sbbl dest,dest
20318 * [notl dest]
20319 * andl cf - ct, dest
20320 * [addl dest, ct]
20321 *
20322 * Size 8 - 11.
20323 */
20324
20325 if (cf == 0)
20326 {
20327 cf = ct;
20328 ct = 0;
20329 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20330 }
20331
20332 tmp = expand_simple_binop (mode, AND,
20333 copy_rtx (tmp),
20334 gen_int_mode (cf - ct, mode),
20335 copy_rtx (tmp), 1, OPTAB_DIRECT);
20336 if (ct)
20337 tmp = expand_simple_binop (mode, PLUS,
20338 copy_rtx (tmp), GEN_INT (ct),
20339 copy_rtx (tmp), 1, OPTAB_DIRECT);
20340 }
20341
20342 if (!rtx_equal_p (tmp, out))
20343 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20344
20345 return true;
20346 }
20347
20348 if (diff < 0)
20349 {
20350 enum machine_mode cmp_mode = GET_MODE (op0);
20351
20352 HOST_WIDE_INT tmp;
20353 tmp = ct, ct = cf, cf = tmp;
20354 diff = -diff;
20355
20356 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20357 {
20358 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20359
20360	      /* We may be reversing an unordered compare to a normal compare, which
20361		 is not valid in general (we may convert a non-trapping condition
20362		 to a trapping one); however, on i386 we currently emit all
20363		 comparisons unordered.  */
20364 compare_code = reverse_condition_maybe_unordered (compare_code);
20365 code = reverse_condition_maybe_unordered (code);
20366 }
20367 else
20368 {
20369 compare_code = reverse_condition (compare_code);
20370 code = reverse_condition (code);
20371 }
20372 }
20373
20374 compare_code = UNKNOWN;
20375 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20376 && CONST_INT_P (op1))
20377 {
20378 if (op1 == const0_rtx
20379 && (code == LT || code == GE))
20380 compare_code = code;
20381 else if (op1 == constm1_rtx)
20382 {
20383 if (code == LE)
20384 compare_code = LT;
20385 else if (code == GT)
20386 compare_code = GE;
20387 }
20388 }
20389
20390 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20391 if (compare_code != UNKNOWN
20392 && GET_MODE (op0) == GET_MODE (out)
20393 && (cf == -1 || ct == -1))
20394 {
20395	      /* If the lea code below could be used, only optimize
20396		 if it results in a two-insn sequence.  */
20397
20398 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20399 || diff == 3 || diff == 5 || diff == 9)
20400 || (compare_code == LT && ct == -1)
20401 || (compare_code == GE && cf == -1))
20402 {
20403 /*
20404 * notl op1 (if necessary)
20405 * sarl $31, op1
20406 * orl cf, op1
20407 */
20408 if (ct != -1)
20409 {
20410 cf = ct;
20411 ct = -1;
20412 code = reverse_condition (code);
20413 }
20414
20415 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20416
20417 out = expand_simple_binop (mode, IOR,
20418 out, GEN_INT (cf),
20419 out, 1, OPTAB_DIRECT);
20420 if (out != operands[0])
20421 emit_move_insn (operands[0], out);
20422
20423 return true;
20424 }
20425 }
20426
20427
20428 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20429 || diff == 3 || diff == 5 || diff == 9)
20430 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20431 && (mode != DImode
20432 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20433 {
20434 /*
20435 * xorl dest,dest
20436 * cmpl op1,op2
20437 * setcc dest
20438 * lea cf(dest*(ct-cf)),dest
20439 *
20440 * Size 14.
20441 *
20442 * This also catches the degenerate setcc-only case.
20443 */
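	  /* For example, ct = 5 and cf = 1 give diff = 4, so after the setcc
	     the whole select reduces to a single lea 1(,dest,4), dest.  */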
20444
20445 rtx tmp;
20446 int nops;
20447
20448 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20449
20450 nops = 0;
20451	  /* On x86_64 the lea instruction operates on Pmode, so we need
20452	     to get the arithmetic done in the proper mode to match.  */
20453 if (diff == 1)
20454 tmp = copy_rtx (out);
20455 else
20456 {
20457 rtx out1;
20458 out1 = copy_rtx (out);
20459 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20460 nops++;
20461 if (diff & 1)
20462 {
20463 tmp = gen_rtx_PLUS (mode, tmp, out1);
20464 nops++;
20465 }
20466 }
20467 if (cf != 0)
20468 {
20469 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20470 nops++;
20471 }
20472 if (!rtx_equal_p (tmp, out))
20473 {
20474 if (nops == 1)
20475 out = force_operand (tmp, copy_rtx (out));
20476 else
20477 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20478 }
20479 if (!rtx_equal_p (out, operands[0]))
20480 emit_move_insn (operands[0], copy_rtx (out));
20481
20482 return true;
20483 }
20484
20485 /*
20486 * General case: Jumpful:
20487 * xorl dest,dest cmpl op1, op2
20488 * cmpl op1, op2 movl ct, dest
20489 * setcc dest jcc 1f
20490 * decl dest movl cf, dest
20491 * andl (cf-ct),dest 1:
20492 * addl ct,dest
20493 *
20494 * Size 20. Size 14.
20495 *
20496 * This is reasonably steep, but branch mispredict costs are
20497 * high on modern cpus, so consider failing only if optimizing
20498 * for space.
20499 */
20500
20501 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20502 && BRANCH_COST (optimize_insn_for_speed_p (),
20503 false) >= 2)
20504 {
20505 if (cf == 0)
20506 {
20507 enum machine_mode cmp_mode = GET_MODE (op0);
20508
20509 cf = ct;
20510 ct = 0;
20511
20512 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20513 {
20514 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20515
20516	      /* We may be reversing an unordered compare to a normal compare,
20517		 which is not valid in general (we may convert a non-trapping
20518		 condition to a trapping one); however, on i386 we currently
20519		 emit all comparisons unordered.  */
20520 code = reverse_condition_maybe_unordered (code);
20521 }
20522 else
20523 {
20524 code = reverse_condition (code);
20525 if (compare_code != UNKNOWN)
20526 compare_code = reverse_condition (compare_code);
20527 }
20528 }
20529
20530 if (compare_code != UNKNOWN)
20531 {
20532 /* notl op1 (if needed)
20533 sarl $31, op1
20534 andl (cf-ct), op1
20535 addl ct, op1
20536
20537 For x < 0 (resp. x <= -1) there will be no notl,
20538 so if possible swap the constants to get rid of the
20539 complement.
20540 True/false will be -1/0 while code below (store flag
20541 followed by decrement) is 0/-1, so the constants need
20542 to be exchanged once more. */
20543
20544 if (compare_code == GE || !cf)
20545 {
20546 code = reverse_condition (code);
20547 compare_code = LT;
20548 }
20549 else
20550 {
20551 HOST_WIDE_INT tmp = cf;
20552 cf = ct;
20553 ct = tmp;
20554 }
20555
20556 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20557 }
20558 else
20559 {
20560 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20561
20562 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20563 constm1_rtx,
20564 copy_rtx (out), 1, OPTAB_DIRECT);
20565 }
20566
20567 out = expand_simple_binop (mode, AND, copy_rtx (out),
20568 gen_int_mode (cf - ct, mode),
20569 copy_rtx (out), 1, OPTAB_DIRECT);
20570 if (ct)
20571 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20572 copy_rtx (out), 1, OPTAB_DIRECT);
20573 if (!rtx_equal_p (out, operands[0]))
20574 emit_move_insn (operands[0], copy_rtx (out));
20575
20576 return true;
20577 }
20578 }
20579
20580 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20581 {
20582 /* Try a few things more with specific constants and a variable. */
20583
20584 optab op;
20585 rtx var, orig_out, out, tmp;
20586
20587 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20588 return false;
20589
20590 /* If one of the two operands is an interesting constant, load a
20591 constant with the above and mask it in with a logical operation. */
20592
20593 if (CONST_INT_P (operands[2]))
20594 {
20595 var = operands[3];
20596 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20597 operands[3] = constm1_rtx, op = and_optab;
20598 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20599 operands[3] = const0_rtx, op = ior_optab;
20600 else
20601 return false;
20602 }
20603 else if (CONST_INT_P (operands[3]))
20604 {
20605 var = operands[2];
20606 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20607 operands[2] = constm1_rtx, op = and_optab;
20608	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20609 operands[2] = const0_rtx, op = ior_optab;
20610 else
20611 return false;
20612 }
20613 else
20614 return false;
20615
20616 orig_out = operands[0];
20617 tmp = gen_reg_rtx (mode);
20618 operands[0] = tmp;
20619
20620 /* Recurse to get the constant loaded. */
20621 if (ix86_expand_int_movcc (operands) == 0)
20622 return false;
20623
20624 /* Mask in the interesting variable. */
20625 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20626 OPTAB_WIDEN);
20627 if (!rtx_equal_p (out, orig_out))
20628 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20629
20630 return true;
20631 }
20632
20633 /*
20634 * For comparison with above,
20635 *
20636 * movl cf,dest
20637 * movl ct,tmp
20638 * cmpl op1,op2
20639 * cmovcc tmp,dest
20640 *
20641 * Size 15.
20642 */
20643
20644 if (! nonimmediate_operand (operands[2], mode))
20645 operands[2] = force_reg (mode, operands[2]);
20646 if (! nonimmediate_operand (operands[3], mode))
20647 operands[3] = force_reg (mode, operands[3]);
20648
20649 if (! register_operand (operands[2], VOIDmode)
20650 && (mode == QImode
20651 || ! register_operand (operands[3], VOIDmode)))
20652 operands[2] = force_reg (mode, operands[2]);
20653
20654 if (mode == QImode
20655 && ! register_operand (operands[3], VOIDmode))
20656 operands[3] = force_reg (mode, operands[3]);
20657
20658 emit_insn (compare_seq);
20659 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20660 gen_rtx_IF_THEN_ELSE (mode,
20661 compare_op, operands[2],
20662 operands[3])));
20663 return true;
20664 }
20665
20666 /* Swap, force into registers, or otherwise massage the two operands
20667 to an sse comparison with a mask result. Thus we differ a bit from
20668 ix86_prepare_fp_compare_args which expects to produce a flags result.
20669
20670 The DEST operand exists to help determine whether to commute commutative
20671 operators. The POP0/POP1 operands are updated in place. The new
20672 comparison code is returned, or UNKNOWN if not implementable. */
20673
20674 static enum rtx_code
20675 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20676 rtx *pop0, rtx *pop1)
20677 {
20678 rtx tmp;
20679
20680 switch (code)
20681 {
20682 case LTGT:
20683 case UNEQ:
20684 /* AVX supports all the needed comparisons. */
20685 if (TARGET_AVX)
20686 break;
20687 /* We have no LTGT as an operator. We could implement it with
20688 NE & ORDERED, but this requires an extra temporary. It's
20689 not clear that it's worth it. */
20690 return UNKNOWN;
20691
20692 case LT:
20693 case LE:
20694 case UNGT:
20695 case UNGE:
20696 /* These are supported directly. */
20697 break;
20698
20699 case EQ:
20700 case NE:
20701 case UNORDERED:
20702 case ORDERED:
20703 /* AVX has 3 operand comparisons, no need to swap anything. */
20704 if (TARGET_AVX)
20705 break;
20706 /* For commutative operators, try to canonicalize the destination
20707 operand to be first in the comparison - this helps reload to
20708 avoid extra moves. */
20709 if (!dest || !rtx_equal_p (dest, *pop1))
20710 break;
20711 /* FALLTHRU */
20712
20713 case GE:
20714 case GT:
20715 case UNLE:
20716 case UNLT:
20717 /* These are not supported directly before AVX, and furthermore
20718 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20719 comparison operands to transform into something that is
20720 supported. */
20721 tmp = *pop0;
20722 *pop0 = *pop1;
20723 *pop1 = tmp;
20724 code = swap_condition (code);
20725 break;
20726
20727 default:
20728 gcc_unreachable ();
20729 }
20730
20731 return code;
20732 }
20733
20734 /* Detect conditional moves that exactly match min/max operational
20735 semantics. Note that this is IEEE safe, as long as we don't
20736 interchange the operands.
20737
20738 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20739 and TRUE if the operation is successful and instructions are emitted. */
20740
20741 static bool
20742 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20743 rtx cmp_op1, rtx if_true, rtx if_false)
20744 {
20745 enum machine_mode mode;
20746 bool is_min;
20747 rtx tmp;
20748
20749 if (code == LT)
20750 ;
20751 else if (code == UNGE)
20752 {
20753 tmp = if_true;
20754 if_true = if_false;
20755 if_false = tmp;
20756 }
20757 else
20758 return false;
20759
20760 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20761 is_min = true;
20762 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20763 is_min = false;
20764 else
20765 return false;
20766
20767 mode = GET_MODE (dest);
20768
20769 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20770 but MODE may be a vector mode and thus not appropriate. */
20771 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20772 {
20773 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20774 rtvec v;
20775
20776 if_true = force_reg (mode, if_true);
20777 v = gen_rtvec (2, if_true, if_false);
20778 tmp = gen_rtx_UNSPEC (mode, v, u);
20779 }
20780 else
20781 {
20782 code = is_min ? SMIN : SMAX;
20783 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20784 }
20785
20786 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20787 return true;
20788 }
20789
20790 /* Expand an sse vector comparison. Return the register with the result. */
20791
20792 static rtx
20793 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20794 rtx op_true, rtx op_false)
20795 {
20796 enum machine_mode mode = GET_MODE (dest);
20797 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20798
20799   /* In the general case the result of the comparison can differ from the operands' type.  */
20800 enum machine_mode cmp_mode;
20801
20802 /* In AVX512F the result of comparison is an integer mask. */
20803 bool maskcmp = false;
20804 rtx x;
20805
20806 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20807 {
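      /* The mask has one bit per vector element, so e.g. a V16SImode
	 comparison produces an HImode mask.  */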
20808 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20809 gcc_assert (cmp_mode != BLKmode);
20810
20811 maskcmp = true;
20812 }
20813 else
20814 cmp_mode = cmp_ops_mode;
20815
20816
20817 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20818 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20819 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20820
20821 if (optimize
20822 || reg_overlap_mentioned_p (dest, op_true)
20823 || reg_overlap_mentioned_p (dest, op_false))
20824 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20825
20826 /* Compare patterns for int modes are unspec in AVX512F only. */
20827 if (maskcmp && (code == GT || code == EQ))
20828 {
20829 rtx (*gen)(rtx, rtx, rtx);
20830
20831 switch (cmp_ops_mode)
20832 {
20833 case V16SImode:
20834 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20835 break;
20836 case V8DImode:
20837 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20838 break;
20839 default:
20840 gen = NULL;
20841 }
20842
20843 if (gen)
20844 {
20845 emit_insn (gen (dest, cmp_op0, cmp_op1));
20846 return dest;
20847 }
20848 }
20849 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20850
20851 if (cmp_mode != mode && !maskcmp)
20852 {
20853 x = force_reg (cmp_ops_mode, x);
20854 convert_move (dest, x, false);
20855 }
20856 else
20857 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20858
20859 return dest;
20860 }
20861
20862 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20863 operations. This is used for both scalar and vector conditional moves. */
20864
20865 static void
20866 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20867 {
20868 enum machine_mode mode = GET_MODE (dest);
20869 enum machine_mode cmpmode = GET_MODE (cmp);
20870
20871 /* In AVX512F the result of comparison is an integer mask. */
20872 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20873
20874 rtx t2, t3, x;
20875
20876 if (vector_all_ones_operand (op_true, mode)
20877 && rtx_equal_p (op_false, CONST0_RTX (mode))
20878 && !maskcmp)
20879 {
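      /* The comparison mask is already the desired all-ones / all-zeros
	 vector, so just copy it.  */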
20880 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20881 }
20882 else if (op_false == CONST0_RTX (mode)
20883 && !maskcmp)
20884 {
20885 op_true = force_reg (mode, op_true);
20886 x = gen_rtx_AND (mode, cmp, op_true);
20887 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20888 }
20889 else if (op_true == CONST0_RTX (mode)
20890 && !maskcmp)
20891 {
20892 op_false = force_reg (mode, op_false);
20893 x = gen_rtx_NOT (mode, cmp);
20894 x = gen_rtx_AND (mode, x, op_false);
20895 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20896 }
20897 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20898 && !maskcmp)
20899 {
20900 op_false = force_reg (mode, op_false);
20901 x = gen_rtx_IOR (mode, cmp, op_false);
20902 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20903 }
20904 else if (TARGET_XOP
20905 && !maskcmp)
20906 {
20907 op_true = force_reg (mode, op_true);
20908
20909 if (!nonimmediate_operand (op_false, mode))
20910 op_false = force_reg (mode, op_false);
20911
20912 emit_insn (gen_rtx_SET (mode, dest,
20913 gen_rtx_IF_THEN_ELSE (mode, cmp,
20914 op_true,
20915 op_false)));
20916 }
20917 else
20918 {
20919 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20920 rtx d = dest;
20921
20922 if (!nonimmediate_operand (op_true, mode))
20923 op_true = force_reg (mode, op_true);
20924
20925 op_false = force_reg (mode, op_false);
20926
20927 switch (mode)
20928 {
20929 case V4SFmode:
20930 if (TARGET_SSE4_1)
20931 gen = gen_sse4_1_blendvps;
20932 break;
20933 case V2DFmode:
20934 if (TARGET_SSE4_1)
20935 gen = gen_sse4_1_blendvpd;
20936 break;
20937 case V16QImode:
20938 case V8HImode:
20939 case V4SImode:
20940 case V2DImode:
20941 if (TARGET_SSE4_1)
20942 {
20943 gen = gen_sse4_1_pblendvb;
20944 if (mode != V16QImode)
20945 d = gen_reg_rtx (V16QImode);
20946 op_false = gen_lowpart (V16QImode, op_false);
20947 op_true = gen_lowpart (V16QImode, op_true);
20948 cmp = gen_lowpart (V16QImode, cmp);
20949 }
20950 break;
20951 case V8SFmode:
20952 if (TARGET_AVX)
20953 gen = gen_avx_blendvps256;
20954 break;
20955 case V4DFmode:
20956 if (TARGET_AVX)
20957 gen = gen_avx_blendvpd256;
20958 break;
20959 case V32QImode:
20960 case V16HImode:
20961 case V8SImode:
20962 case V4DImode:
20963 if (TARGET_AVX2)
20964 {
20965 gen = gen_avx2_pblendvb;
20966 if (mode != V32QImode)
20967 d = gen_reg_rtx (V32QImode);
20968 op_false = gen_lowpart (V32QImode, op_false);
20969 op_true = gen_lowpart (V32QImode, op_true);
20970 cmp = gen_lowpart (V32QImode, cmp);
20971 }
20972 break;
20973
20974 case V16SImode:
20975 gen = gen_avx512f_blendmv16si;
20976 break;
20977 case V8DImode:
20978 gen = gen_avx512f_blendmv8di;
20979 break;
20980 case V8DFmode:
20981 gen = gen_avx512f_blendmv8df;
20982 break;
20983 case V16SFmode:
20984 gen = gen_avx512f_blendmv16sf;
20985 break;
20986
20987 default:
20988 break;
20989 }
20990
20991 if (gen != NULL)
20992 {
20993 emit_insn (gen (d, op_false, op_true, cmp));
20994 if (d != dest)
20995 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
20996 }
20997 else
20998 {
20999 op_true = force_reg (mode, op_true);
21000
21001 t2 = gen_reg_rtx (mode);
21002 if (optimize)
21003 t3 = gen_reg_rtx (mode);
21004 else
21005 t3 = dest;
21006
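	  /* No blend instruction is available, so compute
	     (cmp & op_true) | (~cmp & op_false) by hand.  */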
21007 x = gen_rtx_AND (mode, op_true, cmp);
21008 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21009
21010 x = gen_rtx_NOT (mode, cmp);
21011 x = gen_rtx_AND (mode, x, op_false);
21012 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21013
21014 x = gen_rtx_IOR (mode, t3, t2);
21015 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21016 }
21017 }
21018 }
21019
21020 /* Expand a floating-point conditional move. Return true if successful. */
21021
21022 bool
21023 ix86_expand_fp_movcc (rtx operands[])
21024 {
21025 enum machine_mode mode = GET_MODE (operands[0]);
21026 enum rtx_code code = GET_CODE (operands[1]);
21027 rtx tmp, compare_op;
21028 rtx op0 = XEXP (operands[1], 0);
21029 rtx op1 = XEXP (operands[1], 1);
21030
21031 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21032 {
21033 enum machine_mode cmode;
21034
21035       /* Since we have no cmove for sse registers, don't force bad register
21036	 allocation just to gain access to it.  Deny movcc when the
21037	 comparison mode doesn't match the move mode.  */
21038 cmode = GET_MODE (op0);
21039 if (cmode == VOIDmode)
21040 cmode = GET_MODE (op1);
21041 if (cmode != mode)
21042 return false;
21043
21044 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21045 if (code == UNKNOWN)
21046 return false;
21047
21048 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21049 operands[2], operands[3]))
21050 return true;
21051
21052 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21053 operands[2], operands[3]);
21054 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21055 return true;
21056 }
21057
21058 if (GET_MODE (op0) == TImode
21059 || (GET_MODE (op0) == DImode
21060 && !TARGET_64BIT))
21061 return false;
21062
21063 /* The floating point conditional move instructions don't directly
21064 support conditions resulting from a signed integer comparison. */
21065
21066 compare_op = ix86_expand_compare (code, op0, op1);
21067 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21068 {
21069 tmp = gen_reg_rtx (QImode);
21070 ix86_expand_setcc (tmp, code, op0, op1);
21071
21072 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21073 }
21074
21075 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21076 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21077 operands[2], operands[3])));
21078
21079 return true;
21080 }
21081
21082 /* Expand a floating-point vector conditional move; a vcond operation
21083 rather than a movcc operation. */
21084
21085 bool
21086 ix86_expand_fp_vcond (rtx operands[])
21087 {
21088 enum rtx_code code = GET_CODE (operands[3]);
21089 rtx cmp;
21090
21091 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21092 &operands[4], &operands[5]);
21093 if (code == UNKNOWN)
21094 {
21095 rtx temp;
21096 switch (GET_CODE (operands[3]))
21097 {
21098 case LTGT:
21099 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21100 operands[5], operands[0], operands[0]);
21101 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21102 operands[5], operands[1], operands[2]);
21103 code = AND;
21104 break;
21105 case UNEQ:
21106 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21107 operands[5], operands[0], operands[0]);
21108 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21109 operands[5], operands[1], operands[2]);
21110 code = IOR;
21111 break;
21112 default:
21113 gcc_unreachable ();
21114 }
21115 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21116 OPTAB_DIRECT);
21117 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21118 return true;
21119 }
21120
21121 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21122 operands[5], operands[1], operands[2]))
21123 return true;
21124
21125 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21126 operands[1], operands[2]);
21127 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21128 return true;
21129 }
21130
21131 /* Expand a signed/unsigned integral vector conditional move. */
21132
21133 bool
21134 ix86_expand_int_vcond (rtx operands[])
21135 {
21136 enum machine_mode data_mode = GET_MODE (operands[0]);
21137 enum machine_mode mode = GET_MODE (operands[4]);
21138 enum rtx_code code = GET_CODE (operands[3]);
21139 bool negate = false;
21140 rtx x, cop0, cop1;
21141
21142 cop0 = operands[4];
21143 cop1 = operands[5];
21144
21145 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21146 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21147 if ((code == LT || code == GE)
21148 && data_mode == mode
21149 && cop1 == CONST0_RTX (mode)
21150 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21151 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21152 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21153 && (GET_MODE_SIZE (data_mode) == 16
21154 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21155 {
21156 rtx negop = operands[2 - (code == LT)];
21157 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21158 if (negop == CONST1_RTX (data_mode))
21159 {
21160 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21161 operands[0], 1, OPTAB_DIRECT);
21162 if (res != operands[0])
21163 emit_move_insn (operands[0], res);
21164 return true;
21165 }
21166 else if (GET_MODE_INNER (data_mode) != DImode
21167 && vector_all_ones_operand (negop, data_mode))
21168 {
21169 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21170 operands[0], 0, OPTAB_DIRECT);
21171 if (res != operands[0])
21172 emit_move_insn (operands[0], res);
21173 return true;
21174 }
21175 }
21176
21177 if (!nonimmediate_operand (cop1, mode))
21178 cop1 = force_reg (mode, cop1);
21179 if (!general_operand (operands[1], data_mode))
21180 operands[1] = force_reg (data_mode, operands[1]);
21181 if (!general_operand (operands[2], data_mode))
21182 operands[2] = force_reg (data_mode, operands[2]);
21183
21184 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21185 if (TARGET_XOP
21186 && (mode == V16QImode || mode == V8HImode
21187 || mode == V4SImode || mode == V2DImode))
21188 ;
21189 else
21190 {
21191 /* Canonicalize the comparison to EQ, GT, GTU. */
21192 switch (code)
21193 {
21194 case EQ:
21195 case GT:
21196 case GTU:
21197 break;
21198
21199 case NE:
21200 case LE:
21201 case LEU:
21202 code = reverse_condition (code);
21203 negate = true;
21204 break;
21205
21206 case GE:
21207 case GEU:
21208 code = reverse_condition (code);
21209 negate = true;
21210 /* FALLTHRU */
21211
21212 case LT:
21213 case LTU:
21214 code = swap_condition (code);
21215 x = cop0, cop0 = cop1, cop1 = x;
21216 break;
21217
21218 default:
21219 gcc_unreachable ();
21220 }
21221
21222 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21223 if (mode == V2DImode)
21224 {
21225 switch (code)
21226 {
21227 case EQ:
21228 /* SSE4.1 supports EQ. */
21229 if (!TARGET_SSE4_1)
21230 return false;
21231 break;
21232
21233 case GT:
21234 case GTU:
21235 /* SSE4.2 supports GT/GTU. */
21236 if (!TARGET_SSE4_2)
21237 return false;
21238 break;
21239
21240 default:
21241 gcc_unreachable ();
21242 }
21243 }
21244
21245 /* Unsigned parallel compare is not supported by the hardware.
21246 Play some tricks to turn this into a signed comparison
21247 against 0. */
21248 if (code == GTU)
21249 {
21250 cop0 = force_reg (mode, cop0);
21251
21252 switch (mode)
21253 {
21254 case V16SImode:
21255 case V8DImode:
21256 case V8SImode:
21257 case V4DImode:
21258 case V4SImode:
21259 case V2DImode:
21260 {
21261 rtx t1, t2, mask;
21262 rtx (*gen_sub3) (rtx, rtx, rtx);
21263
21264 switch (mode)
21265 {
21266 case V16SImode: gen_sub3 = gen_subv16si3; break;
21267 case V8DImode: gen_sub3 = gen_subv8di3; break;
21268 case V8SImode: gen_sub3 = gen_subv8si3; break;
21269 case V4DImode: gen_sub3 = gen_subv4di3; break;
21270 case V4SImode: gen_sub3 = gen_subv4si3; break;
21271 case V2DImode: gen_sub3 = gen_subv2di3; break;
21272 default:
21273 gcc_unreachable ();
21274 }
21275 /* Subtract (-(INT MAX) - 1) from both operands to make
21276 them signed. */
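		  /* Flipping the sign bit this way maps unsigned order onto
		     signed order, so GTU on the original operands becomes GT
		     on the biased values.  */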
21277 mask = ix86_build_signbit_mask (mode, true, false);
21278 t1 = gen_reg_rtx (mode);
21279 emit_insn (gen_sub3 (t1, cop0, mask));
21280
21281 t2 = gen_reg_rtx (mode);
21282 emit_insn (gen_sub3 (t2, cop1, mask));
21283
21284 cop0 = t1;
21285 cop1 = t2;
21286 code = GT;
21287 }
21288 break;
21289
21290 case V32QImode:
21291 case V16HImode:
21292 case V16QImode:
21293 case V8HImode:
21294 /* Perform a parallel unsigned saturating subtraction. */
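	      /* a >u b iff the saturating difference a -us b is nonzero, hence
		 the EQ test against zero together with the extra negate.  */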
21295 x = gen_reg_rtx (mode);
21296 emit_insn (gen_rtx_SET (VOIDmode, x,
21297 gen_rtx_US_MINUS (mode, cop0, cop1)));
21298
21299 cop0 = x;
21300 cop1 = CONST0_RTX (mode);
21301 code = EQ;
21302 negate = !negate;
21303 break;
21304
21305 default:
21306 gcc_unreachable ();
21307 }
21308 }
21309 }
21310
21311 /* Allow the comparison to be done in one mode, but the movcc to
21312 happen in another mode. */
21313 if (data_mode == mode)
21314 {
21315 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21316 operands[1+negate], operands[2-negate]);
21317 }
21318 else
21319 {
21320 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21321 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21322 operands[1+negate], operands[2-negate]);
21323 if (GET_MODE (x) == mode)
21324 x = gen_lowpart (data_mode, x);
21325 }
21326
21327 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21328 operands[2-negate]);
21329 return true;
21330 }
21331
21332 static bool
21333 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21334 {
21335 enum machine_mode mode = GET_MODE (op0);
21336 switch (mode)
21337 {
21338 case V16SImode:
21339 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21340 force_reg (V16SImode, mask),
21341 op1));
21342 return true;
21343 case V16SFmode:
21344 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21345 force_reg (V16SImode, mask),
21346 op1));
21347 return true;
21348 case V8DImode:
21349 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21350 force_reg (V8DImode, mask), op1));
21351 return true;
21352 case V8DFmode:
21353 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21354 force_reg (V8DImode, mask), op1));
21355 return true;
21356 default:
21357 return false;
21358 }
21359 }
21360
21361 /* Expand a variable vector permutation. */
21362
21363 void
21364 ix86_expand_vec_perm (rtx operands[])
21365 {
21366 rtx target = operands[0];
21367 rtx op0 = operands[1];
21368 rtx op1 = operands[2];
21369 rtx mask = operands[3];
21370 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21371 enum machine_mode mode = GET_MODE (op0);
21372 enum machine_mode maskmode = GET_MODE (mask);
21373 int w, e, i;
21374 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21375
21376 /* Number of elements in the vector. */
21377 w = GET_MODE_NUNITS (mode);
21378 e = GET_MODE_UNIT_SIZE (mode);
21379 gcc_assert (w <= 64);
21380
21381 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21382 return;
21383
21384 if (TARGET_AVX2)
21385 {
21386 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21387 {
21388	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21389	     a constant shuffle operand.  With a tiny bit of effort we can
21390	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
21391	     unfortunate but there's no avoiding it.
21392	     Similarly, for V16HImode we don't have instructions for variable
21393	     shuffling, while for V32QImode we can, after preparing suitable
21394	     masks, use vpshufb; vpshufb; vpermq; vpor.  */
21395
21396 if (mode == V16HImode)
21397 {
21398 maskmode = mode = V32QImode;
21399 w = 32;
21400 e = 1;
21401 }
21402 else
21403 {
21404 maskmode = mode = V8SImode;
21405 w = 8;
21406 e = 4;
21407 }
21408 t1 = gen_reg_rtx (maskmode);
21409
21410 /* Replicate the low bits of the V4DImode mask into V8SImode:
21411 mask = { A B C D }
21412 t1 = { A A B B C C D D }. */
21413 for (i = 0; i < w / 2; ++i)
21414 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21415 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21416 vt = force_reg (maskmode, vt);
21417 mask = gen_lowpart (maskmode, mask);
21418 if (maskmode == V8SImode)
21419 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21420 else
21421 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21422
21423	  /* Multiply the shuffle indices by two.  */
21424 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21425 OPTAB_DIRECT);
21426
21427	  /* Add one to the odd shuffle indices:
21428 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21429 for (i = 0; i < w / 2; ++i)
21430 {
21431 vec[i * 2] = const0_rtx;
21432 vec[i * 2 + 1] = const1_rtx;
21433 }
21434 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21435 vt = validize_mem (force_const_mem (maskmode, vt));
21436 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21437 OPTAB_DIRECT);
21438
21439 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21440 operands[3] = mask = t1;
21441 target = gen_reg_rtx (mode);
21442 op0 = gen_lowpart (mode, op0);
21443 op1 = gen_lowpart (mode, op1);
21444 }
21445
21446 switch (mode)
21447 {
21448 case V8SImode:
21449 /* The VPERMD and VPERMPS instructions already properly ignore
21450 the high bits of the shuffle elements. No need for us to
21451 perform an AND ourselves. */
21452 if (one_operand_shuffle)
21453 {
21454 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21455 if (target != operands[0])
21456 emit_move_insn (operands[0],
21457 gen_lowpart (GET_MODE (operands[0]), target));
21458 }
21459 else
21460 {
21461 t1 = gen_reg_rtx (V8SImode);
21462 t2 = gen_reg_rtx (V8SImode);
21463 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21464 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21465 goto merge_two;
21466 }
21467 return;
21468
21469 case V8SFmode:
21470 mask = gen_lowpart (V8SFmode, mask);
21471 if (one_operand_shuffle)
21472 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21473 else
21474 {
21475 t1 = gen_reg_rtx (V8SFmode);
21476 t2 = gen_reg_rtx (V8SFmode);
21477 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21478 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21479 goto merge_two;
21480 }
21481 return;
21482
21483 case V4SImode:
21484 /* By combining the two 128-bit input vectors into one 256-bit
21485 input vector, we can use VPERMD and VPERMPS for the full
21486 two-operand shuffle. */
21487 t1 = gen_reg_rtx (V8SImode);
21488 t2 = gen_reg_rtx (V8SImode);
21489 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21490 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21491 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21492 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21493 return;
21494
21495 case V4SFmode:
21496 t1 = gen_reg_rtx (V8SFmode);
21497 t2 = gen_reg_rtx (V8SImode);
21498 mask = gen_lowpart (V4SImode, mask);
21499 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21500 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21501 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21502 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21503 return;
21504
21505 case V32QImode:
21506 t1 = gen_reg_rtx (V32QImode);
21507 t2 = gen_reg_rtx (V32QImode);
21508 t3 = gen_reg_rtx (V32QImode);
21509 vt2 = GEN_INT (128);
21510 for (i = 0; i < 32; i++)
21511 vec[i] = vt2;
21512 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21513 vt = force_reg (V32QImode, vt);
21514 for (i = 0; i < 32; i++)
21515 vec[i] = i < 16 ? vt2 : const0_rtx;
21516 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21517 vt2 = force_reg (V32QImode, vt2);
21518 /* From mask create two adjusted masks, which contain the same
21519 bits as mask in the low 7 bits of each vector element.
21520 The first mask will have the most significant bit clear
21521 if it requests element from the same 128-bit lane
21522 and MSB set if it requests element from the other 128-bit lane.
21523 The second mask will have the opposite values of the MSB,
21524 and additionally will have its 128-bit lanes swapped.
21525 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21526 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21527 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21528 stands for other 12 bytes. */
21529	  /* The bit that says whether an element is from the same lane or the
21530	     other lane is bit 4, so shift it up by 3 to the MSB position.  */
21531 t5 = gen_reg_rtx (V4DImode);
21532 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21533 GEN_INT (3)));
21534 /* Clear MSB bits from the mask just in case it had them set. */
21535 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21536 /* After this t1 will have MSB set for elements from other lane. */
21537 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21538 /* Clear bits other than MSB. */
21539 emit_insn (gen_andv32qi3 (t1, t1, vt));
21540 /* Or in the lower bits from mask into t3. */
21541 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21542 /* And invert MSB bits in t1, so MSB is set for elements from the same
21543 lane. */
21544 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21545 /* Swap 128-bit lanes in t3. */
21546 t6 = gen_reg_rtx (V4DImode);
21547 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21548 const2_rtx, GEN_INT (3),
21549 const0_rtx, const1_rtx));
21550 /* And or in the lower bits from mask into t1. */
21551 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21552 if (one_operand_shuffle)
21553 {
21554 /* Each of these shuffles will put 0s in places where an
21555 element from the other 128-bit lane is needed; otherwise it
21556 will shuffle in the requested value.  */
21557 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21558 gen_lowpart (V32QImode, t6)));
21559 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21560 /* For t3 the 128-bit lanes are swapped again. */
21561 t7 = gen_reg_rtx (V4DImode);
21562 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21563 const2_rtx, GEN_INT (3),
21564 const0_rtx, const1_rtx));
21565 /* And oring both together leads to the result. */
21566 emit_insn (gen_iorv32qi3 (target, t1,
21567 gen_lowpart (V32QImode, t7)));
21568 if (target != operands[0])
21569 emit_move_insn (operands[0],
21570 gen_lowpart (GET_MODE (operands[0]), target));
21571 return;
21572 }
21573
21574 t4 = gen_reg_rtx (V32QImode);
21575 /* Similarly to the above one_operand_shuffle code,
21576 just repeated twice, once for each operand.  The merge_two:
21577 code below will merge the two results together.  */
21578 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21579 gen_lowpart (V32QImode, t6)));
21580 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21581 gen_lowpart (V32QImode, t6)));
21582 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21583 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21584 t7 = gen_reg_rtx (V4DImode);
21585 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21586 const2_rtx, GEN_INT (3),
21587 const0_rtx, const1_rtx));
21588 t8 = gen_reg_rtx (V4DImode);
21589 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21590 const2_rtx, GEN_INT (3),
21591 const0_rtx, const1_rtx));
21592 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21593 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21594 t1 = t4;
21595 t2 = t3;
21596 goto merge_two;
21597
21598 default:
21599 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21600 break;
21601 }
21602 }
21603
21604 if (TARGET_XOP)
21605 {
21606 /* The XOP VPPERM insn supports three inputs. By ignoring the
21607 one_operand_shuffle special case, we avoid creating another
21608 set of constant vectors in memory. */
21609 one_operand_shuffle = false;
21610
21611 /* mask = mask & {2*w-1, ...} */
21612 vt = GEN_INT (2*w - 1);
21613 }
21614 else
21615 {
21616 /* mask = mask & {w-1, ...} */
21617 vt = GEN_INT (w - 1);
21618 }
21619
21620 for (i = 0; i < w; i++)
21621 vec[i] = vt;
21622 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21623 mask = expand_simple_binop (maskmode, AND, mask, vt,
21624 NULL_RTX, 0, OPTAB_DIRECT);
21625
21626 /* For non-QImode operations, convert the word permutation control
21627 into a byte permutation control. */
21628 if (mode != V16QImode)
21629 {
21630 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21631 GEN_INT (exact_log2 (e)),
21632 NULL_RTX, 0, OPTAB_DIRECT);
21633
21634 /* Convert mask to vector of chars. */
21635 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21636
21637 /* Replicate each of the input bytes into byte positions:
21638 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21639 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21640 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21641 for (i = 0; i < 16; ++i)
21642 vec[i] = GEN_INT (i/e * e);
21643 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21644 vt = validize_mem (force_const_mem (V16QImode, vt));
21645 if (TARGET_XOP)
21646 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21647 else
21648 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21649
21650 /* Convert it into the byte positions by doing
21651 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21652 for (i = 0; i < 16; ++i)
21653 vec[i] = GEN_INT (i % e);
21654 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21655 vt = validize_mem (force_const_mem (V16QImode, vt));
21656 emit_insn (gen_addv16qi3 (mask, mask, vt));
21657 }
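      /* For instance, for V4SImode a word control vector {2,0,3,1} becomes
	 {8,0,12,4} after the shift above, the pshufb/pperm replicates it to
	 {8,8,8,8, 0,0,0,0, 12,12,12,12, 4,4,4,4}, and the final add turns it
	 into {8,9,10,11, 0,1,2,3, 12,13,14,15, 4,5,6,7}, i.e. the byte
	 positions of words 2, 0, 3 and 1.  */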
21658
21659 /* The actual shuffle operations all operate on V16QImode. */
21660 op0 = gen_lowpart (V16QImode, op0);
21661 op1 = gen_lowpart (V16QImode, op1);
21662
21663 if (TARGET_XOP)
21664 {
21665 if (GET_MODE (target) != V16QImode)
21666 target = gen_reg_rtx (V16QImode);
21667 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21668 if (target != operands[0])
21669 emit_move_insn (operands[0],
21670 gen_lowpart (GET_MODE (operands[0]), target));
21671 }
21672 else if (one_operand_shuffle)
21673 {
21674 if (GET_MODE (target) != V16QImode)
21675 target = gen_reg_rtx (V16QImode);
21676 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21677 if (target != operands[0])
21678 emit_move_insn (operands[0],
21679 gen_lowpart (GET_MODE (operands[0]), target));
21680 }
21681 else
21682 {
21683 rtx xops[6];
21684 bool ok;
21685
21686 /* Shuffle the two input vectors independently. */
21687 t1 = gen_reg_rtx (V16QImode);
21688 t2 = gen_reg_rtx (V16QImode);
21689 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21690 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21691
21692 merge_two:
21693 /* Then merge them together. The key is whether any given control
21694 element contained a bit set that indicates the second word. */
21695 mask = operands[3];
21696 vt = GEN_INT (w);
21697 if (maskmode == V2DImode && !TARGET_SSE4_1)
21698 {
21699 /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
21700 more shuffle to convert the V2DI input mask into a V4SI
21701 input mask.  At that point the masking that expand_int_vcond
21702 does will work as desired.  */
21703 rtx t3 = gen_reg_rtx (V4SImode);
21704 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21705 const0_rtx, const0_rtx,
21706 const2_rtx, const2_rtx));
21707 mask = t3;
21708 maskmode = V4SImode;
21709 e = w = 4;
21710 }
21711
21712 for (i = 0; i < w; i++)
21713 vec[i] = vt;
21714 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21715 vt = force_reg (maskmode, vt);
21716 mask = expand_simple_binop (maskmode, AND, mask, vt,
21717 NULL_RTX, 0, OPTAB_DIRECT);
21718
21719 if (GET_MODE (target) != mode)
21720 target = gen_reg_rtx (mode);
21721 xops[0] = target;
21722 xops[1] = gen_lowpart (mode, t2);
21723 xops[2] = gen_lowpart (mode, t1);
21724 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21725 xops[4] = mask;
21726 xops[5] = vt;
21727 ok = ix86_expand_int_vcond (xops);
21728 gcc_assert (ok);
21729 if (target != operands[0])
21730 emit_move_insn (operands[0],
21731 gen_lowpart (GET_MODE (operands[0]), target));
21732 }
21733 }
21734
21735 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
21736 true if we should do zero extension, else sign extension. HIGH_P is
21737 true if we want the N/2 high elements, else the low elements. */
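/* For example, with a V8HImode SRC {a0, ..., a7} and HIGH_P false, DEST
   receives a V4SImode vector holding the (zero- or sign-) extensions of
   a0..a3; with HIGH_P true it holds the extensions of a4..a7.  */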
21738
21739 void
21740 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21741 {
21742 enum machine_mode imode = GET_MODE (src);
21743 rtx tmp;
21744
21745 if (TARGET_SSE4_1)
21746 {
21747 rtx (*unpack)(rtx, rtx);
21748 rtx (*extract)(rtx, rtx) = NULL;
21749 enum machine_mode halfmode = BLKmode;
21750
21751 switch (imode)
21752 {
21753 case V32QImode:
21754 if (unsigned_p)
21755 unpack = gen_avx2_zero_extendv16qiv16hi2;
21756 else
21757 unpack = gen_avx2_sign_extendv16qiv16hi2;
21758 halfmode = V16QImode;
21759 extract
21760 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21761 break;
21762 case V32HImode:
21763 if (unsigned_p)
21764 unpack = gen_avx512f_zero_extendv16hiv16si2;
21765 else
21766 unpack = gen_avx512f_sign_extendv16hiv16si2;
21767 halfmode = V16HImode;
21768 extract
21769 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21770 break;
21771 case V16HImode:
21772 if (unsigned_p)
21773 unpack = gen_avx2_zero_extendv8hiv8si2;
21774 else
21775 unpack = gen_avx2_sign_extendv8hiv8si2;
21776 halfmode = V8HImode;
21777 extract
21778 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21779 break;
21780 case V16SImode:
21781 if (unsigned_p)
21782 unpack = gen_avx512f_zero_extendv8siv8di2;
21783 else
21784 unpack = gen_avx512f_sign_extendv8siv8di2;
21785 halfmode = V8SImode;
21786 extract
21787 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21788 break;
21789 case V8SImode:
21790 if (unsigned_p)
21791 unpack = gen_avx2_zero_extendv4siv4di2;
21792 else
21793 unpack = gen_avx2_sign_extendv4siv4di2;
21794 halfmode = V4SImode;
21795 extract
21796 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21797 break;
21798 case V16QImode:
21799 if (unsigned_p)
21800 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21801 else
21802 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21803 break;
21804 case V8HImode:
21805 if (unsigned_p)
21806 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21807 else
21808 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21809 break;
21810 case V4SImode:
21811 if (unsigned_p)
21812 unpack = gen_sse4_1_zero_extendv2siv2di2;
21813 else
21814 unpack = gen_sse4_1_sign_extendv2siv2di2;
21815 break;
21816 default:
21817 gcc_unreachable ();
21818 }
21819
21820 if (GET_MODE_SIZE (imode) >= 32)
21821 {
21822 tmp = gen_reg_rtx (halfmode);
21823 emit_insn (extract (tmp, src));
21824 }
21825 else if (high_p)
21826 {
21827 /* Shift higher 8 bytes to lower 8 bytes. */
21828 tmp = gen_reg_rtx (V1TImode);
21829 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21830 GEN_INT (64)));
21831 tmp = gen_lowpart (imode, tmp);
21832 }
21833 else
21834 tmp = src;
21835
21836 emit_insn (unpack (dest, tmp));
21837 }
21838 else
21839 {
21840 rtx (*unpack)(rtx, rtx, rtx);
21841
21842 switch (imode)
21843 {
21844 case V16QImode:
21845 if (high_p)
21846 unpack = gen_vec_interleave_highv16qi;
21847 else
21848 unpack = gen_vec_interleave_lowv16qi;
21849 break;
21850 case V8HImode:
21851 if (high_p)
21852 unpack = gen_vec_interleave_highv8hi;
21853 else
21854 unpack = gen_vec_interleave_lowv8hi;
21855 break;
21856 case V4SImode:
21857 if (high_p)
21858 unpack = gen_vec_interleave_highv4si;
21859 else
21860 unpack = gen_vec_interleave_lowv4si;
21861 break;
21862 default:
21863 gcc_unreachable ();
21864 }
21865
21866 if (unsigned_p)
21867 tmp = force_reg (imode, CONST0_RTX (imode));
21868 else
21869 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21870 src, pc_rtx, pc_rtx);
21871
21872 rtx tmp2 = gen_reg_rtx (imode);
21873 emit_insn (unpack (tmp2, src, tmp));
21874 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21875 }
21876 }
21877
21878 /* Expand conditional increment or decrement using adc/sbb instructions.
21879 The default case using setcc followed by the conditional move can be
21880 done by generic code. */
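/* For instance, an unsigned  r = r + (a < b)  can typically be emitted as a
   compare that leaves the carry flag set exactly when a < b, followed by
   adc r, 0; the decrement variants use sbb instead.  */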
21881 bool
21882 ix86_expand_int_addcc (rtx operands[])
21883 {
21884 enum rtx_code code = GET_CODE (operands[1]);
21885 rtx flags;
21886 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21887 rtx compare_op;
21888 rtx val = const0_rtx;
21889 bool fpcmp = false;
21890 enum machine_mode mode;
21891 rtx op0 = XEXP (operands[1], 0);
21892 rtx op1 = XEXP (operands[1], 1);
21893
21894 if (operands[3] != const1_rtx
21895 && operands[3] != constm1_rtx)
21896 return false;
21897 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21898 return false;
21899 code = GET_CODE (compare_op);
21900
21901 flags = XEXP (compare_op, 0);
21902
21903 if (GET_MODE (flags) == CCFPmode
21904 || GET_MODE (flags) == CCFPUmode)
21905 {
21906 fpcmp = true;
21907 code = ix86_fp_compare_code_to_integer (code);
21908 }
21909
21910 if (code != LTU)
21911 {
21912 val = constm1_rtx;
21913 if (fpcmp)
21914 PUT_CODE (compare_op,
21915 reverse_condition_maybe_unordered
21916 (GET_CODE (compare_op)));
21917 else
21918 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21919 }
21920
21921 mode = GET_MODE (operands[0]);
21922
21923 /* Construct either adc or sbb insn. */
21924 if ((code == LTU) == (operands[3] == constm1_rtx))
21925 {
21926 switch (mode)
21927 {
21928 case QImode:
21929 insn = gen_subqi3_carry;
21930 break;
21931 case HImode:
21932 insn = gen_subhi3_carry;
21933 break;
21934 case SImode:
21935 insn = gen_subsi3_carry;
21936 break;
21937 case DImode:
21938 insn = gen_subdi3_carry;
21939 break;
21940 default:
21941 gcc_unreachable ();
21942 }
21943 }
21944 else
21945 {
21946 switch (mode)
21947 {
21948 case QImode:
21949 insn = gen_addqi3_carry;
21950 break;
21951 case HImode:
21952 insn = gen_addhi3_carry;
21953 break;
21954 case SImode:
21955 insn = gen_addsi3_carry;
21956 break;
21957 case DImode:
21958 insn = gen_adddi3_carry;
21959 break;
21960 default:
21961 gcc_unreachable ();
21962 }
21963 }
21964 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21965
21966 return true;
21967 }
21968
21969
21970 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
21971 but works for floating-point parameters and nonoffsettable memories.
21972 For pushes, it returns just stack offsets; the values will be saved
21973 in the right order.  At most four parts are generated.  */
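/* For example, on a 32-bit target a DFmode value is split into two SImode
   parts and an XFmode value into three; on a 64-bit target an XFmode or
   TFmode value is split into a DImode part plus an SImode or DImode part.  */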
21974
21975 static int
21976 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
21977 {
21978 int size;
21979
21980 if (!TARGET_64BIT)
21981 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
21982 else
21983 size = (GET_MODE_SIZE (mode) + 4) / 8;
21984
21985 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
21986 gcc_assert (size >= 2 && size <= 4);
21987
21988 /* Optimize constant pool reference to immediates. This is used by fp
21989 moves, that force all constants to memory to allow combining. */
21990 if (MEM_P (operand) && MEM_READONLY_P (operand))
21991 {
21992 rtx tmp = maybe_get_pool_constant (operand);
21993 if (tmp)
21994 operand = tmp;
21995 }
21996
21997 if (MEM_P (operand) && !offsettable_memref_p (operand))
21998 {
21999 /* The only non-offsettable memories we handle are pushes.  */
22000 int ok = push_operand (operand, VOIDmode);
22001
22002 gcc_assert (ok);
22003
22004 operand = copy_rtx (operand);
22005 PUT_MODE (operand, word_mode);
22006 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22007 return size;
22008 }
22009
22010 if (GET_CODE (operand) == CONST_VECTOR)
22011 {
22012 enum machine_mode imode = int_mode_for_mode (mode);
22013 /* Caution: if we looked through a constant pool memory above,
22014 the operand may actually have a different mode now. That's
22015 ok, since we want to pun this all the way back to an integer. */
22016 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22017 gcc_assert (operand != NULL);
22018 mode = imode;
22019 }
22020
22021 if (!TARGET_64BIT)
22022 {
22023 if (mode == DImode)
22024 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22025 else
22026 {
22027 int i;
22028
22029 if (REG_P (operand))
22030 {
22031 gcc_assert (reload_completed);
22032 for (i = 0; i < size; i++)
22033 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22034 }
22035 else if (offsettable_memref_p (operand))
22036 {
22037 operand = adjust_address (operand, SImode, 0);
22038 parts[0] = operand;
22039 for (i = 1; i < size; i++)
22040 parts[i] = adjust_address (operand, SImode, 4 * i);
22041 }
22042 else if (GET_CODE (operand) == CONST_DOUBLE)
22043 {
22044 REAL_VALUE_TYPE r;
22045 long l[4];
22046
22047 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22048 switch (mode)
22049 {
22050 case TFmode:
22051 real_to_target (l, &r, mode);
22052 parts[3] = gen_int_mode (l[3], SImode);
22053 parts[2] = gen_int_mode (l[2], SImode);
22054 break;
22055 case XFmode:
22056 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22057 long double may not be 80-bit. */
22058 real_to_target (l, &r, mode);
22059 parts[2] = gen_int_mode (l[2], SImode);
22060 break;
22061 case DFmode:
22062 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22063 break;
22064 default:
22065 gcc_unreachable ();
22066 }
22067 parts[1] = gen_int_mode (l[1], SImode);
22068 parts[0] = gen_int_mode (l[0], SImode);
22069 }
22070 else
22071 gcc_unreachable ();
22072 }
22073 }
22074 else
22075 {
22076 if (mode == TImode)
22077 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22078 if (mode == XFmode || mode == TFmode)
22079 {
22080 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22081 if (REG_P (operand))
22082 {
22083 gcc_assert (reload_completed);
22084 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22085 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22086 }
22087 else if (offsettable_memref_p (operand))
22088 {
22089 operand = adjust_address (operand, DImode, 0);
22090 parts[0] = operand;
22091 parts[1] = adjust_address (operand, upper_mode, 8);
22092 }
22093 else if (GET_CODE (operand) == CONST_DOUBLE)
22094 {
22095 REAL_VALUE_TYPE r;
22096 long l[4];
22097
22098 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22099 real_to_target (l, &r, mode);
22100
22101 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22102 if (HOST_BITS_PER_WIDE_INT >= 64)
22103 parts[0]
22104 = gen_int_mode
22105 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22106 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22107 DImode);
22108 else
22109 parts[0] = immed_double_const (l[0], l[1], DImode);
22110
22111 if (upper_mode == SImode)
22112 parts[1] = gen_int_mode (l[2], SImode);
22113 else if (HOST_BITS_PER_WIDE_INT >= 64)
22114 parts[1]
22115 = gen_int_mode
22116 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22117 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22118 DImode);
22119 else
22120 parts[1] = immed_double_const (l[2], l[3], DImode);
22121 }
22122 else
22123 gcc_unreachable ();
22124 }
22125 }
22126
22127 return size;
22128 }
22129
22130 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22131 The value is split into word-sized parts; the destination parts are
22132 placed into operands 2 onward and the source parts into operands 6
22133 onward, in the correct order, before the moves are emitted.  */
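/* For instance, a DImode move on a 32-bit target is split into two SImode
   moves, emitted in an order chosen so that the source is not clobbered
   before it has been read.  */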
22134
22135 void
22136 ix86_split_long_move (rtx operands[])
22137 {
22138 rtx part[2][4];
22139 int nparts, i, j;
22140 int push = 0;
22141 int collisions = 0;
22142 enum machine_mode mode = GET_MODE (operands[0]);
22143 bool collisionparts[4];
22144
22145 /* The DFmode expanders may ask us to move a double.
22146 For a 64-bit target this is a single move.  By hiding the fact
22147 here we simplify i386.md splitters.  */
22148 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22149 {
22150 /* Optimize constant pool reference to immediates. This is used by
22151 fp moves, that force all constants to memory to allow combining. */
22152
22153 if (MEM_P (operands[1])
22154 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22155 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22156 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22157 if (push_operand (operands[0], VOIDmode))
22158 {
22159 operands[0] = copy_rtx (operands[0]);
22160 PUT_MODE (operands[0], word_mode);
22161 }
22162 else
22163 operands[0] = gen_lowpart (DImode, operands[0]);
22164 operands[1] = gen_lowpart (DImode, operands[1]);
22165 emit_move_insn (operands[0], operands[1]);
22166 return;
22167 }
22168
22169 /* The only non-offsettable memory we handle is push. */
22170 if (push_operand (operands[0], VOIDmode))
22171 push = 1;
22172 else
22173 gcc_assert (!MEM_P (operands[0])
22174 || offsettable_memref_p (operands[0]));
22175
22176 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22177 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22178
22179 /* When emitting a push, take care of source operands on the stack.  */
22180 if (push && MEM_P (operands[1])
22181 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22182 {
22183 rtx src_base = XEXP (part[1][nparts - 1], 0);
22184
22185 /* Compensate for the stack decrement by 4. */
22186 if (!TARGET_64BIT && nparts == 3
22187 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22188 src_base = plus_constant (Pmode, src_base, 4);
22189
22190 /* src_base refers to the stack pointer and is
22191 automatically decreased by emitted push. */
22192 for (i = 0; i < nparts; i++)
22193 part[1][i] = change_address (part[1][i],
22194 GET_MODE (part[1][i]), src_base);
22195 }
22196
22197 /* We need to do the copy in the right order in case an address register
22198 of the source overlaps the destination.  */
22199 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22200 {
22201 rtx tmp;
22202
22203 for (i = 0; i < nparts; i++)
22204 {
22205 collisionparts[i]
22206 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22207 if (collisionparts[i])
22208 collisions++;
22209 }
22210
22211 /* Collision in the middle part can be handled by reordering. */
22212 if (collisions == 1 && nparts == 3 && collisionparts [1])
22213 {
22214 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22215 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22216 }
22217 else if (collisions == 1
22218 && nparts == 4
22219 && (collisionparts [1] || collisionparts [2]))
22220 {
22221 if (collisionparts [1])
22222 {
22223 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22224 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22225 }
22226 else
22227 {
22228 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22229 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22230 }
22231 }
22232
22233 /* If there are more collisions, we can't handle it by reordering.
22234 Do an lea to the last part and use only one colliding move. */
22235 else if (collisions > 1)
22236 {
22237 rtx base;
22238
22239 collisions = 1;
22240
22241 base = part[0][nparts - 1];
22242
22243 /* Handle the case when the last part isn't valid for lea.
22244 Happens in 64-bit mode storing the 12-byte XFmode. */
22245 if (GET_MODE (base) != Pmode)
22246 base = gen_rtx_REG (Pmode, REGNO (base));
22247
22248 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22249 part[1][0] = replace_equiv_address (part[1][0], base);
22250 for (i = 1; i < nparts; i++)
22251 {
22252 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22253 part[1][i] = replace_equiv_address (part[1][i], tmp);
22254 }
22255 }
22256 }
22257
22258 if (push)
22259 {
22260 if (!TARGET_64BIT)
22261 {
22262 if (nparts == 3)
22263 {
22264 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22265 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22266 stack_pointer_rtx, GEN_INT (-4)));
22267 emit_move_insn (part[0][2], part[1][2]);
22268 }
22269 else if (nparts == 4)
22270 {
22271 emit_move_insn (part[0][3], part[1][3]);
22272 emit_move_insn (part[0][2], part[1][2]);
22273 }
22274 }
22275 else
22276 {
22277 /* In 64-bit mode we don't have a 32-bit push available.  If this is a
22278 register, that is OK - we just use the larger counterpart.  We also
22279 retype the memory - this comes from an attempt to avoid a REX prefix
22280 when moving the second half of a TFmode value.  */
22281 if (GET_MODE (part[1][1]) == SImode)
22282 {
22283 switch (GET_CODE (part[1][1]))
22284 {
22285 case MEM:
22286 part[1][1] = adjust_address (part[1][1], DImode, 0);
22287 break;
22288
22289 case REG:
22290 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22291 break;
22292
22293 default:
22294 gcc_unreachable ();
22295 }
22296
22297 if (GET_MODE (part[1][0]) == SImode)
22298 part[1][0] = part[1][1];
22299 }
22300 }
22301 emit_move_insn (part[0][1], part[1][1]);
22302 emit_move_insn (part[0][0], part[1][0]);
22303 return;
22304 }
22305
22306 /* Choose correct order to not overwrite the source before it is copied. */
22307 if ((REG_P (part[0][0])
22308 && REG_P (part[1][1])
22309 && (REGNO (part[0][0]) == REGNO (part[1][1])
22310 || (nparts == 3
22311 && REGNO (part[0][0]) == REGNO (part[1][2]))
22312 || (nparts == 4
22313 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22314 || (collisions > 0
22315 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22316 {
22317 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22318 {
22319 operands[2 + i] = part[0][j];
22320 operands[6 + i] = part[1][j];
22321 }
22322 }
22323 else
22324 {
22325 for (i = 0; i < nparts; i++)
22326 {
22327 operands[2 + i] = part[0][i];
22328 operands[6 + i] = part[1][i];
22329 }
22330 }
22331
22332 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22333 if (optimize_insn_for_size_p ())
22334 {
22335 for (j = 0; j < nparts - 1; j++)
22336 if (CONST_INT_P (operands[6 + j])
22337 && operands[6 + j] != const0_rtx
22338 && REG_P (operands[2 + j]))
22339 for (i = j; i < nparts - 1; i++)
22340 if (CONST_INT_P (operands[7 + i])
22341 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22342 operands[7 + i] = operands[2 + j];
22343 }
22344
22345 for (i = 0; i < nparts; i++)
22346 emit_move_insn (operands[2 + i], operands[6 + i]);
22347
22348 return;
22349 }
22350
22351 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22352 left shift by a constant, either using a single shift or
22353 a sequence of add instructions. */
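/* For example, x << 1 is always emitted as a single add (x + x), and a
   shift by a slightly larger constant may also be emitted as repeated adds
   when that is no more costly than one shift by a constant and we are not
   optimizing for size.  */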
22354
22355 static void
22356 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22357 {
22358 rtx (*insn)(rtx, rtx, rtx);
22359
22360 if (count == 1
22361 || (count * ix86_cost->add <= ix86_cost->shift_const
22362 && !optimize_insn_for_size_p ()))
22363 {
22364 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22365 while (count-- > 0)
22366 emit_insn (insn (operand, operand, operand));
22367 }
22368 else
22369 {
22370 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22371 emit_insn (insn (operand, operand, GEN_INT (count)));
22372 }
22373 }
22374
22375 void
22376 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22377 {
22378 rtx (*gen_ashl3)(rtx, rtx, rtx);
22379 rtx (*gen_shld)(rtx, rtx, rtx);
22380 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22381
22382 rtx low[2], high[2];
22383 int count;
22384
22385 if (CONST_INT_P (operands[2]))
22386 {
22387 split_double_mode (mode, operands, 2, low, high);
22388 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22389
22390 if (count >= half_width)
22391 {
22392 emit_move_insn (high[0], low[1]);
22393 emit_move_insn (low[0], const0_rtx);
22394
22395 if (count > half_width)
22396 ix86_expand_ashl_const (high[0], count - half_width, mode);
22397 }
22398 else
22399 {
22400 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22401
22402 if (!rtx_equal_p (operands[0], operands[1]))
22403 emit_move_insn (operands[0], operands[1]);
22404
22405 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22406 ix86_expand_ashl_const (low[0], count, mode);
22407 }
22408 return;
22409 }
22410
22411 split_double_mode (mode, operands, 1, low, high);
22412
22413 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22414
22415 if (operands[1] == const1_rtx)
22416 {
22417 /* Assuming we've chosen QImode-capable registers, 1 << N
22418 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
22419 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22420 {
22421 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22422
22423 ix86_expand_clear (low[0]);
22424 ix86_expand_clear (high[0]);
22425 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22426
22427 d = gen_lowpart (QImode, low[0]);
22428 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22429 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22430 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22431
22432 d = gen_lowpart (QImode, high[0]);
22433 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22434 s = gen_rtx_NE (QImode, flags, const0_rtx);
22435 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22436 }
22437
22438 /* Otherwise, we can get the same results by manually performing
22439 a bit extract operation on bit 5/6, and then performing the two
22440 shifts. The two methods of getting 0/1 into low/high are exactly
22441 the same size. Avoiding the shift in the bit extract case helps
22442 pentium4 a bit; no one else seems to care much either way. */
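	 /* E.g. for a DImode 1 << N on a 32-bit target this computes
	    high = (N >> 5) & 1 and low = high ^ 1, then shifts both halves
	    left by N (the 32-bit shifts only use N mod 32), so exactly one
	    half ends up holding 1 << (N mod 32).  */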
22443 else
22444 {
22445 enum machine_mode half_mode;
22446 rtx (*gen_lshr3)(rtx, rtx, rtx);
22447 rtx (*gen_and3)(rtx, rtx, rtx);
22448 rtx (*gen_xor3)(rtx, rtx, rtx);
22449 HOST_WIDE_INT bits;
22450 rtx x;
22451
22452 if (mode == DImode)
22453 {
22454 half_mode = SImode;
22455 gen_lshr3 = gen_lshrsi3;
22456 gen_and3 = gen_andsi3;
22457 gen_xor3 = gen_xorsi3;
22458 bits = 5;
22459 }
22460 else
22461 {
22462 half_mode = DImode;
22463 gen_lshr3 = gen_lshrdi3;
22464 gen_and3 = gen_anddi3;
22465 gen_xor3 = gen_xordi3;
22466 bits = 6;
22467 }
22468
22469 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22470 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22471 else
22472 x = gen_lowpart (half_mode, operands[2]);
22473 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22474
22475 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22476 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22477 emit_move_insn (low[0], high[0]);
22478 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22479 }
22480
22481 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22482 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22483 return;
22484 }
22485
22486 if (operands[1] == constm1_rtx)
22487 {
22488 /* For -1 << N, we can avoid the shld instruction, because we
22489 know that we're shifting 0...31/63 ones into a -1. */
22490 emit_move_insn (low[0], constm1_rtx);
22491 if (optimize_insn_for_size_p ())
22492 emit_move_insn (high[0], low[0]);
22493 else
22494 emit_move_insn (high[0], constm1_rtx);
22495 }
22496 else
22497 {
22498 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22499
22500 if (!rtx_equal_p (operands[0], operands[1]))
22501 emit_move_insn (operands[0], operands[1]);
22502
22503 split_double_mode (mode, operands, 1, low, high);
22504 emit_insn (gen_shld (high[0], low[0], operands[2]));
22505 }
22506
22507 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22508
22509 if (TARGET_CMOVE && scratch)
22510 {
22511 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22512 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22513
22514 ix86_expand_clear (scratch);
22515 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22516 }
22517 else
22518 {
22519 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22520 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22521
22522 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22523 }
22524 }
22525
22526 void
22527 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22528 {
22529 rtx (*gen_ashr3)(rtx, rtx, rtx)
22530 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22531 rtx (*gen_shrd)(rtx, rtx, rtx);
22532 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22533
22534 rtx low[2], high[2];
22535 int count;
22536
22537 if (CONST_INT_P (operands[2]))
22538 {
22539 split_double_mode (mode, operands, 2, low, high);
22540 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22541
22542 if (count == GET_MODE_BITSIZE (mode) - 1)
22543 {
22544 emit_move_insn (high[0], high[1]);
22545 emit_insn (gen_ashr3 (high[0], high[0],
22546 GEN_INT (half_width - 1)));
22547 emit_move_insn (low[0], high[0]);
22548
22549 }
22550 else if (count >= half_width)
22551 {
22552 emit_move_insn (low[0], high[1]);
22553 emit_move_insn (high[0], low[0]);
22554 emit_insn (gen_ashr3 (high[0], high[0],
22555 GEN_INT (half_width - 1)));
22556
22557 if (count > half_width)
22558 emit_insn (gen_ashr3 (low[0], low[0],
22559 GEN_INT (count - half_width)));
22560 }
22561 else
22562 {
22563 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22564
22565 if (!rtx_equal_p (operands[0], operands[1]))
22566 emit_move_insn (operands[0], operands[1]);
22567
22568 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22569 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22570 }
22571 }
22572 else
22573 {
22574 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22575
22576 if (!rtx_equal_p (operands[0], operands[1]))
22577 emit_move_insn (operands[0], operands[1]);
22578
22579 split_double_mode (mode, operands, 1, low, high);
22580
22581 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22582 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22583
22584 if (TARGET_CMOVE && scratch)
22585 {
22586 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22587 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22588
22589 emit_move_insn (scratch, high[0]);
22590 emit_insn (gen_ashr3 (scratch, scratch,
22591 GEN_INT (half_width - 1)));
22592 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22593 scratch));
22594 }
22595 else
22596 {
22597 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22598 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22599
22600 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22601 }
22602 }
22603 }
22604
22605 void
22606 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22607 {
22608 rtx (*gen_lshr3)(rtx, rtx, rtx)
22609 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22610 rtx (*gen_shrd)(rtx, rtx, rtx);
22611 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22612
22613 rtx low[2], high[2];
22614 int count;
22615
22616 if (CONST_INT_P (operands[2]))
22617 {
22618 split_double_mode (mode, operands, 2, low, high);
22619 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22620
22621 if (count >= half_width)
22622 {
22623 emit_move_insn (low[0], high[1]);
22624 ix86_expand_clear (high[0]);
22625
22626 if (count > half_width)
22627 emit_insn (gen_lshr3 (low[0], low[0],
22628 GEN_INT (count - half_width)));
22629 }
22630 else
22631 {
22632 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22633
22634 if (!rtx_equal_p (operands[0], operands[1]))
22635 emit_move_insn (operands[0], operands[1]);
22636
22637 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22638 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22639 }
22640 }
22641 else
22642 {
22643 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22644
22645 if (!rtx_equal_p (operands[0], operands[1]))
22646 emit_move_insn (operands[0], operands[1]);
22647
22648 split_double_mode (mode, operands, 1, low, high);
22649
22650 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22651 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22652
22653 if (TARGET_CMOVE && scratch)
22654 {
22655 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22656 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22657
22658 ix86_expand_clear (scratch);
22659 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22660 scratch));
22661 }
22662 else
22663 {
22664 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22665 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22666
22667 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22668 }
22669 }
22670 }
22671
22672 /* Predict just emitted jump instruction to be taken with probability PROB. */
22673 static void
22674 predict_jump (int prob)
22675 {
22676 rtx insn = get_last_insn ();
22677 gcc_assert (JUMP_P (insn));
22678 add_int_reg_note (insn, REG_BR_PROB, prob);
22679 }
22680
22681 /* Helper function for the string operations below.  Test VARIABLE whether
22682 it is aligned to VALUE bytes.  If true, jump to the label.  */
22683 static rtx
22684 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22685 {
22686 rtx label = gen_label_rtx ();
22687 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22688 if (GET_MODE (variable) == DImode)
22689 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22690 else
22691 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22692 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22693 1, label);
22694 if (epilogue)
22695 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22696 else
22697 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22698 return label;
22699 }
22700
22701 /* Decrease COUNTREG by VALUE.  */
22702 static void
22703 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22704 {
22705 rtx (*gen_add)(rtx, rtx, rtx)
22706 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22707
22708 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22709 }
22710
22711 /* Zero extend possibly SImode EXP to Pmode register. */
22712 rtx
22713 ix86_zero_extend_to_Pmode (rtx exp)
22714 {
22715 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22716 }
22717
22718 /* Divide COUNTREG by SCALE. */
22719 static rtx
22720 scale_counter (rtx countreg, int scale)
22721 {
22722 rtx sc;
22723
22724 if (scale == 1)
22725 return countreg;
22726 if (CONST_INT_P (countreg))
22727 return GEN_INT (INTVAL (countreg) / scale);
22728 gcc_assert (REG_P (countreg));
22729
22730 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22731 GEN_INT (exact_log2 (scale)),
22732 NULL, 1, OPTAB_DIRECT);
22733 return sc;
22734 }
22735
22736 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22737 DImode for constant loop counts. */
22738
22739 static enum machine_mode
22740 counter_mode (rtx count_exp)
22741 {
22742 if (GET_MODE (count_exp) != VOIDmode)
22743 return GET_MODE (count_exp);
22744 if (!CONST_INT_P (count_exp))
22745 return Pmode;
22746 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22747 return DImode;
22748 return SImode;
22749 }
22750
22751 /* Copy the address to a Pmode register. This is used for x32 to
22752 truncate DImode TLS address to a SImode register. */
22753
22754 static rtx
22755 ix86_copy_addr_to_reg (rtx addr)
22756 {
22757 if (GET_MODE (addr) == Pmode)
22758 return copy_addr_to_reg (addr);
22759 else
22760 {
22761 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22762 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22763 }
22764 }
22765
22766 /* When ISSETMEM is FALSE, output a simple loop to move the memory pointed to
22767 by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall
22768 size is COUNT, specified in bytes.  When ISSETMEM is TRUE, output the
22769 equivalent loop to set memory by VALUE (supposed to be in MODE).
22770
22771 The size is rounded down to a whole number of chunks moved at once.
22772 SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info.  */
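/* For instance, with MODE == SImode and UNROLL == 4 each iteration of the
   emitted loop copies (or stores) 16 bytes, and the loop runs over COUNT
   rounded down to a multiple of 16; any remainder is left for the caller's
   epilogue code.  */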
22773
22774
22775 static void
22776 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22777 rtx destptr, rtx srcptr, rtx value,
22778 rtx count, enum machine_mode mode, int unroll,
22779 int expected_size, bool issetmem)
22780 {
22781 rtx out_label, top_label, iter, tmp;
22782 enum machine_mode iter_mode = counter_mode (count);
22783 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22784 rtx piece_size = GEN_INT (piece_size_n);
22785 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22786 rtx size;
22787 int i;
22788
22789 top_label = gen_label_rtx ();
22790 out_label = gen_label_rtx ();
22791 iter = gen_reg_rtx (iter_mode);
22792
22793 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22794 NULL, 1, OPTAB_DIRECT);
22795 /* Those two should combine. */
22796 if (piece_size == const1_rtx)
22797 {
22798 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22799 true, out_label);
22800 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22801 }
22802 emit_move_insn (iter, const0_rtx);
22803
22804 emit_label (top_label);
22805
22806 tmp = convert_modes (Pmode, iter_mode, iter, true);
22807
22808 /* This assert could be relaxed - in that case we'd need to compute
22809 the smallest power of two containing PIECE_SIZE_N and pass it to
22810 offset_address.  */
22811 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22812 destmem = offset_address (destmem, tmp, piece_size_n);
22813 destmem = adjust_address (destmem, mode, 0);
22814
22815 if (!issetmem)
22816 {
22817 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22818 srcmem = adjust_address (srcmem, mode, 0);
22819
22820 /* When unrolling for chips that reorder memory reads and writes,
22821 we can save registers by using a single temporary.
22822 Also, using 4 temporaries is overkill in 32-bit mode.  */
22823 if (!TARGET_64BIT && 0)
22824 {
22825 for (i = 0; i < unroll; i++)
22826 {
22827 if (i)
22828 {
22829 destmem =
22830 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22831 srcmem =
22832 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22833 }
22834 emit_move_insn (destmem, srcmem);
22835 }
22836 }
22837 else
22838 {
22839 rtx tmpreg[4];
22840 gcc_assert (unroll <= 4);
22841 for (i = 0; i < unroll; i++)
22842 {
22843 tmpreg[i] = gen_reg_rtx (mode);
22844 if (i)
22845 {
22846 srcmem =
22847 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22848 }
22849 emit_move_insn (tmpreg[i], srcmem);
22850 }
22851 for (i = 0; i < unroll; i++)
22852 {
22853 if (i)
22854 {
22855 destmem =
22856 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22857 }
22858 emit_move_insn (destmem, tmpreg[i]);
22859 }
22860 }
22861 }
22862 else
22863 for (i = 0; i < unroll; i++)
22864 {
22865 if (i)
22866 destmem =
22867 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22868 emit_move_insn (destmem, value);
22869 }
22870
22871 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22872 true, OPTAB_LIB_WIDEN);
22873 if (tmp != iter)
22874 emit_move_insn (iter, tmp);
22875
22876 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22877 true, top_label);
22878 if (expected_size != -1)
22879 {
22880 expected_size /= GET_MODE_SIZE (mode) * unroll;
22881 if (expected_size == 0)
22882 predict_jump (0);
22883 else if (expected_size > REG_BR_PROB_BASE)
22884 predict_jump (REG_BR_PROB_BASE - 1);
22885 else
22886 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22887 }
22888 else
22889 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22890 iter = ix86_zero_extend_to_Pmode (iter);
22891 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22892 true, OPTAB_LIB_WIDEN);
22893 if (tmp != destptr)
22894 emit_move_insn (destptr, tmp);
22895 if (!issetmem)
22896 {
22897 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22898 true, OPTAB_LIB_WIDEN);
22899 if (tmp != srcptr)
22900 emit_move_insn (srcptr, tmp);
22901 }
22902 emit_label (out_label);
22903 }
22904
22905 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22906 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22907 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22908 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22909 ORIG_VALUE is the original value passed to memset to fill the memory with.
22910 Other arguments have the same meaning as for the previous function.  */
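/* For example, clearing a constant number of bytes that is a multiple of 4
   can be emitted as a single "rep stosd" whose count is the byte count
   scaled down by 4, thanks to the QImode -> SImode promotion below.  */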
22911
22912 static void
22913 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22914 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22915 rtx count,
22916 enum machine_mode mode, bool issetmem)
22917 {
22918 rtx destexp;
22919 rtx srcexp;
22920 rtx countreg;
22921 HOST_WIDE_INT rounded_count;
22922
22923 /* If possible, it is shorter to use rep movs.
22924 TODO: Maybe it is better to move this logic to decide_alg. */
22925 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22926 && (!issetmem || orig_value == const0_rtx))
22927 mode = SImode;
22928
22929 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22930 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22931
22932 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22933 GET_MODE_SIZE (mode)));
22934 if (mode != QImode)
22935 {
22936 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22937 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22938 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22939 }
22940 else
22941 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22942 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22943 {
22944 rounded_count = (INTVAL (count)
22945 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22946 destmem = shallow_copy_rtx (destmem);
22947 set_mem_size (destmem, rounded_count);
22948 }
22949 else if (MEM_SIZE_KNOWN_P (destmem))
22950 clear_mem_size (destmem);
22951
22952 if (issetmem)
22953 {
22954 value = force_reg (mode, gen_lowpart (mode, value));
22955 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22956 }
22957 else
22958 {
22959 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22960 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22961 if (mode != QImode)
22962 {
22963 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22964 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22965 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22966 }
22967 else
22968 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
22969 if (CONST_INT_P (count))
22970 {
22971 rounded_count = (INTVAL (count)
22972 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22973 srcmem = shallow_copy_rtx (srcmem);
22974 set_mem_size (srcmem, rounded_count);
22975 }
22976 else
22977 {
22978 if (MEM_SIZE_KNOWN_P (srcmem))
22979 clear_mem_size (srcmem);
22980 }
22981 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
22982 destexp, srcexp));
22983 }
22984 }
22985
22986 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22987 DESTMEM.
22988 SRCMEM is passed by pointer and is updated on return.
22989 The return value is the updated DESTMEM.  */
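/* For instance, a SIZE_TO_MOVE of 8 on a 64-bit target is emitted as one
   DImode load into a temporary register followed by one DImode store, with
   SRCPTR and DESTPTR both advanced by 8.  */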
22990 static rtx
22991 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
22992 HOST_WIDE_INT size_to_move)
22993 {
22994 rtx dst = destmem, src = *srcmem, adjust, tempreg;
22995 enum insn_code code;
22996 enum machine_mode move_mode;
22997 int piece_size, i;
22998
22999 /* Find the widest mode in which we could perform moves.
23000 Start with the biggest power of 2 no larger than SIZE_TO_MOVE and halve
23001 it until a move of that size is supported.  */
23002 piece_size = 1 << floor_log2 (size_to_move);
23003 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23004 code = optab_handler (mov_optab, move_mode);
23005 while (code == CODE_FOR_nothing && piece_size > 1)
23006 {
23007 piece_size >>= 1;
23008 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23009 code = optab_handler (mov_optab, move_mode);
23010 }
23011
23012 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23013 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23014 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23015 {
23016 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23017 move_mode = mode_for_vector (word_mode, nunits);
23018 code = optab_handler (mov_optab, move_mode);
23019 if (code == CODE_FOR_nothing)
23020 {
23021 move_mode = word_mode;
23022 piece_size = GET_MODE_SIZE (move_mode);
23023 code = optab_handler (mov_optab, move_mode);
23024 }
23025 }
23026 gcc_assert (code != CODE_FOR_nothing);
23027
23028 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23029 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23030
23031 /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
23032 gcc_assert (size_to_move % piece_size == 0);
23033 adjust = GEN_INT (piece_size);
23034 for (i = 0; i < size_to_move; i += piece_size)
23035 {
23036 /* We move from memory to memory, so we'll need to do it via
23037 a temporary register. */
23038 tempreg = gen_reg_rtx (move_mode);
23039 emit_insn (GEN_FCN (code) (tempreg, src));
23040 emit_insn (GEN_FCN (code) (dst, tempreg));
23041
23042 emit_move_insn (destptr,
23043 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23044 emit_move_insn (srcptr,
23045 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23046
23047 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23048 piece_size);
23049 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23050 piece_size);
23051 }
23052
23053 /* Update DST and SRC rtx. */
23054 *srcmem = src;
23055 return dst;
23056 }
23057
23058 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
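/* When COUNT is a constant the epilogue is a straight-line binary
   decomposition of the remainder: e.g. with MAX_SIZE == 8 and a remainder
   of 7 it emits a 4-byte, a 2-byte and a 1-byte copy.  */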
23059 static void
23060 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23061 rtx destptr, rtx srcptr, rtx count, int max_size)
23062 {
23063 rtx src, dest;
23064 if (CONST_INT_P (count))
23065 {
23066 HOST_WIDE_INT countval = INTVAL (count);
23067 HOST_WIDE_INT epilogue_size = countval % max_size;
23068 int i;
23069
23070 /* For now MAX_SIZE should be a power of 2. This assert could be
23071 relaxed, but it'll require a bit more complicated epilogue
23072 expanding. */
23073 gcc_assert ((max_size & (max_size - 1)) == 0);
23074 for (i = max_size; i >= 1; i >>= 1)
23075 {
23076 if (epilogue_size & i)
23077 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23078 }
23079 return;
23080 }
23081 if (max_size > 8)
23082 {
23083 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23084 count, 1, OPTAB_DIRECT);
23085 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23086 count, QImode, 1, 4, false);
23087 return;
23088 }
23089
23090 /* When there are stringops, we can cheaply increase dest and src pointers.
23091 Otherwise we save code size by maintaining offset (zero is readily
23092 available from preceding rep operation) and using x86 addressing modes.
23093 */
23094 if (TARGET_SINGLE_STRINGOP)
23095 {
23096 if (max_size > 4)
23097 {
23098 rtx label = ix86_expand_aligntest (count, 4, true);
23099 src = change_address (srcmem, SImode, srcptr);
23100 dest = change_address (destmem, SImode, destptr);
23101 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23102 emit_label (label);
23103 LABEL_NUSES (label) = 1;
23104 }
23105 if (max_size > 2)
23106 {
23107 rtx label = ix86_expand_aligntest (count, 2, true);
23108 src = change_address (srcmem, HImode, srcptr);
23109 dest = change_address (destmem, HImode, destptr);
23110 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23111 emit_label (label);
23112 LABEL_NUSES (label) = 1;
23113 }
23114 if (max_size > 1)
23115 {
23116 rtx label = ix86_expand_aligntest (count, 1, true);
23117 src = change_address (srcmem, QImode, srcptr);
23118 dest = change_address (destmem, QImode, destptr);
23119 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23120 emit_label (label);
23121 LABEL_NUSES (label) = 1;
23122 }
23123 }
23124 else
23125 {
23126 rtx offset = force_reg (Pmode, const0_rtx);
23127 rtx tmp;
23128
23129 if (max_size > 4)
23130 {
23131 rtx label = ix86_expand_aligntest (count, 4, true);
23132 src = change_address (srcmem, SImode, srcptr);
23133 dest = change_address (destmem, SImode, destptr);
23134 emit_move_insn (dest, src);
23135 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23136 true, OPTAB_LIB_WIDEN);
23137 if (tmp != offset)
23138 emit_move_insn (offset, tmp);
23139 emit_label (label);
23140 LABEL_NUSES (label) = 1;
23141 }
23142 if (max_size > 2)
23143 {
23144 rtx label = ix86_expand_aligntest (count, 2, true);
23145 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23146 src = change_address (srcmem, HImode, tmp);
23147 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23148 dest = change_address (destmem, HImode, tmp);
23149 emit_move_insn (dest, src);
23150 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23151 true, OPTAB_LIB_WIDEN);
23152 if (tmp != offset)
23153 emit_move_insn (offset, tmp);
23154 emit_label (label);
23155 LABEL_NUSES (label) = 1;
23156 }
23157 if (max_size > 1)
23158 {
23159 rtx label = ix86_expand_aligntest (count, 1, true);
23160 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23161 src = change_address (srcmem, QImode, tmp);
23162 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23163 dest = change_address (destmem, QImode, tmp);
23164 emit_move_insn (dest, src);
23165 emit_label (label);
23166 LABEL_NUSES (label) = 1;
23167 }
23168 }
23169 }
23170
23171 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23172 with value PROMOTED_VAL.
23173 DESTPTR is advanced by the emitted code.
23174 The return value is the updated DESTMEM.  */
23175 static rtx
23176 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23177 HOST_WIDE_INT size_to_move)
23178 {
23179 rtx dst = destmem, adjust;
23180 enum insn_code code;
23181 enum machine_mode move_mode;
23182 int piece_size, i;
23183
23184 /* Find the widest mode in which we could perform moves.
23185 Start with the mode of PROMOTED_VAL and shrink it to SIZE_TO_MOVE
23186 bytes if that is smaller.  */
23187 move_mode = GET_MODE (promoted_val);
23188 if (move_mode == VOIDmode)
23189 move_mode = QImode;
23190 if (size_to_move < GET_MODE_SIZE (move_mode))
23191 {
23192 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23193 promoted_val = gen_lowpart (move_mode, promoted_val);
23194 }
23195 piece_size = GET_MODE_SIZE (move_mode);
23196 code = optab_handler (mov_optab, move_mode);
23197 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23198
23199 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23200
23201 /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
23202 gcc_assert (size_to_move % piece_size == 0);
23203 adjust = GEN_INT (piece_size);
23204 for (i = 0; i < size_to_move; i += piece_size)
23205 {
23206 if (piece_size <= GET_MODE_SIZE (word_mode))
23207 {
23208 emit_insn (gen_strset (destptr, dst, promoted_val));
23209 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23210 piece_size);
23211 continue;
23212 }
23213
23214 emit_insn (GEN_FCN (code) (dst, promoted_val));
23215
23216 emit_move_insn (destptr,
23217 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23218
23219 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23220 piece_size);
23221 }
23222
23223 /* Update DST rtx. */
23224 return dst;
23225 }
23226 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
23227 static void
23228 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23229 rtx count, int max_size)
23230 {
23231 count =
23232 expand_simple_binop (counter_mode (count), AND, count,
23233 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23234 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23235 gen_lowpart (QImode, value), count, QImode,
23236 1, max_size / 2, true);
23237 }
23238
23239 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
23240 static void
23241 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23242 rtx count, int max_size)
23243 {
23244 rtx dest;
23245
23246 if (CONST_INT_P (count))
23247 {
23248 HOST_WIDE_INT countval = INTVAL (count);
23249 HOST_WIDE_INT epilogue_size = countval % max_size;
23250 int i;
23251
23252 /* For now MAX_SIZE should be a power of 2. This assert could be
23253 relaxed, but it'll require a bit more complicated epilogue
23254 expanding. */
23255 gcc_assert ((max_size & (max_size - 1)) == 0);
23256 for (i = max_size; i >= 1; i >>= 1)
23257 {
23258 if (epilogue_size & i)
23259 {
23260 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23261 destmem = emit_memset (destmem, destptr, vec_value, i);
23262 else
23263 destmem = emit_memset (destmem, destptr, value, i);
23264 }
23265 }
23266 return;
23267 }
23268 if (max_size > 32)
23269 {
23270 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23271 return;
23272 }
23273 if (max_size > 16)
23274 {
23275 rtx label = ix86_expand_aligntest (count, 16, true);
23276 if (TARGET_64BIT)
23277 {
23278 dest = change_address (destmem, DImode, destptr);
23279 emit_insn (gen_strset (destptr, dest, value));
23280 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23281 emit_insn (gen_strset (destptr, dest, value));
23282 }
23283 else
23284 {
23285 dest = change_address (destmem, SImode, destptr);
23286 emit_insn (gen_strset (destptr, dest, value));
23287 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23288 emit_insn (gen_strset (destptr, dest, value));
23289 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23290 emit_insn (gen_strset (destptr, dest, value));
23291 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23292 emit_insn (gen_strset (destptr, dest, value));
23293 }
23294 emit_label (label);
23295 LABEL_NUSES (label) = 1;
23296 }
23297 if (max_size > 8)
23298 {
23299 rtx label = ix86_expand_aligntest (count, 8, true);
23300 if (TARGET_64BIT)
23301 {
23302 dest = change_address (destmem, DImode, destptr);
23303 emit_insn (gen_strset (destptr, dest, value));
23304 }
23305 else
23306 {
23307 dest = change_address (destmem, SImode, destptr);
23308 emit_insn (gen_strset (destptr, dest, value));
23309 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23310 emit_insn (gen_strset (destptr, dest, value));
23311 }
23312 emit_label (label);
23313 LABEL_NUSES (label) = 1;
23314 }
23315 if (max_size > 4)
23316 {
23317 rtx label = ix86_expand_aligntest (count, 4, true);
23318 dest = change_address (destmem, SImode, destptr);
23319 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23320 emit_label (label);
23321 LABEL_NUSES (label) = 1;
23322 }
23323 if (max_size > 2)
23324 {
23325 rtx label = ix86_expand_aligntest (count, 2, true);
23326 dest = change_address (destmem, HImode, destptr);
23327 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23328 emit_label (label);
23329 LABEL_NUSES (label) = 1;
23330 }
23331 if (max_size > 1)
23332 {
23333 rtx label = ix86_expand_aligntest (count, 1, true);
23334 dest = change_address (destmem, QImode, destptr);
23335 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23336 emit_label (label);
23337 LABEL_NUSES (label) = 1;
23338 }
23339 }
23340
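/* Illustrative note (not in the original source): for a constant COUNT the
   epilogue above decomposes the residual size into power-of-two stores.  For
   example, with MAX_SIZE == 16 and COUNT % MAX_SIZE == 13 (binary 1101), the
   loop emits stores of 8, 4 and 1 bytes, using VEC_VALUE for any piece wider
   than VALUE's mode.  */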
23341 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store enough
23342 bytes into DESTMEM, to align it to DESIRED_ALIGNMENT. The original alignment is ALIGN.
23343 Depending on ISSETMEM, either the arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23344 ignored.
23345 The return value is the updated DESTMEM. */
23346 static rtx
23347 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23348 rtx destptr, rtx srcptr, rtx value,
23349 rtx vec_value, rtx count, int align,
23350 int desired_alignment, bool issetmem)
23351 {
23352 int i;
23353 for (i = 1; i < desired_alignment; i <<= 1)
23354 {
23355 if (align <= i)
23356 {
23357 rtx label = ix86_expand_aligntest (destptr, i, false);
23358 if (issetmem)
23359 {
23360 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23361 destmem = emit_memset (destmem, destptr, vec_value, i);
23362 else
23363 destmem = emit_memset (destmem, destptr, value, i);
23364 }
23365 else
23366 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23367 ix86_adjust_counter (count, i);
23368 emit_label (label);
23369 LABEL_NUSES (label) = 1;
23370 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23371 }
23372 }
23373 return destmem;
23374 }
23375
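/* Illustrative note (not in the original source): with ALIGN == 1 and
   DESIRED_ALIGNMENT == 16, the prologue above emits runtime tests of the
   destination address against the 1, 2, 4 and 8 bit masks; each taken branch
   copies (or stores) that many bytes and adjusts COUNT, so that the
   destination ends up 16-byte aligned.  */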
23376 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23377 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23378 and jump to DONE_LABEL. */
23379 static void
23380 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23381 rtx destptr, rtx srcptr,
23382 rtx value, rtx vec_value,
23383 rtx count, int size,
23384 rtx done_label, bool issetmem)
23385 {
23386 rtx label = ix86_expand_aligntest (count, size, false);
23387 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23388 rtx modesize;
23389 int n;
23390
23391 /* If we do not have a vector value to copy, we must reduce the size. */
23392 if (issetmem)
23393 {
23394 if (!vec_value)
23395 {
23396 if (GET_MODE (value) == VOIDmode && size > 8)
23397 mode = Pmode;
23398 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23399 mode = GET_MODE (value);
23400 }
23401 else
23402 mode = GET_MODE (vec_value), value = vec_value;
23403 }
23404 else
23405 {
23406 /* Choose appropriate vector mode. */
23407 if (size >= 32)
23408 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23409 else if (size >= 16)
23410 mode = TARGET_SSE ? V16QImode : DImode;
23411 srcmem = change_address (srcmem, mode, srcptr);
23412 }
23413 destmem = change_address (destmem, mode, destptr);
23414 modesize = GEN_INT (GET_MODE_SIZE (mode));
23415 gcc_assert (GET_MODE_SIZE (mode) <= size);
23416 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23417 {
23418 if (issetmem)
23419 emit_move_insn (destmem, gen_lowpart (mode, value));
23420 else
23421 {
23422 emit_move_insn (destmem, srcmem);
23423 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23424 }
23425 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23426 }
23427
23428 destmem = offset_address (destmem, count, 1);
23429 destmem = offset_address (destmem, GEN_INT (-2 * size),
23430 GET_MODE_SIZE (mode));
23431 if (!issetmem)
23432 {
23433 srcmem = offset_address (srcmem, count, 1);
23434 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23435 GET_MODE_SIZE (mode));
23436 }
23437 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23438 {
23439 if (issetmem)
23440 emit_move_insn (destmem, gen_lowpart (mode, value));
23441 else
23442 {
23443 emit_move_insn (destmem, srcmem);
23444 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23445 }
23446 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23447 }
23448 emit_jump_insn (gen_jump (done_label));
23449 emit_barrier ();
23450
23451 emit_label (label);
23452 LABEL_NUSES (label) = 1;
23453 }
23454
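/* Illustrative note (not in the original source): for SIZE == 8 the helper
   above handles any COUNT in the range 8..15 by copying 8 bytes from the
   start of the block and another, possibly overlapping, 8 bytes starting at
   DESTPTR + COUNT - 8.  The two moves together cover the whole block without
   a loop.  */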
23455 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23456 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23457 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23458 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23459 DONE_LABEL is a label after the whole copying sequence. The label is created
23460 on demand if *DONE_LABEL is NULL.
23461 MIN_SIZE is the minimal size of the copied block. This value gets adjusted for
23462 the new bounds after the initial copies.
23463
23464 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23465 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23466 we will dispatch to a library call for large blocks.
23467
23468 In pseudocode we do:
23469
23470 if (COUNT < SIZE)
23471 {
23472 Assume that SIZE is 4. Bigger sizes are handled analogously
23473 if (COUNT & 4)
23474 {
23475 copy 4 bytes from SRCPTR to DESTPTR
23476 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23477 goto done_label
23478 }
23479 if (!COUNT)
23480 goto done_label;
23481 copy 1 byte from SRCPTR to DESTPTR
23482 if (COUNT & 2)
23483 {
23484 copy 2 bytes from SRCPTR to DESTPTR
23485 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23486 }
23487 }
23488 else
23489 {
23490 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23491 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
23492
23493 OLD_DESTPTR = DESTPTR;
23494 Align DESTPTR up to DESIRED_ALIGN
23495 SRCPTR += DESTPTR - OLD_DESTPTR
23496 COUNT -= DESTPTR - OLD_DESTPTR
23497 if (DYNAMIC_CHECK)
23498 Round COUNT down to multiple of SIZE
23499 << optional caller supplied zero size guard is here >>
23500 << optional caller supplied dynamic check is here >>
23501 << caller supplied main copy loop is here >>
23502 }
23503 done_label:
23504 */
23505 static void
23506 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23507 rtx *destptr, rtx *srcptr,
23508 enum machine_mode mode,
23509 rtx value, rtx vec_value,
23510 rtx *count,
23511 rtx *done_label,
23512 int size,
23513 int desired_align,
23514 int align,
23515 unsigned HOST_WIDE_INT *min_size,
23516 bool dynamic_check,
23517 bool issetmem)
23518 {
23519 rtx loop_label = NULL, label;
23520 int n;
23521 rtx modesize;
23522 int prolog_size = 0;
23523 rtx mode_value;
23524
23525 /* Choose the proper value to copy. */
23526 if (issetmem && VECTOR_MODE_P (mode))
23527 mode_value = vec_value;
23528 else
23529 mode_value = value;
23530 gcc_assert (GET_MODE_SIZE (mode) <= size);
23531
23532 /* See if block is big or small, handle small blocks. */
23533 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23534 {
23535 int size2 = size;
23536 loop_label = gen_label_rtx ();
23537
23538 if (!*done_label)
23539 *done_label = gen_label_rtx ();
23540
23541 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23542 1, loop_label);
23543 size2 >>= 1;
23544
23545 /* Handle sizes > 3. */
23546 for (;size2 > 2; size2 >>= 1)
23547 expand_small_movmem_or_setmem (destmem, srcmem,
23548 *destptr, *srcptr,
23549 value, vec_value,
23550 *count,
23551 size2, *done_label, issetmem);
23552 /* Nothing to copy? Jump to DONE_LABEL if so. */
23553 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23554 1, *done_label);
23555
23556 /* Do a byte copy. */
23557 destmem = change_address (destmem, QImode, *destptr);
23558 if (issetmem)
23559 emit_move_insn (destmem, gen_lowpart (QImode, value));
23560 else
23561 {
23562 srcmem = change_address (srcmem, QImode, *srcptr);
23563 emit_move_insn (destmem, srcmem);
23564 }
23565
23566 /* Handle sizes 2 and 3. */
23567 label = ix86_expand_aligntest (*count, 2, false);
23568 destmem = change_address (destmem, HImode, *destptr);
23569 destmem = offset_address (destmem, *count, 1);
23570 destmem = offset_address (destmem, GEN_INT (-2), 2);
23571 if (issetmem)
23572 emit_move_insn (destmem, gen_lowpart (HImode, value));
23573 else
23574 {
23575 srcmem = change_address (srcmem, HImode, *srcptr);
23576 srcmem = offset_address (srcmem, *count, 1);
23577 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23578 emit_move_insn (destmem, srcmem);
23579 }
23580
23581 emit_label (label);
23582 LABEL_NUSES (label) = 1;
23583 emit_jump_insn (gen_jump (*done_label));
23584 emit_barrier ();
23585 }
23586 else
23587 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23588 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23589
23590 /* Start memcpy for COUNT >= SIZE. */
23591 if (loop_label)
23592 {
23593 emit_label (loop_label);
23594 LABEL_NUSES (loop_label) = 1;
23595 }
23596
23597 /* Copy first desired_align bytes. */
23598 if (!issetmem)
23599 srcmem = change_address (srcmem, mode, *srcptr);
23600 destmem = change_address (destmem, mode, *destptr);
23601 modesize = GEN_INT (GET_MODE_SIZE (mode));
23602 for (n = 0; prolog_size < desired_align - align; n++)
23603 {
23604 if (issetmem)
23605 emit_move_insn (destmem, mode_value);
23606 else
23607 {
23608 emit_move_insn (destmem, srcmem);
23609 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23610 }
23611 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23612 prolog_size += GET_MODE_SIZE (mode);
23613 }
23614
23615
23616 /* Copy last SIZE bytes. */
23617 destmem = offset_address (destmem, *count, 1);
23618 destmem = offset_address (destmem,
23619 GEN_INT (-size - prolog_size),
23620 1);
23621 if (issetmem)
23622 emit_move_insn (destmem, mode_value);
23623 else
23624 {
23625 srcmem = offset_address (srcmem, *count, 1);
23626 srcmem = offset_address (srcmem,
23627 GEN_INT (-size - prolog_size),
23628 1);
23629 emit_move_insn (destmem, srcmem);
23630 }
23631 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23632 {
23633 destmem = offset_address (destmem, modesize, 1);
23634 if (issetmem)
23635 emit_move_insn (destmem, mode_value);
23636 else
23637 {
23638 srcmem = offset_address (srcmem, modesize, 1);
23639 emit_move_insn (destmem, srcmem);
23640 }
23641 }
23642
23643 /* Align destination. */
23644 if (desired_align > 1 && desired_align > align)
23645 {
23646 rtx saveddest = *destptr;
23647
23648 gcc_assert (desired_align <= size);
23649 /* Align destptr up, placing it in a new register. */
23650 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23651 GEN_INT (prolog_size),
23652 NULL_RTX, 1, OPTAB_DIRECT);
23653 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23654 GEN_INT (-desired_align),
23655 *destptr, 1, OPTAB_DIRECT);
23656 /* See how many bytes we skipped. */
23657 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23658 *destptr,
23659 saveddest, 1, OPTAB_DIRECT);
23660 /* Adjust srcptr and count. */
23661 if (!issetmem)
23662 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23663 *srcptr, 1, OPTAB_DIRECT);
23664 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23665 saveddest, *count, 1, OPTAB_DIRECT);
23666 /* We copied at most size + prolog_size. */
23667 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23668 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23669 else
23670 *min_size = 0;
23671
23672 /* Our loops always round down the block size, but for dispatch to the library
23673 we need the precise value. */
23674 if (dynamic_check)
23675 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23676 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23677 }
23678 else
23679 {
23680 gcc_assert (prolog_size == 0);
23681 /* Decrease count, so we won't end up copying the last word twice. */
23682 if (!CONST_INT_P (*count))
23683 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23684 constm1_rtx, *count, 1, OPTAB_DIRECT);
23685 else
23686 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23687 if (*min_size)
23688 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23689 }
23690 }
23691
23692
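/* Illustrative note (not in the original source): the alignment step above
   rounds the destination pointer up as
     destptr = (destptr + prolog_size) & -desired_align;
   For example, with destptr == 0x1001, prolog_size == 16 and
   desired_align == 16 the new destptr is 0x1010; the 15 skipped bytes are
   subtracted from COUNT and added to SRCPTR (for memcpy), since the prologue
   moves already covered them.  */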
23693 /* This function is like the previous one, except here we know how many bytes
23694 need to be copied. That allows us to update alignment not only of DST, which
23695 is returned, but also of SRC, which is passed as a pointer for that
23696 reason. */
23697 static rtx
23698 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23699 rtx srcreg, rtx value, rtx vec_value,
23700 int desired_align, int align_bytes,
23701 bool issetmem)
23702 {
23703 rtx src = NULL;
23704 rtx orig_dst = dst;
23705 rtx orig_src = NULL;
23706 int piece_size = 1;
23707 int copied_bytes = 0;
23708
23709 if (!issetmem)
23710 {
23711 gcc_assert (srcp != NULL);
23712 src = *srcp;
23713 orig_src = src;
23714 }
23715
23716 for (piece_size = 1;
23717 piece_size <= desired_align && copied_bytes < align_bytes;
23718 piece_size <<= 1)
23719 {
23720 if (align_bytes & piece_size)
23721 {
23722 if (issetmem)
23723 {
23724 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23725 dst = emit_memset (dst, destreg, vec_value, piece_size);
23726 else
23727 dst = emit_memset (dst, destreg, value, piece_size);
23728 }
23729 else
23730 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23731 copied_bytes += piece_size;
23732 }
23733 }
23734 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23735 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23736 if (MEM_SIZE_KNOWN_P (orig_dst))
23737 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23738
23739 if (!issetmem)
23740 {
23741 int src_align_bytes = get_mem_align_offset (src, desired_align
23742 * BITS_PER_UNIT);
23743 if (src_align_bytes >= 0)
23744 src_align_bytes = desired_align - src_align_bytes;
23745 if (src_align_bytes >= 0)
23746 {
23747 unsigned int src_align;
23748 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23749 {
23750 if ((src_align_bytes & (src_align - 1))
23751 == (align_bytes & (src_align - 1)))
23752 break;
23753 }
23754 if (src_align > (unsigned int) desired_align)
23755 src_align = desired_align;
23756 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23757 set_mem_align (src, src_align * BITS_PER_UNIT);
23758 }
23759 if (MEM_SIZE_KNOWN_P (orig_src))
23760 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23761 *srcp = src;
23762 }
23763
23764 return dst;
23765 }
23766
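/* Illustrative note (not in the original source): with DESIRED_ALIGN == 8 and
   ALIGN_BYTES == 7 (binary 111), the loop above emits pieces of 1, 2 and 4
   bytes; afterwards the destination is 8-byte aligned and the alignment and
   size recorded on the MEMs are updated to match.  */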
23767 /* Return true if ALG can be used in current context.
23768 Assume we expand memset if MEMSET is true. */
23769 static bool
23770 alg_usable_p (enum stringop_alg alg, bool memset)
23771 {
23772 if (alg == no_stringop)
23773 return false;
23774 if (alg == vector_loop)
23775 return TARGET_SSE || TARGET_AVX;
23776 /* Algorithms using the rep prefix want at least edi and ecx;
23777 additionally, memset wants eax and memcpy wants esi. Don't
23778 consider such algorithms if the user has appropriated those
23779 registers for their own purposes. */
23780 if (alg == rep_prefix_1_byte
23781 || alg == rep_prefix_4_byte
23782 || alg == rep_prefix_8_byte)
23783 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23784 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23785 return true;
23786 }
23787
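/* Illustrative note (not in the original source): compiling with -ffixed-ecx,
   for instance, marks %ecx as fixed, so alg_usable_p rejects all rep-prefix
   based algorithms and decide_alg below has to fall back to another strategy
   (typically a loop or a library call).  */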
23788 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23789 static enum stringop_alg
23790 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23791 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23792 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23793 {
23794 const struct stringop_algs * algs;
23795 bool optimize_for_speed;
23796 int max = -1;
23797 const struct processor_costs *cost;
23798 int i;
23799 bool any_alg_usable_p = false;
23800
23801 *noalign = false;
23802 *dynamic_check = -1;
23803
23804 /* Even if the string operation call is cold, we still might spend a lot
23805 of time processing large blocks. */
23806 if (optimize_function_for_size_p (cfun)
23807 || (optimize_insn_for_size_p ()
23808 && (max_size < 256
23809 || (expected_size != -1 && expected_size < 256))))
23810 optimize_for_speed = false;
23811 else
23812 optimize_for_speed = true;
23813
23814 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23815 if (memset)
23816 algs = &cost->memset[TARGET_64BIT != 0];
23817 else
23818 algs = &cost->memcpy[TARGET_64BIT != 0];
23819
23820 /* See maximal size for user defined algorithm. */
23821 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23822 {
23823 enum stringop_alg candidate = algs->size[i].alg;
23824 bool usable = alg_usable_p (candidate, memset);
23825 any_alg_usable_p |= usable;
23826
23827 if (candidate != libcall && candidate && usable)
23828 max = algs->size[i].max;
23829 }
23830
23831 /* If the expected size is not known but the max size is small enough
23832 that the inline version is a win, set the expected size into
23833 the range. */
23834 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
23835 && expected_size == -1)
23836 expected_size = min_size / 2 + max_size / 2;
23837
23838 /* If the user specified the algorithm, honor it if possible. */
23839 if (ix86_stringop_alg != no_stringop
23840 && alg_usable_p (ix86_stringop_alg, memset))
23841 return ix86_stringop_alg;
23842 /* rep; movq or rep; movl is the smallest variant. */
23843 else if (!optimize_for_speed)
23844 {
23845 *noalign = true;
23846 if (!count || (count & 3) || (memset && !zero_memset))
23847 return alg_usable_p (rep_prefix_1_byte, memset)
23848 ? rep_prefix_1_byte : loop_1_byte;
23849 else
23850 return alg_usable_p (rep_prefix_4_byte, memset)
23851 ? rep_prefix_4_byte : loop;
23852 }
23853 /* Very tiny blocks are best handled via the loop, since REP is expensive to
23854 set up. */
23855 else if (expected_size != -1 && expected_size < 4)
23856 return loop_1_byte;
23857 else if (expected_size != -1)
23858 {
23859 enum stringop_alg alg = libcall;
23860 bool alg_noalign = false;
23861 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23862 {
23863 /* We get here if the algorithms that were not libcall-based
23864 were rep-prefix based and we are unable to use rep prefixes
23865 based on global register usage. Break out of the loop and
23866 use the heuristic below. */
23867 if (algs->size[i].max == 0)
23868 break;
23869 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23870 {
23871 enum stringop_alg candidate = algs->size[i].alg;
23872
23873 if (candidate != libcall && alg_usable_p (candidate, memset))
23874 {
23875 alg = candidate;
23876 alg_noalign = algs->size[i].noalign;
23877 }
23878 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23879 last non-libcall inline algorithm. */
23880 if (TARGET_INLINE_ALL_STRINGOPS)
23881 {
23882 /* When the current size is best copied by a libcall,
23883 but we are still forced to inline, run the heuristic below
23884 that will pick code for medium-sized blocks. */
23885 if (alg != libcall)
23886 {
23887 *noalign = alg_noalign;
23888 return alg;
23889 }
23890 break;
23891 }
23892 else if (alg_usable_p (candidate, memset))
23893 {
23894 *noalign = algs->size[i].noalign;
23895 return candidate;
23896 }
23897 }
23898 }
23899 }
23900 /* When asked to inline the call anyway, try to pick a meaningful choice.
23901 We look for the maximal size of a block that is faster to copy by hand and
23902 take blocks of at most that size, guessing that the average size will
23903 be roughly half of the block.
23904
23905 If this turns out to be bad, we might simply specify the preferred
23906 choice in ix86_costs. */
23907 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23908 && (algs->unknown_size == libcall
23909 || !alg_usable_p (algs->unknown_size, memset)))
23910 {
23911 enum stringop_alg alg;
23912
23913 /* If there aren't any usable algorithms, then recursing on
23914 smaller sizes isn't going to find anything. Just return the
23915 simple byte-at-a-time copy loop. */
23916 if (!any_alg_usable_p)
23917 {
23918 /* Pick something reasonable. */
23919 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23920 *dynamic_check = 128;
23921 return loop_1_byte;
23922 }
23923 if (max == -1)
23924 max = 4096;
23925 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23926 zero_memset, dynamic_check, noalign);
23927 gcc_assert (*dynamic_check == -1);
23928 gcc_assert (alg != libcall);
23929 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23930 *dynamic_check = max;
23931 return alg;
23932 }
23933 return (alg_usable_p (algs->unknown_size, memset)
23934 ? algs->unknown_size : libcall);
23935 }
23936
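/* Illustrative note (not in the original source): a hypothetical cost-table
   entry such as
     {libcall, {{256, unrolled_loop, false}, {-1, libcall, false}}}
   would make decide_alg pick unrolled_loop for blocks expected to be at most
   256 bytes and a library call for larger or unknown-size blocks, subject to
   alg_usable_p and the -minline-* overrides handled above.  */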
23937 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23938 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23939 static int
23940 decide_alignment (int align,
23941 enum stringop_alg alg,
23942 int expected_size,
23943 enum machine_mode move_mode)
23944 {
23945 int desired_align = 0;
23946
23947 gcc_assert (alg != no_stringop);
23948
23949 if (alg == libcall)
23950 return 0;
23951 if (move_mode == VOIDmode)
23952 return 0;
23953
23954 desired_align = GET_MODE_SIZE (move_mode);
23955 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
23956 copying a whole cache line at once. */
23957 if (TARGET_PENTIUMPRO
23958 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23959 desired_align = 8;
23960
23961 if (optimize_size)
23962 desired_align = 1;
23963 if (desired_align < align)
23964 desired_align = align;
23965 if (expected_size != -1 && expected_size < 4)
23966 desired_align = align;
23967
23968 return desired_align;
23969 }
23970
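/* Illustrative note (not in the original source): a rep_prefix_4_byte copy
   uses SImode, so its default desired alignment is 4; when tuning for
   PentiumPro it is raised to 8.  With -Os it drops to 1 (but never below the
   known ALIGN), and a tiny expected size likewise falls back to ALIGN.  */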
23971
23972 /* Helper function for memset. For the QImode value 0xXY produce
23973 0xXYXYXYXY of the width specified by MODE. This is essentially
23974 a * 0x01010101, but we can do slightly better than
23975 synth_mult by unwinding the sequence by hand on CPUs with
23976 slow multiply. */
23977 static rtx
23978 promote_duplicated_reg (enum machine_mode mode, rtx val)
23979 {
23980 enum machine_mode valmode = GET_MODE (val);
23981 rtx tmp;
23982 int nops = mode == DImode ? 3 : 2;
23983
23984 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
23985 if (val == const0_rtx)
23986 return copy_to_mode_reg (mode, CONST0_RTX (mode));
23987 if (CONST_INT_P (val))
23988 {
23989 HOST_WIDE_INT v = INTVAL (val) & 255;
23990
23991 v |= v << 8;
23992 v |= v << 16;
23993 if (mode == DImode)
23994 v |= (v << 16) << 16;
23995 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
23996 }
23997
23998 if (valmode == VOIDmode)
23999 valmode = QImode;
24000 if (valmode != QImode)
24001 val = gen_lowpart (QImode, val);
24002 if (mode == QImode)
24003 return val;
24004 if (!TARGET_PARTIAL_REG_STALL)
24005 nops--;
24006 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24007 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24008 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24009 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24010 {
24011 rtx reg = convert_modes (mode, QImode, val, true);
24012 tmp = promote_duplicated_reg (mode, const1_rtx);
24013 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24014 OPTAB_DIRECT);
24015 }
24016 else
24017 {
24018 rtx reg = convert_modes (mode, QImode, val, true);
24019
24020 if (!TARGET_PARTIAL_REG_STALL)
24021 if (mode == SImode)
24022 emit_insn (gen_movsi_insv_1 (reg, reg));
24023 else
24024 emit_insn (gen_movdi_insv_1 (reg, reg));
24025 else
24026 {
24027 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24028 NULL, 1, OPTAB_DIRECT);
24029 reg =
24030 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24031 }
24032 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24033 NULL, 1, OPTAB_DIRECT);
24034 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24035 if (mode == SImode)
24036 return reg;
24037 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24038 NULL, 1, OPTAB_DIRECT);
24039 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24040 return reg;
24041 }
24042 }
24043
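/* Illustrative note (not in the original source): for a constant QImode value
   such as 0xAB, the CONST_INT path above computes the replicated pattern
   directly:
     v  = 0xAB;
     v |= v << 8;            now 0xABAB
     v |= v << 16;           now 0xABABABAB
     v |= (v << 16) << 16;   now 0xABABABABABABABAB (DImode only)
   For a non-constant value the same result is produced either by multiplying
   with the replicated 0x01...01 constant or by the shift/or (or insv)
   sequence, whichever the cost model prefers.  */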
24044 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size that will
24045 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue raising the
24046 alignment from ALIGN to DESIRED_ALIGN. */
24047 static rtx
24048 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24049 int align)
24050 {
24051 rtx promoted_val;
24052
24053 if (TARGET_64BIT
24054 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24055 promoted_val = promote_duplicated_reg (DImode, val);
24056 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24057 promoted_val = promote_duplicated_reg (SImode, val);
24058 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24059 promoted_val = promote_duplicated_reg (HImode, val);
24060 else
24061 promoted_val = val;
24062
24063 return promoted_val;
24064 }
24065
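/* Illustrative note (not in the original source): on a 64-bit target with
   SIZE_NEEDED == 8 (e.g. a rep_prefix_8_byte memset), the helper above
   promotes the fill byte to a DImode register; with SIZE_NEEDED == 1 and no
   extra alignment work the original QImode value is returned unchanged.  */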
24066 /* Expand a string move (memcpy) or store (memset) operation. Use i386 string
24067 operations when profitable. The code depends upon architecture, block size
24068 and alignment, but always has one of the following overall structures:
24069
24070 Aligned move sequence:
24071
24072 1) Prologue guard: Conditional that jumps up to the epilogues for small
24073 blocks that can be handled by the epilogue alone. This is faster
24074 but also needed for correctness, since the prologue assumes the block
24075 is larger than the desired alignment.
24076
24077 Optional dynamic check for size and libcall for large
24078 blocks is emitted here too, with -minline-stringops-dynamically.
24079
24080 2) Prologue: copy first few bytes in order to get destination
24081 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24082 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24083 copied. We emit either a jump tree on power of two sized
24084 blocks, or a byte loop.
24085
24086 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24087 with specified algorithm.
24088
24089 4) Epilogue: code copying tail of the block that is too small to be
24090 handled by main body (or up to size guarded by prologue guard).
24091
24092 Misaligned move sequence
24093
24094 1) Misaligned move prologue/epilogue containing:
24095 a) Prologue handling small memory blocks and jumping to done_label
24096 (skipped if blocks are known to be large enough)
24097 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24098 needed, by a single possibly misaligned move
24099 (skipped if alignment is not needed)
24100 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24101
24102 2) Zero size guard dispatching to done_label, if needed
24103
24104 3) Dispatch to a library call, if needed.
24105
24106 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24107 with specified algorithm. */
24108 bool
24109 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24110 rtx align_exp, rtx expected_align_exp,
24111 rtx expected_size_exp, rtx min_size_exp,
24112 rtx max_size_exp, rtx probable_max_size_exp,
24113 bool issetmem)
24114 {
24115 rtx destreg;
24116 rtx srcreg = NULL;
24117 rtx label = NULL;
24118 rtx tmp;
24119 rtx jump_around_label = NULL;
24120 HOST_WIDE_INT align = 1;
24121 unsigned HOST_WIDE_INT count = 0;
24122 HOST_WIDE_INT expected_size = -1;
24123 int size_needed = 0, epilogue_size_needed;
24124 int desired_align = 0, align_bytes = 0;
24125 enum stringop_alg alg;
24126 rtx promoted_val = NULL;
24127 rtx vec_promoted_val = NULL;
24128 bool force_loopy_epilogue = false;
24129 int dynamic_check;
24130 bool need_zero_guard = false;
24131 bool noalign;
24132 enum machine_mode move_mode = VOIDmode;
24133 int unroll_factor = 1;
24134 /* TODO: Once value ranges are available, fill in proper data. */
24135 unsigned HOST_WIDE_INT min_size = 0;
24136 unsigned HOST_WIDE_INT max_size = -1;
24137 unsigned HOST_WIDE_INT probable_max_size = -1;
24138 bool misaligned_prologue_used = false;
24139
24140 if (CONST_INT_P (align_exp))
24141 align = INTVAL (align_exp);
24142 /* i386 can do misaligned access at a reasonable increase in cost. */
24143 if (CONST_INT_P (expected_align_exp)
24144 && INTVAL (expected_align_exp) > align)
24145 align = INTVAL (expected_align_exp);
24146 /* ALIGN is the minimum of destination and source alignment, but we care here
24147 just about destination alignment. */
24148 else if (!issetmem
24149 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24150 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24151
24152 if (CONST_INT_P (count_exp))
24153 min_size = max_size = probable_max_size = count = expected_size
24154 = INTVAL (count_exp);
24155 else
24156 {
24157 if (min_size_exp)
24158 min_size = INTVAL (min_size_exp);
24159 if (max_size_exp)
24160 max_size = INTVAL (max_size_exp);
24161 if (probable_max_size_exp)
24162 probable_max_size = INTVAL (probable_max_size_exp);
24163 if (CONST_INT_P (expected_size_exp) && count == 0)
24164 expected_size = INTVAL (expected_size_exp);
24165 }
24166
24167 /* Make sure we don't need to care about overflow later on. */
24168 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24169 return false;
24170
24171 /* Step 0: Decide on preferred algorithm, desired alignment and
24172 size of chunks to be copied by main loop. */
24173 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24174 issetmem,
24175 issetmem && val_exp == const0_rtx,
24176 &dynamic_check, &noalign);
24177 if (alg == libcall)
24178 return false;
24179 gcc_assert (alg != no_stringop);
24180
24181 /* For now the vector version of memset is generated only for memory zeroing, as
24182 creating the promoted vector value is very cheap in this case. */
24183 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24184 alg = unrolled_loop;
24185
24186 if (!count)
24187 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24188 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24189 if (!issetmem)
24190 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24191
24192 unroll_factor = 1;
24193 move_mode = word_mode;
24194 switch (alg)
24195 {
24196 case libcall:
24197 case no_stringop:
24198 case last_alg:
24199 gcc_unreachable ();
24200 case loop_1_byte:
24201 need_zero_guard = true;
24202 move_mode = QImode;
24203 break;
24204 case loop:
24205 need_zero_guard = true;
24206 break;
24207 case unrolled_loop:
24208 need_zero_guard = true;
24209 unroll_factor = (TARGET_64BIT ? 4 : 2);
24210 break;
24211 case vector_loop:
24212 need_zero_guard = true;
24213 unroll_factor = 4;
24214 /* Find the widest supported mode. */
24215 move_mode = word_mode;
24216 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24217 != CODE_FOR_nothing)
24218 move_mode = GET_MODE_WIDER_MODE (move_mode);
24219
24220 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24221 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24222 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24223 {
24224 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24225 move_mode = mode_for_vector (word_mode, nunits);
24226 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24227 move_mode = word_mode;
24228 }
24229 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24230 break;
24231 case rep_prefix_8_byte:
24232 move_mode = DImode;
24233 break;
24234 case rep_prefix_4_byte:
24235 move_mode = SImode;
24236 break;
24237 case rep_prefix_1_byte:
24238 move_mode = QImode;
24239 break;
24240 }
24241 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24242 epilogue_size_needed = size_needed;
24243
24244 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24245 if (!TARGET_ALIGN_STRINGOPS || noalign)
24246 align = desired_align;
24247
24248 /* Step 1: Prologue guard. */
24249
24250 /* Alignment code needs count to be in a register. */
24251 if (CONST_INT_P (count_exp) && desired_align > align)
24252 {
24253 if (INTVAL (count_exp) > desired_align
24254 && INTVAL (count_exp) > size_needed)
24255 {
24256 align_bytes
24257 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24258 if (align_bytes <= 0)
24259 align_bytes = 0;
24260 else
24261 align_bytes = desired_align - align_bytes;
24262 }
24263 if (align_bytes == 0)
24264 count_exp = force_reg (counter_mode (count_exp), count_exp);
24265 }
24266 gcc_assert (desired_align >= 1 && align >= 1);
24267
24268 /* Misaligned move sequences handle both prologue and epilogue at once.
24269 Default code generation results in smaller code for large alignments
24270 and also avoids redundant work when sizes are known precisely. */
24271 misaligned_prologue_used
24272 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24273 && MAX (desired_align, epilogue_size_needed) <= 32
24274 && desired_align <= epilogue_size_needed
24275 && ((desired_align > align && !align_bytes)
24276 || (!count && epilogue_size_needed > 1)));
24277
24278 /* Do the cheap promotion to allow better CSE across the
24279 main loop and epilogue (i.e. one load of the big constant in
24280 front of all the code).
24281 For now the misaligned move sequences do not have a fast path
24282 without broadcasting. */
24283 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24284 {
24285 if (alg == vector_loop)
24286 {
24287 gcc_assert (val_exp == const0_rtx);
24288 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24289 promoted_val = promote_duplicated_reg_to_size (val_exp,
24290 GET_MODE_SIZE (word_mode),
24291 desired_align, align);
24292 }
24293 else
24294 {
24295 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24296 desired_align, align);
24297 }
24298 }
24299 /* Misaligned move sequences handle both prologues and epilogues at once.
24300 Default code generation results in smaller code for large alignments and
24301 also avoids redundant work when sizes are known precisely. */
24302 if (misaligned_prologue_used)
24303 {
24304 /* The misaligned move prologue handles small blocks by itself. */
24305 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24306 (dst, src, &destreg, &srcreg,
24307 move_mode, promoted_val, vec_promoted_val,
24308 &count_exp,
24309 &jump_around_label,
24310 desired_align < align
24311 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24312 desired_align, align, &min_size, dynamic_check, issetmem);
24313 if (!issetmem)
24314 src = change_address (src, BLKmode, srcreg);
24315 dst = change_address (dst, BLKmode, destreg);
24316 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24317 epilogue_size_needed = 0;
24318 if (need_zero_guard && !min_size)
24319 {
24320 /* It is possible that we copied enough that the main loop will not
24321 execute. */
24322 gcc_assert (size_needed > 1);
24323 if (jump_around_label == NULL_RTX)
24324 jump_around_label = gen_label_rtx ();
24325 emit_cmp_and_jump_insns (count_exp,
24326 GEN_INT (size_needed),
24327 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24328 if (expected_size == -1
24329 || expected_size < (desired_align - align) / 2 + size_needed)
24330 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24331 else
24332 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24333 }
24334 }
24335 /* Ensure that alignment prologue won't copy past end of block. */
24336 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24337 {
24338 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24339 /* The epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24340 Make sure it is a power of 2. */
24341 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24342
24343 /* To improve performance of small blocks, we jump around the VAL
24344 promoting code. This means that if the promoted VAL is not constant,
24345 we might not use it in the epilogue and have to use the byte
24346 loop variant. */
24347 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24348 force_loopy_epilogue = true;
24349 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24350 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24351 {
24352 /* If the main algorithm works on QImode, no epilogue is needed.
24353 For small sizes just don't align anything. */
24354 if (size_needed == 1)
24355 desired_align = align;
24356 else
24357 goto epilogue;
24358 }
24359 else if (!count
24360 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24361 {
24362 label = gen_label_rtx ();
24363 emit_cmp_and_jump_insns (count_exp,
24364 GEN_INT (epilogue_size_needed),
24365 LTU, 0, counter_mode (count_exp), 1, label);
24366 if (expected_size == -1 || expected_size < epilogue_size_needed)
24367 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24368 else
24369 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24370 }
24371 }
24372
24373 /* Emit code to decide at runtime whether a library call or inline code should be
24374 used. */
24375 if (dynamic_check != -1)
24376 {
24377 if (!issetmem && CONST_INT_P (count_exp))
24378 {
24379 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24380 {
24381 emit_block_move_via_libcall (dst, src, count_exp, false);
24382 count_exp = const0_rtx;
24383 goto epilogue;
24384 }
24385 }
24386 else
24387 {
24388 rtx hot_label = gen_label_rtx ();
24389 if (jump_around_label == NULL_RTX)
24390 jump_around_label = gen_label_rtx ();
24391 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24392 LEU, 0, GET_MODE (count_exp), 1, hot_label);
24393 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24394 if (issetmem)
24395 set_storage_via_libcall (dst, count_exp, val_exp, false);
24396 else
24397 emit_block_move_via_libcall (dst, src, count_exp, false);
24398 emit_jump (jump_around_label);
24399 emit_label (hot_label);
24400 }
24401 }
24402
24403 /* Step 2: Alignment prologue. */
24404 /* Do the expensive promotion once we have branched off the small blocks. */
24405 if (issetmem && !promoted_val)
24406 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24407 desired_align, align);
24408
24409 if (desired_align > align && !misaligned_prologue_used)
24410 {
24411 if (align_bytes == 0)
24412 {
24413 /* Except for the first move in the prologue, we no longer know
24414 the constant offset in aliasing info. It doesn't seem worth
24415 the pain to maintain it for the first move, so throw away
24416 the info early. */
24417 dst = change_address (dst, BLKmode, destreg);
24418 if (!issetmem)
24419 src = change_address (src, BLKmode, srcreg);
24420 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24421 promoted_val, vec_promoted_val,
24422 count_exp, align, desired_align,
24423 issetmem);
24424 /* At most desired_align - align bytes are copied. */
24425 if (min_size < (unsigned)(desired_align - align))
24426 min_size = 0;
24427 else
24428 min_size -= desired_align - align;
24429 }
24430 else
24431 {
24432 /* If we know how many bytes need to be stored before dst is
24433 sufficiently aligned, maintain aliasing info accurately. */
24434 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24435 srcreg,
24436 promoted_val,
24437 vec_promoted_val,
24438 desired_align,
24439 align_bytes,
24440 issetmem);
24441
24442 count_exp = plus_constant (counter_mode (count_exp),
24443 count_exp, -align_bytes);
24444 count -= align_bytes;
24445 min_size -= align_bytes;
24446 max_size -= align_bytes;
24447 }
24448 if (need_zero_guard
24449 && !min_size
24450 && (count < (unsigned HOST_WIDE_INT) size_needed
24451 || (align_bytes == 0
24452 && count < ((unsigned HOST_WIDE_INT) size_needed
24453 + desired_align - align))))
24454 {
24455 /* It is possible that we copied enough that the main loop will not
24456 execute. */
24457 gcc_assert (size_needed > 1);
24458 if (label == NULL_RTX)
24459 label = gen_label_rtx ();
24460 emit_cmp_and_jump_insns (count_exp,
24461 GEN_INT (size_needed),
24462 LTU, 0, counter_mode (count_exp), 1, label);
24463 if (expected_size == -1
24464 || expected_size < (desired_align - align) / 2 + size_needed)
24465 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24466 else
24467 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24468 }
24469 }
24470 if (label && size_needed == 1)
24471 {
24472 emit_label (label);
24473 LABEL_NUSES (label) = 1;
24474 label = NULL;
24475 epilogue_size_needed = 1;
24476 if (issetmem)
24477 promoted_val = val_exp;
24478 }
24479 else if (label == NULL_RTX && !misaligned_prologue_used)
24480 epilogue_size_needed = size_needed;
24481
24482 /* Step 3: Main loop. */
24483
24484 switch (alg)
24485 {
24486 case libcall:
24487 case no_stringop:
24488 case last_alg:
24489 gcc_unreachable ();
24490 case loop_1_byte:
24491 case loop:
24492 case unrolled_loop:
24493 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24494 count_exp, move_mode, unroll_factor,
24495 expected_size, issetmem);
24496 break;
24497 case vector_loop:
24498 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24499 vec_promoted_val, count_exp, move_mode,
24500 unroll_factor, expected_size, issetmem);
24501 break;
24502 case rep_prefix_8_byte:
24503 case rep_prefix_4_byte:
24504 case rep_prefix_1_byte:
24505 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24506 val_exp, count_exp, move_mode, issetmem);
24507 break;
24508 }
24509 /* Properly adjust the offset of src and dest memory for aliasing. */
24510 if (CONST_INT_P (count_exp))
24511 {
24512 if (!issetmem)
24513 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24514 (count / size_needed) * size_needed);
24515 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24516 (count / size_needed) * size_needed);
24517 }
24518 else
24519 {
24520 if (!issetmem)
24521 src = change_address (src, BLKmode, srcreg);
24522 dst = change_address (dst, BLKmode, destreg);
24523 }
24524
24525 /* Step 4: Epilogue to copy the remaining bytes. */
24526 epilogue:
24527 if (label)
24528 {
24529 /* When the main loop is done, COUNT_EXP might hold the original count,
24530 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24531 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24532 bytes. Compensate if needed. */
24533
24534 if (size_needed < epilogue_size_needed)
24535 {
24536 tmp =
24537 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24538 GEN_INT (size_needed - 1), count_exp, 1,
24539 OPTAB_DIRECT);
24540 if (tmp != count_exp)
24541 emit_move_insn (count_exp, tmp);
24542 }
24543 emit_label (label);
24544 LABEL_NUSES (label) = 1;
24545 }
24546
24547 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24548 {
24549 if (force_loopy_epilogue)
24550 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24551 epilogue_size_needed);
24552 else
24553 {
24554 if (issetmem)
24555 expand_setmem_epilogue (dst, destreg, promoted_val,
24556 vec_promoted_val, count_exp,
24557 epilogue_size_needed);
24558 else
24559 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24560 epilogue_size_needed);
24561 }
24562 }
24563 if (jump_around_label)
24564 emit_label (jump_around_label);
24565 return true;
24566 }
24567
24568
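/* Illustrative note (not in the original source): this expander implements
   the target part of the block set/copy expansion for calls such as
     memset (buf, 0, n);      -- count not known at compile time
     memcpy (dst, src, 200);  -- small constant count
   For a small constant count decide_alg usually selects an inline strategy
   and the steps described above (prologue guard, alignment prologue, main
   loop, epilogue) are emitted directly into the caller; otherwise the
   expander may return false and leave the operation to a library call.  */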
24569 /* Expand the appropriate insns for doing strlen if not just doing
24570 repnz; scasb
24571
24572 out = result, initialized with the start address
24573 align_rtx = alignment of the address.
24574 scratch = scratch register, initialized with the start address when
24575 not aligned, otherwise undefined
24576
24577 This is just the body. It needs the initializations mentioned above and
24578 some address computing at the end. These things are done in i386.md. */
24579
24580 static void
24581 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24582 {
24583 int align;
24584 rtx tmp;
24585 rtx align_2_label = NULL_RTX;
24586 rtx align_3_label = NULL_RTX;
24587 rtx align_4_label = gen_label_rtx ();
24588 rtx end_0_label = gen_label_rtx ();
24589 rtx mem;
24590 rtx tmpreg = gen_reg_rtx (SImode);
24591 rtx scratch = gen_reg_rtx (SImode);
24592 rtx cmp;
24593
24594 align = 0;
24595 if (CONST_INT_P (align_rtx))
24596 align = INTVAL (align_rtx);
24597
24598 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24599
24600 /* Is there a known alignment and is it less than 4? */
24601 if (align < 4)
24602 {
24603 rtx scratch1 = gen_reg_rtx (Pmode);
24604 emit_move_insn (scratch1, out);
24605 /* Is there a known alignment and is it not 2? */
24606 if (align != 2)
24607 {
24608 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24609 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24610
24611 /* Leave just the 3 lower bits. */
24612 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24613 NULL_RTX, 0, OPTAB_WIDEN);
24614
24615 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24616 Pmode, 1, align_4_label);
24617 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24618 Pmode, 1, align_2_label);
24619 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24620 Pmode, 1, align_3_label);
24621 }
24622 else
24623 {
24624 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24625 check whether it is aligned to a 4-byte boundary. */
24626
24627 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24628 NULL_RTX, 0, OPTAB_WIDEN);
24629
24630 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24631 Pmode, 1, align_4_label);
24632 }
24633
24634 mem = change_address (src, QImode, out);
24635
24636 /* Now compare the bytes. */
24637
24638 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24639 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24640 QImode, 1, end_0_label);
24641
24642 /* Increment the address. */
24643 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24644
24645 /* Not needed with an alignment of 2. */
24646 if (align != 2)
24647 {
24648 emit_label (align_2_label);
24649
24650 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24651 end_0_label);
24652
24653 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24654
24655 emit_label (align_3_label);
24656 }
24657
24658 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24659 end_0_label);
24660
24661 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24662 }
24663
24664 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24665 align this loop: it only makes the program bigger and does not help
24666 speed. */
24667 emit_label (align_4_label);
24668
24669 mem = change_address (src, SImode, out);
24670 emit_move_insn (scratch, mem);
24671 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24672
24673 /* This formula yields a nonzero result iff one of the bytes is zero.
24674 This saves three branches inside the loop and many cycles. */
24675
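/* Illustrative note (not in the original source): the instructions below
   compute
     tmpreg = (x - 0x01010101) & ~x & 0x80808080
   which is nonzero exactly when at least one byte of x is zero.  For
   x == 0x11003344 (one zero byte) the result is 0x00800000, while for
   x == 0x11223344 (no zero byte) it is 0.  */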
24676 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24677 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24678 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24679 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24680 gen_int_mode (0x80808080, SImode)));
24681 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24682 align_4_label);
24683
24684 if (TARGET_CMOVE)
24685 {
24686 rtx reg = gen_reg_rtx (SImode);
24687 rtx reg2 = gen_reg_rtx (Pmode);
24688 emit_move_insn (reg, tmpreg);
24689 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24690
24691 /* If zero is not in the first two bytes, move two bytes forward. */
24692 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24693 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24694 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24695 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24696 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24697 reg,
24698 tmpreg)));
24699 /* Emit lea manually to avoid clobbering of flags. */
24700 emit_insn (gen_rtx_SET (SImode, reg2,
24701 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24702
24703 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24704 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24705 emit_insn (gen_rtx_SET (VOIDmode, out,
24706 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24707 reg2,
24708 out)));
24709 }
24710 else
24711 {
24712 rtx end_2_label = gen_label_rtx ();
24713 /* Is zero in the first two bytes? */
24714
24715 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24716 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24717 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24718 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24719 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24720 pc_rtx);
24721 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24722 JUMP_LABEL (tmp) = end_2_label;
24723
24724 /* Not in the first two. Move two bytes forward. */
24725 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24726 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24727
24728 emit_label (end_2_label);
24729
24730 }
24731
24732 /* Avoid branch in fixing the byte. */
24733 tmpreg = gen_lowpart (QImode, tmpreg);
24734 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24735 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24736 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24737 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24738
24739 emit_label (end_0_label);
24740 }
24741
24742 /* Expand strlen. */
24743
24744 bool
24745 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24746 {
24747 rtx addr, scratch1, scratch2, scratch3, scratch4;
24748
24749 /* The generic case of the strlen expander is long. Avoid
24750 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
24751
24752 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24753 && !TARGET_INLINE_ALL_STRINGOPS
24754 && !optimize_insn_for_size_p ()
24755 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24756 return false;
24757
24758 addr = force_reg (Pmode, XEXP (src, 0));
24759 scratch1 = gen_reg_rtx (Pmode);
24760
24761 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24762 && !optimize_insn_for_size_p ())
24763 {
24764 /* Well, it seems that some optimizer does not combine a call like
24765 foo(strlen(bar), strlen(bar));
24766 when the move and the subtraction are done here. It does calculate
24767 the length just once when these instructions are done inside
24768 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
24769 often used and I use one fewer register for the lifetime of
24770 output_strlen_unroll(), this is better. */
24771
24772 emit_move_insn (out, addr);
24773
24774 ix86_expand_strlensi_unroll_1 (out, src, align);
24775
24776 /* strlensi_unroll_1 returns the address of the zero at the end of
24777 the string, like memchr(), so compute the length by subtracting
24778 the start address. */
24779 emit_insn (ix86_gen_sub3 (out, out, addr));
24780 }
24781 else
24782 {
24783 rtx unspec;
24784
24785 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24786 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24787 return false;
24788
24789 scratch2 = gen_reg_rtx (Pmode);
24790 scratch3 = gen_reg_rtx (Pmode);
24791 scratch4 = force_reg (Pmode, constm1_rtx);
24792
24793 emit_move_insn (scratch3, addr);
24794 eoschar = force_reg (QImode, eoschar);
24795
24796 src = replace_equiv_address_nv (src, scratch3);
24797
24798 /* If .md starts supporting :P, this can be done in .md. */
24799 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24800 scratch4), UNSPEC_SCAS);
24801 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24802 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24803 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24804 }
24805 return true;
24806 }
24807
24808 /* For the given symbol (function), construct code to compute the address of its PLT
24809 entry in the large x86-64 PIC model. */
24810 static rtx
24811 construct_plt_address (rtx symbol)
24812 {
24813 rtx tmp, unspec;
24814
24815 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24816 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24817 gcc_assert (Pmode == DImode);
24818
24819 tmp = gen_reg_rtx (Pmode);
24820 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24821
24822 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24823 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24824 return tmp;
24825 }
24826
24827 rtx
24828 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24829 rtx callarg2,
24830 rtx pop, bool sibcall)
24831 {
24832 unsigned int const cregs_size
24833 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24834 rtx vec[3 + cregs_size];
24835 rtx use = NULL, call;
24836 unsigned int vec_len = 0;
24837
24838 if (pop == const0_rtx)
24839 pop = NULL;
24840 gcc_assert (!TARGET_64BIT || !pop);
24841
24842 if (TARGET_MACHO && !TARGET_64BIT)
24843 {
24844 #if TARGET_MACHO
24845 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24846 fnaddr = machopic_indirect_call_target (fnaddr);
24847 #endif
24848 }
24849 else
24850 {
24851 /* Static functions and indirect calls don't need the pic register. */
24852 if (flag_pic
24853 && (!TARGET_64BIT
24854 || (ix86_cmodel == CM_LARGE_PIC
24855 && DEFAULT_ABI != MS_ABI))
24856 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24857 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24858 use_reg (&use, pic_offset_table_rtx);
24859 }
24860
24861 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24862 {
24863 rtx al = gen_rtx_REG (QImode, AX_REG);
24864 emit_move_insn (al, callarg2);
24865 use_reg (&use, al);
24866 }
24867
24868 if (ix86_cmodel == CM_LARGE_PIC
24869 && !TARGET_PECOFF
24870 && MEM_P (fnaddr)
24871 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24872 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24873 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24874 else if (sibcall
24875 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24876 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24877 {
24878 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24879 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24880 }
24881
24882 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24883 if (retval)
24884 call = gen_rtx_SET (VOIDmode, retval, call);
24885 vec[vec_len++] = call;
24886
24887 if (pop)
24888 {
24889 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24890 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24891 vec[vec_len++] = pop;
24892 }
24893
24894 if (TARGET_64BIT_MS_ABI
24895 && (!callarg2 || INTVAL (callarg2) != -2))
24896 {
24897 unsigned i;
24898
24899 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24900 UNSPEC_MS_TO_SYSV_CALL);
24901
24902 for (i = 0; i < cregs_size; i++)
24903 {
24904 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24905 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24906
24907 vec[vec_len++]
24908 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24909 }
24910 }
24911
24912 if (vec_len > 1)
24913 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24914 call = emit_call_insn (call);
24915 if (use)
24916 CALL_INSN_FUNCTION_USAGE (call) = use;
24917
24918 return call;
24919 }
24920
24921 /* Output the assembly for a call instruction. */
24922
24923 const char *
24924 ix86_output_call_insn (rtx insn, rtx call_op)
24925 {
24926 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24927 bool seh_nop_p = false;
24928 const char *xasm;
24929
24930 if (SIBLING_CALL_P (insn))
24931 {
24932 if (direct_p)
24933 xasm = "jmp\t%P0";
24934 /* SEH epilogue detection requires the indirect branch case
24935 to include REX.W. */
24936 else if (TARGET_SEH)
24937 xasm = "rex.W jmp %A0";
24938 else
24939 xasm = "jmp\t%A0";
24940
24941 output_asm_insn (xasm, &call_op);
24942 return "";
24943 }
24944
24945 /* SEH unwinding can require an extra nop to be emitted in several
24946 circumstances. Determine if we have one of those. */
24947 if (TARGET_SEH)
24948 {
24949 rtx i;
24950
24951 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24952 {
24953 /* If we get to another real insn, we don't need the nop. */
24954 if (INSN_P (i))
24955 break;
24956
24957 /* If we get to the epilogue note, prevent a catch region from
24958 being adjacent to the standard epilogue sequence. If non-call
24959 exceptions are enabled, we'll have done this during epilogue emission. */
24960 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
24961 && !flag_non_call_exceptions
24962 && !can_throw_internal (insn))
24963 {
24964 seh_nop_p = true;
24965 break;
24966 }
24967 }
24968
24969 /* If we didn't find a real insn following the call, prevent the
24970 unwinder from looking into the next function. */
24971 if (i == NULL)
24972 seh_nop_p = true;
24973 }
24974
24975 if (direct_p)
24976 xasm = "call\t%P0";
24977 else
24978 xasm = "call\t%A0";
24979
24980 output_asm_insn (xasm, &call_op);
24981
24982 if (seh_nop_p)
24983 return "nop";
24984
24985 return "";
24986 }
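
/* For illustration only -- a sketch of the assembly produced by the
   templates above, assuming a direct call to "foo" and an indirect
   call through %rax:

       call    foo         direct_p, "call\t%P0"
       call    *%rax       indirect, "call\t%A0"
       jmp     foo         sibling call, "jmp\t%P0"
       rex.W jmp *%rax     sibling call under SEH, keeps REX.W

   When seh_nop_p is set, a trailing "nop" is emitted, as required by
   the SEH cases described above.  */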
24987 \f
24988 /* Clear stack slot assignments remembered from previous functions.
24989 This is called from INIT_EXPANDERS once before RTL is emitted for each
24990 function. */
24991
24992 static struct machine_function *
24993 ix86_init_machine_status (void)
24994 {
24995 struct machine_function *f;
24996
24997 f = ggc_alloc_cleared_machine_function ();
24998 f->use_fast_prologue_epilogue_nregs = -1;
24999 f->call_abi = ix86_abi;
25000
25001 return f;
25002 }
25003
25004 /* Return a MEM corresponding to a stack slot with mode MODE.
25005 Allocate a new slot if necessary.
25006
25007 The RTL for a function can have several slots available: N is
25008 which slot to use. */
25009
25010 rtx
25011 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25012 {
25013 struct stack_local_entry *s;
25014
25015 gcc_assert (n < MAX_386_STACK_LOCALS);
25016
25017 for (s = ix86_stack_locals; s; s = s->next)
25018 if (s->mode == mode && s->n == n)
25019 return validize_mem (copy_rtx (s->rtl));
25020
25021 s = ggc_alloc_stack_local_entry ();
25022 s->n = n;
25023 s->mode = mode;
25024 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25025
25026 s->next = ix86_stack_locals;
25027 ix86_stack_locals = s;
25028 return validize_mem (s->rtl);
25029 }
25030
25031 static void
25032 ix86_instantiate_decls (void)
25033 {
25034 struct stack_local_entry *s;
25035
25036 for (s = ix86_stack_locals; s; s = s->next)
25037 if (s->rtl != NULL_RTX)
25038 instantiate_decl_rtl (s->rtl);
25039 }
25040 \f
25041 /* Check whether x86 address PARTS is a pc-relative address. */
25042
25043 static bool
25044 rip_relative_addr_p (struct ix86_address *parts)
25045 {
25046 rtx base, index, disp;
25047
25048 base = parts->base;
25049 index = parts->index;
25050 disp = parts->disp;
25051
25052 if (disp && !base && !index)
25053 {
25054 if (TARGET_64BIT)
25055 {
25056 rtx symbol = disp;
25057
25058 if (GET_CODE (disp) == CONST)
25059 symbol = XEXP (disp, 0);
25060 if (GET_CODE (symbol) == PLUS
25061 && CONST_INT_P (XEXP (symbol, 1)))
25062 symbol = XEXP (symbol, 0);
25063
25064 if (GET_CODE (symbol) == LABEL_REF
25065 || (GET_CODE (symbol) == SYMBOL_REF
25066 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25067 || (GET_CODE (symbol) == UNSPEC
25068 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25069 || XINT (symbol, 1) == UNSPEC_PCREL
25070 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25071 return true;
25072 }
25073 }
25074 return false;
25075 }
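
/* Illustrative examples (64-bit mode, assuming the usual operand forms):

     foo(%rip)              symbolic disp, no base or index   -> true
     foo+8(%rip)            symbol plus constant offset       -> true
     bar@GOTPCREL(%rip)     UNSPEC_GOTPCREL displacement      -> true
     foo(%rax)              has a base register               -> false

   In 32-bit mode there is no RIP-relative addressing, so the function
   always returns false.  */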
25076
25077 /* Calculate the length of the memory address in the instruction encoding.
25078 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25079 or other prefixes. We never generate addr32 prefix for LEA insn. */
25080
25081 int
25082 memory_address_length (rtx addr, bool lea)
25083 {
25084 struct ix86_address parts;
25085 rtx base, index, disp;
25086 int len;
25087 int ok;
25088
25089 if (GET_CODE (addr) == PRE_DEC
25090 || GET_CODE (addr) == POST_INC
25091 || GET_CODE (addr) == PRE_MODIFY
25092 || GET_CODE (addr) == POST_MODIFY)
25093 return 0;
25094
25095 ok = ix86_decompose_address (addr, &parts);
25096 gcc_assert (ok);
25097
25098 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25099
25100 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25101 if (TARGET_64BIT && !lea
25102 && (SImode_address_operand (addr, VOIDmode)
25103 || (parts.base && GET_MODE (parts.base) == SImode)
25104 || (parts.index && GET_MODE (parts.index) == SImode)))
25105 len++;
25106
25107 base = parts.base;
25108 index = parts.index;
25109 disp = parts.disp;
25110
25111 if (base && GET_CODE (base) == SUBREG)
25112 base = SUBREG_REG (base);
25113 if (index && GET_CODE (index) == SUBREG)
25114 index = SUBREG_REG (index);
25115
25116 gcc_assert (base == NULL_RTX || REG_P (base));
25117 gcc_assert (index == NULL_RTX || REG_P (index));
25118
25119 /* Rule of thumb:
25120 - esp as the base always wants an index,
25121 - ebp as the base always wants a displacement,
25122 - r12 as the base always wants an index,
25123 - r13 as the base always wants a displacement. */
25124
25125 /* Register Indirect. */
25126 if (base && !index && !disp)
25127 {
25128 /* esp (for its index) and ebp (for its displacement) need
25129 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25130 code. */
25131 if (base == arg_pointer_rtx
25132 || base == frame_pointer_rtx
25133 || REGNO (base) == SP_REG
25134 || REGNO (base) == BP_REG
25135 || REGNO (base) == R12_REG
25136 || REGNO (base) == R13_REG)
25137 len++;
25138 }
25139
25140 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25141 is not disp32, but disp32(%rip), so for disp32
25142 SIB byte is needed, unless print_operand_address
25143 optimizes it into disp32(%rip) or (%rip) is implied
25144 by UNSPEC. */
25145 else if (disp && !base && !index)
25146 {
25147 len += 4;
25148 if (TARGET_64BIT && !rip_relative_addr_p (&parts))
25149 len++;
25150 }
25151 else
25152 {
25153 /* Find the length of the displacement constant. */
25154 if (disp)
25155 {
25156 if (base && satisfies_constraint_K (disp))
25157 len += 1;
25158 else
25159 len += 4;
25160 }
25161 /* ebp always wants a displacement. Similarly r13. */
25162 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25163 len++;
25164
25165 /* An index requires the two-byte modrm form.... */
25166 if (index
25167 /* ...like esp (or r12), which always wants an index. */
25168 || base == arg_pointer_rtx
25169 || base == frame_pointer_rtx
25170 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25171 len++;
25172 }
25173
25174 return len;
25175 }
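
/* A few worked examples (illustrative only; the byte counts cover the
   displacement, SIB and addr32/segment prefixes, not the modrm or
   opcode bytes):

     (%eax)          -> 0   plain register indirect
     (%ebp)          -> 1   ebp always needs at least a disp8
     4(%esp)         -> 2   SIB byte plus disp8
     foo(,%ebx,4)    -> 5   SIB byte plus disp32
     foo(%rip)       -> 4   disp32, RIP-relative in 64-bit mode  */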
25176
25177 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
25178 is set, expect that the insn has an 8-bit immediate alternative. */
25179 int
25180 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25181 {
25182 int len = 0;
25183 int i;
25184 extract_insn_cached (insn);
25185 for (i = recog_data.n_operands - 1; i >= 0; --i)
25186 if (CONSTANT_P (recog_data.operand[i]))
25187 {
25188 enum attr_mode mode = get_attr_mode (insn);
25189
25190 gcc_assert (!len);
25191 if (shortform && CONST_INT_P (recog_data.operand[i]))
25192 {
25193 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25194 switch (mode)
25195 {
25196 case MODE_QI:
25197 len = 1;
25198 continue;
25199 case MODE_HI:
25200 ival = trunc_int_for_mode (ival, HImode);
25201 break;
25202 case MODE_SI:
25203 ival = trunc_int_for_mode (ival, SImode);
25204 break;
25205 default:
25206 break;
25207 }
25208 if (IN_RANGE (ival, -128, 127))
25209 {
25210 len = 1;
25211 continue;
25212 }
25213 }
25214 switch (mode)
25215 {
25216 case MODE_QI:
25217 len = 1;
25218 break;
25219 case MODE_HI:
25220 len = 2;
25221 break;
25222 case MODE_SI:
25223 len = 4;
25224 break;
25225 /* Immediates for DImode instructions are encoded
25226 as 32bit sign extended values. */
25227 case MODE_DI:
25228 len = 4;
25229 break;
25230 default:
25231 fatal_insn ("unknown insn mode", insn);
25232 }
25233 }
25234 return len;
25235 }
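
/* For example (illustrative only): with SHORTFORM set, "addl $3, %eax"
   is counted as a 1-byte immediate because 3 fits in a sign-extended
   8-bit field, while "addl $1000, %eax" is counted as 4 bytes.  A
   DImode arithmetic insn also counts as 4 bytes, since its immediate
   is encoded as a 32-bit sign-extended value.  */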
25236
25237 /* Compute default value for "length_address" attribute. */
25238 int
25239 ix86_attr_length_address_default (rtx insn)
25240 {
25241 int i;
25242
25243 if (get_attr_type (insn) == TYPE_LEA)
25244 {
25245 rtx set = PATTERN (insn), addr;
25246
25247 if (GET_CODE (set) == PARALLEL)
25248 set = XVECEXP (set, 0, 0);
25249
25250 gcc_assert (GET_CODE (set) == SET);
25251
25252 addr = SET_SRC (set);
25253
25254 return memory_address_length (addr, true);
25255 }
25256
25257 extract_insn_cached (insn);
25258 for (i = recog_data.n_operands - 1; i >= 0; --i)
25259 if (MEM_P (recog_data.operand[i]))
25260 {
25261 constrain_operands_cached (reload_completed);
25262 if (which_alternative != -1)
25263 {
25264 const char *constraints = recog_data.constraints[i];
25265 int alt = which_alternative;
25266
25267 while (*constraints == '=' || *constraints == '+')
25268 constraints++;
25269 while (alt-- > 0)
25270 while (*constraints++ != ',')
25271 ;
25272 /* Skip ignored operands. */
25273 if (*constraints == 'X')
25274 continue;
25275 }
25276 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25277 }
25278 return 0;
25279 }
25280
25281 /* Compute default value for "length_vex" attribute. It includes
25282 2 or 3 byte VEX prefix and 1 opcode byte. */
25283
25284 int
25285 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25286 {
25287 int i;
25288
25289 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
25290 byte VEX prefix. */
25291 if (!has_0f_opcode || has_vex_w)
25292 return 3 + 1;
25293
25294 /* We can always use 2 byte VEX prefix in 32bit. */
25295 if (!TARGET_64BIT)
25296 return 2 + 1;
25297
25298 extract_insn_cached (insn);
25299
25300 for (i = recog_data.n_operands - 1; i >= 0; --i)
25301 if (REG_P (recog_data.operand[i]))
25302 {
25303 /* REX.W bit uses 3 byte VEX prefix. */
25304 if (GET_MODE (recog_data.operand[i]) == DImode
25305 && GENERAL_REG_P (recog_data.operand[i]))
25306 return 3 + 1;
25307 }
25308 else
25309 {
25310 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25311 if (MEM_P (recog_data.operand[i])
25312 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25313 return 3 + 1;
25314 }
25315
25316 return 2 + 1;
25317 }
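
/* Illustrative examples of the returned length (VEX prefix + opcode byte):

     vaddps %xmm0, %xmm1, %xmm2      -> 2 + 1   2-byte VEX is enough
     DImode general register operand -> 3 + 1   needs REX.W
     memory operand using %r8-%r15   -> 3 + 1   needs REX.X or REX.B
     any insn in 32-bit mode         -> 2 + 1

   The concrete mnemonic is only an example; the decision is made from
   the operands as scanned above.  */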
25318 \f
25319 /* Return the maximum number of instructions a cpu can issue. */
25320
25321 static int
25322 ix86_issue_rate (void)
25323 {
25324 switch (ix86_tune)
25325 {
25326 case PROCESSOR_PENTIUM:
25327 case PROCESSOR_BONNELL:
25328 case PROCESSOR_SILVERMONT:
25329 case PROCESSOR_INTEL:
25330 case PROCESSOR_K6:
25331 case PROCESSOR_BTVER2:
25332 case PROCESSOR_PENTIUM4:
25333 case PROCESSOR_NOCONA:
25334 return 2;
25335
25336 case PROCESSOR_PENTIUMPRO:
25337 case PROCESSOR_ATHLON:
25338 case PROCESSOR_K8:
25339 case PROCESSOR_AMDFAM10:
25340 case PROCESSOR_GENERIC:
25341 case PROCESSOR_BTVER1:
25342 return 3;
25343
25344 case PROCESSOR_BDVER1:
25345 case PROCESSOR_BDVER2:
25346 case PROCESSOR_BDVER3:
25347 case PROCESSOR_BDVER4:
25348 case PROCESSOR_CORE2:
25349 case PROCESSOR_NEHALEM:
25350 case PROCESSOR_SANDYBRIDGE:
25351 case PROCESSOR_HASWELL:
25352 return 4;
25353
25354 default:
25355 return 1;
25356 }
25357 }
25358
25359 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25360 by DEP_INSN and nothing set by DEP_INSN. */
25361
25362 static bool
25363 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25364 {
25365 rtx set, set2;
25366
25367 /* Simplify the test for uninteresting insns. */
25368 if (insn_type != TYPE_SETCC
25369 && insn_type != TYPE_ICMOV
25370 && insn_type != TYPE_FCMOV
25371 && insn_type != TYPE_IBR)
25372 return false;
25373
25374 if ((set = single_set (dep_insn)) != 0)
25375 {
25376 set = SET_DEST (set);
25377 set2 = NULL_RTX;
25378 }
25379 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25380 && XVECLEN (PATTERN (dep_insn), 0) == 2
25381 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25382 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25383 {
25384 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25385 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25386 }
25387 else
25388 return false;
25389
25390 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25391 return false;
25392
25393 /* This test is true if the dependent insn reads the flags but
25394 not any other potentially set register. */
25395 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25396 return false;
25397
25398 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25399 return false;
25400
25401 return true;
25402 }
25403
25404 /* Return true iff USE_INSN has a memory address with operands set by
25405 SET_INSN. */
25406
25407 bool
25408 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25409 {
25410 int i;
25411 extract_insn_cached (use_insn);
25412 for (i = recog_data.n_operands - 1; i >= 0; --i)
25413 if (MEM_P (recog_data.operand[i]))
25414 {
25415 rtx addr = XEXP (recog_data.operand[i], 0);
25416 return modified_in_p (addr, set_insn) != 0;
25417 }
25418 return false;
25419 }
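
/* For example (illustrative only): with SET_INSN "addl $4, %esp" and
   USE_INSN "movl (%esp), %eax", the load's address register is modified
   by SET_INSN, so the function returns true and an address generation
   interlock (AGI) stall is assumed on the relevant CPUs.  */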
25420
25421 /* Helper function for exact_store_load_dependency.
25422 Return true if addr is found in insn. */
25423 static bool
25424 exact_dependency_1 (rtx addr, rtx insn)
25425 {
25426 enum rtx_code code;
25427 const char *format_ptr;
25428 int i, j;
25429
25430 code = GET_CODE (insn);
25431 switch (code)
25432 {
25433 case MEM:
25434 if (rtx_equal_p (addr, insn))
25435 return true;
25436 break;
25437 case REG:
25438 CASE_CONST_ANY:
25439 case SYMBOL_REF:
25440 case CODE_LABEL:
25441 case PC:
25442 case CC0:
25443 case EXPR_LIST:
25444 return false;
25445 default:
25446 break;
25447 }
25448
25449 format_ptr = GET_RTX_FORMAT (code);
25450 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25451 {
25452 switch (*format_ptr++)
25453 {
25454 case 'e':
25455 if (exact_dependency_1 (addr, XEXP (insn, i)))
25456 return true;
25457 break;
25458 case 'E':
25459 for (j = 0; j < XVECLEN (insn, i); j++)
25460 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25461 return true;
25462 break;
25463 }
25464 }
25465 return false;
25466 }
25467
25468 /* Return true if there exists exact dependency for store & load, i.e.
25469 the same memory address is used in them. */
25470 static bool
25471 exact_store_load_dependency (rtx store, rtx load)
25472 {
25473 rtx set1, set2;
25474
25475 set1 = single_set (store);
25476 if (!set1)
25477 return false;
25478 if (!MEM_P (SET_DEST (set1)))
25479 return false;
25480 set2 = single_set (load);
25481 if (!set2)
25482 return false;
25483 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25484 return true;
25485 return false;
25486 }
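
/* For example (illustrative only): STORE "movw %dx, 6(%esp)" followed by
   LOAD "movw 6(%esp), %ax" uses the same address in both insns, so the
   function returns true; this is the store-forwarding situation that the
   Silvermont cost adjustment in ix86_adjust_cost cares about.  */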
25487
25488 static int
25489 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25490 {
25491 enum attr_type insn_type, dep_insn_type;
25492 enum attr_memory memory;
25493 rtx set, set2;
25494 int dep_insn_code_number;
25495
25496 /* Anti and output dependencies have zero cost on all CPUs. */
25497 if (REG_NOTE_KIND (link) != 0)
25498 return 0;
25499
25500 dep_insn_code_number = recog_memoized (dep_insn);
25501
25502 /* If we can't recognize the insns, we can't really do anything. */
25503 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25504 return cost;
25505
25506 insn_type = get_attr_type (insn);
25507 dep_insn_type = get_attr_type (dep_insn);
25508
25509 switch (ix86_tune)
25510 {
25511 case PROCESSOR_PENTIUM:
25512 /* Address Generation Interlock adds a cycle of latency. */
25513 if (insn_type == TYPE_LEA)
25514 {
25515 rtx addr = PATTERN (insn);
25516
25517 if (GET_CODE (addr) == PARALLEL)
25518 addr = XVECEXP (addr, 0, 0);
25519
25520 gcc_assert (GET_CODE (addr) == SET);
25521
25522 addr = SET_SRC (addr);
25523 if (modified_in_p (addr, dep_insn))
25524 cost += 1;
25525 }
25526 else if (ix86_agi_dependent (dep_insn, insn))
25527 cost += 1;
25528
25529 /* ??? Compares pair with jump/setcc. */
25530 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25531 cost = 0;
25532
25533 /* Floating point stores require value to be ready one cycle earlier. */
25534 if (insn_type == TYPE_FMOV
25535 && get_attr_memory (insn) == MEMORY_STORE
25536 && !ix86_agi_dependent (dep_insn, insn))
25537 cost += 1;
25538 break;
25539
25540 case PROCESSOR_PENTIUMPRO:
25541 /* INT->FP conversion is expensive. */
25542 if (get_attr_fp_int_src (dep_insn))
25543 cost += 5;
25544
25545 /* There is one cycle extra latency between an FP op and a store. */
25546 if (insn_type == TYPE_FMOV
25547 && (set = single_set (dep_insn)) != NULL_RTX
25548 && (set2 = single_set (insn)) != NULL_RTX
25549 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25550 && MEM_P (SET_DEST (set2)))
25551 cost += 1;
25552
25553 memory = get_attr_memory (insn);
25554
25555 /* Show the ability of the reorder buffer to hide the latency of a load
25556 by executing it in parallel with the previous instruction when the
25557 previous instruction is not needed to compute the address. */
25558 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25559 && !ix86_agi_dependent (dep_insn, insn))
25560 {
25561 /* Claim that moves take one cycle, as the core can issue one load
25562 at a time and the next load can start a cycle later. */
25563 if (dep_insn_type == TYPE_IMOV
25564 || dep_insn_type == TYPE_FMOV)
25565 cost = 1;
25566 else if (cost > 1)
25567 cost--;
25568 }
25569 break;
25570
25571 case PROCESSOR_K6:
25572 /* The esp dependency is resolved before
25573 the instruction is really finished. */
25574 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25575 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25576 return 1;
25577
25578 /* INT->FP conversion is expensive. */
25579 if (get_attr_fp_int_src (dep_insn))
25580 cost += 5;
25581
25582 memory = get_attr_memory (insn);
25583
25584 /* Show the ability of the reorder buffer to hide the latency of a load
25585 by executing it in parallel with the previous instruction when the
25586 previous instruction is not needed to compute the address. */
25587 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25588 && !ix86_agi_dependent (dep_insn, insn))
25589 {
25590 /* Claim that moves take one cycle, as the core can issue one load
25591 at a time and the next load can start a cycle later. */
25592 if (dep_insn_type == TYPE_IMOV
25593 || dep_insn_type == TYPE_FMOV)
25594 cost = 1;
25595 else if (cost > 2)
25596 cost -= 2;
25597 else
25598 cost = 1;
25599 }
25600 break;
25601
25602 case PROCESSOR_AMDFAM10:
25603 case PROCESSOR_BDVER1:
25604 case PROCESSOR_BDVER2:
25605 case PROCESSOR_BDVER3:
25606 case PROCESSOR_BDVER4:
25607 case PROCESSOR_BTVER1:
25608 case PROCESSOR_BTVER2:
25609 case PROCESSOR_GENERIC:
25610 /* The stack engine allows push and pop instructions to execute in parallel. */
25611 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25612 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25613 return 0;
25614 /* FALLTHRU */
25615
25616 case PROCESSOR_ATHLON:
25617 case PROCESSOR_K8:
25618 memory = get_attr_memory (insn);
25619
25620 /* Show the ability of the reorder buffer to hide the latency of a load
25621 by executing it in parallel with the previous instruction when the
25622 previous instruction is not needed to compute the address. */
25623 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25624 && !ix86_agi_dependent (dep_insn, insn))
25625 {
25626 enum attr_unit unit = get_attr_unit (insn);
25627 int loadcost = 3;
25628
25629 /* Because of the difference between the length of integer and
25630 floating unit pipeline preparation stages, the memory operands
25631 for floating point are cheaper.
25632
25633 ??? For Athlon the difference is most probably 2. */
25634 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25635 loadcost = 3;
25636 else
25637 loadcost = TARGET_ATHLON ? 2 : 0;
25638
25639 if (cost >= loadcost)
25640 cost -= loadcost;
25641 else
25642 cost = 0;
25643 }
25644 break;
25645
25646 case PROCESSOR_CORE2:
25647 case PROCESSOR_NEHALEM:
25648 case PROCESSOR_SANDYBRIDGE:
25649 case PROCESSOR_HASWELL:
25650 /* The stack engine allows push and pop instructions to execute in parallel. */
25651 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25652 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25653 return 0;
25654
25655 memory = get_attr_memory (insn);
25656
25657 /* Show the ability of the reorder buffer to hide the latency of a load
25658 by executing it in parallel with the previous instruction when the
25659 previous instruction is not needed to compute the address. */
25660 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25661 && !ix86_agi_dependent (dep_insn, insn))
25662 {
25663 if (cost >= 4)
25664 cost -= 4;
25665 else
25666 cost = 0;
25667 }
25668 break;
25669
25670 case PROCESSOR_SILVERMONT:
25671 case PROCESSOR_INTEL:
25672 if (!reload_completed)
25673 return cost;
25674
25675 /* Increase cost of integer loads. */
25676 memory = get_attr_memory (dep_insn);
25677 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25678 {
25679 enum attr_unit unit = get_attr_unit (dep_insn);
25680 if (unit == UNIT_INTEGER && cost == 1)
25681 {
25682 if (memory == MEMORY_LOAD)
25683 cost = 3;
25684 else
25685 {
25686 /* Increase cost of ld/st for short int types only
25687 because of store forwarding issue. */
25688 rtx set = single_set (dep_insn);
25689 if (set && (GET_MODE (SET_DEST (set)) == QImode
25690 || GET_MODE (SET_DEST (set)) == HImode))
25691 {
25692 /* Increase cost of store/load insn if exact
25693 dependence exists and it is load insn. */
25694 enum attr_memory insn_memory = get_attr_memory (insn);
25695 if (insn_memory == MEMORY_LOAD
25696 && exact_store_load_dependency (dep_insn, insn))
25697 cost = 3;
25698 }
25699 }
25700 }
25701 }
25702
25703 default:
25704 break;
25705 }
25706
25707 return cost;
25708 }
25709
25710 /* How many alternative schedules to try. This should be as wide as the
25711 scheduling freedom in the DFA, but no wider. Making this value too
25712 large results in extra work for the scheduler. */
25713
25714 static int
25715 ia32_multipass_dfa_lookahead (void)
25716 {
25717 switch (ix86_tune)
25718 {
25719 case PROCESSOR_PENTIUM:
25720 return 2;
25721
25722 case PROCESSOR_PENTIUMPRO:
25723 case PROCESSOR_K6:
25724 return 1;
25725
25726 case PROCESSOR_BDVER1:
25727 case PROCESSOR_BDVER2:
25728 case PROCESSOR_BDVER3:
25729 case PROCESSOR_BDVER4:
25730 /* We use lookahead value 4 for BD both before and after reload
25731 schedules. Plan is to have value 8 included for O3. */
25732 return 4;
25733
25734 case PROCESSOR_CORE2:
25735 case PROCESSOR_NEHALEM:
25736 case PROCESSOR_SANDYBRIDGE:
25737 case PROCESSOR_HASWELL:
25738 case PROCESSOR_BONNELL:
25739 case PROCESSOR_SILVERMONT:
25740 case PROCESSOR_INTEL:
25741 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25742 as the number of instructions that can be executed in one cycle, i.e.,
25743 issue_rate. I wonder why tuning for many CPUs does not do this. */
25744 if (reload_completed)
25745 return ix86_issue_rate ();
25746 /* Don't use lookahead for pre-reload schedule to save compile time. */
25747 return 0;
25748
25749 default:
25750 return 0;
25751 }
25752 }
25753
25754 /* Return true if target platform supports macro-fusion. */
25755
25756 static bool
25757 ix86_macro_fusion_p ()
25758 {
25759 return TARGET_FUSE_CMP_AND_BRANCH;
25760 }
25761
25762 /* Check whether the current microarchitecture supports macro fusion
25763 for insn pair "CONDGEN + CONDJMP". Refer to
25764 "Intel Architectures Optimization Reference Manual". */
25765
25766 static bool
25767 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25768 {
25769 rtx src, dest;
25770 rtx single_set = single_set (condgen);
25771 enum rtx_code ccode;
25772 rtx compare_set = NULL_RTX, test_if, cond;
25773 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25774
25775 if (get_attr_type (condgen) != TYPE_TEST
25776 && get_attr_type (condgen) != TYPE_ICMP
25777 && get_attr_type (condgen) != TYPE_INCDEC
25778 && get_attr_type (condgen) != TYPE_ALU)
25779 return false;
25780
25781 if (single_set == NULL_RTX
25782 && !TARGET_FUSE_ALU_AND_BRANCH)
25783 return false;
25784
25785 if (single_set != NULL_RTX)
25786 compare_set = single_set;
25787 else
25788 {
25789 int i;
25790 rtx pat = PATTERN (condgen);
25791 for (i = 0; i < XVECLEN (pat, 0); i++)
25792 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25793 {
25794 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25795 if (GET_CODE (set_src) == COMPARE)
25796 compare_set = XVECEXP (pat, 0, i);
25797 else
25798 alu_set = XVECEXP (pat, 0, i);
25799 }
25800 }
25801 if (compare_set == NULL_RTX)
25802 return false;
25803 src = SET_SRC (compare_set);
25804 if (GET_CODE (src) != COMPARE)
25805 return false;
25806
25807 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25808 supported. */
25809 if ((MEM_P (XEXP (src, 0))
25810 && CONST_INT_P (XEXP (src, 1)))
25811 || (MEM_P (XEXP (src, 1))
25812 && CONST_INT_P (XEXP (src, 0))))
25813 return false;
25814
25815 /* No fusion for RIP-relative address. */
25816 if (MEM_P (XEXP (src, 0)))
25817 addr = XEXP (XEXP (src, 0), 0);
25818 else if (MEM_P (XEXP (src, 1)))
25819 addr = XEXP (XEXP (src, 1), 0);
25820
25821 if (addr) {
25822 ix86_address parts;
25823 int ok = ix86_decompose_address (addr, &parts);
25824 gcc_assert (ok);
25825
25826 if (rip_relative_addr_p (&parts))
25827 return false;
25828 }
25829
25830 test_if = SET_SRC (pc_set (condjmp));
25831 cond = XEXP (test_if, 0);
25832 ccode = GET_CODE (cond);
25833 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25834 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25835 && (ccode == GE
25836 || ccode == GT
25837 || ccode == LE
25838 || ccode == LT))
25839 return false;
25840
25841 /* Return true for TYPE_TEST and TYPE_ICMP. */
25842 if (get_attr_type (condgen) == TYPE_TEST
25843 || get_attr_type (condgen) == TYPE_ICMP)
25844 return true;
25845
25846 /* The following handles the macro-fusion case for alu + jmp. */
25847 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25848 return false;
25849
25850 /* No fusion for alu op with memory destination operand. */
25851 dest = SET_DEST (alu_set);
25852 if (MEM_P (dest))
25853 return false;
25854
25855 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25856 supported. */
25857 if (get_attr_type (condgen) == TYPE_INCDEC
25858 && (ccode == GEU
25859 || ccode == GTU
25860 || ccode == LEU
25861 || ccode == LTU))
25862 return false;
25863
25864 return true;
25865 }
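
/* Illustrative examples (assuming a CPU with TARGET_FUSE_CMP_AND_BRANCH):

     cmpl %eax, %ebx ; jne .L2        -> fuses
     cmpl $0, (%rax) ; je .L2         -> no fusion (cmp MEM-IMM)
     cmpl %eax, foo(%rip) ; jne .L2   -> no fusion (RIP-relative address)
     decl %eax ; ja .L2               -> no fusion (inc/dec + unsigned jcc)

   ALU + jmp fusion additionally requires TARGET_FUSE_ALU_AND_BRANCH and
   a register destination for the ALU operation.  */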
25866
25867 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
25868 execution. It is applied if
25869 (1) an IMUL instruction is at the top of the list;
25870 (2) there is exactly one producer of an independent IMUL instruction in
25871 the ready list.
25872 Return the index of the IMUL producer if it was found, and -1 otherwise. */
25873 static int
25874 do_reorder_for_imul (rtx *ready, int n_ready)
25875 {
25876 rtx insn, set, insn1, insn2;
25877 sd_iterator_def sd_it;
25878 dep_t dep;
25879 int index = -1;
25880 int i;
25881
25882 if (!TARGET_BONNELL)
25883 return index;
25884
25885 /* Check that IMUL instruction is on the top of ready list. */
25886 insn = ready[n_ready - 1];
25887 set = single_set (insn);
25888 if (!set)
25889 return index;
25890 if (!(GET_CODE (SET_SRC (set)) == MULT
25891 && GET_MODE (SET_SRC (set)) == SImode))
25892 return index;
25893
25894 /* Search for producer of independent IMUL instruction. */
25895 for (i = n_ready - 2; i >= 0; i--)
25896 {
25897 insn = ready[i];
25898 if (!NONDEBUG_INSN_P (insn))
25899 continue;
25900 /* Skip IMUL instruction. */
25901 insn2 = PATTERN (insn);
25902 if (GET_CODE (insn2) == PARALLEL)
25903 insn2 = XVECEXP (insn2, 0, 0);
25904 if (GET_CODE (insn2) == SET
25905 && GET_CODE (SET_SRC (insn2)) == MULT
25906 && GET_MODE (SET_SRC (insn2)) == SImode)
25907 continue;
25908
25909 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25910 {
25911 rtx con;
25912 con = DEP_CON (dep);
25913 if (!NONDEBUG_INSN_P (con))
25914 continue;
25915 insn1 = PATTERN (con);
25916 if (GET_CODE (insn1) == PARALLEL)
25917 insn1 = XVECEXP (insn1, 0, 0);
25918
25919 if (GET_CODE (insn1) == SET
25920 && GET_CODE (SET_SRC (insn1)) == MULT
25921 && GET_MODE (SET_SRC (insn1)) == SImode)
25922 {
25923 sd_iterator_def sd_it1;
25924 dep_t dep1;
25925 /* Check if there is no other dependee for IMUL. */
25926 index = i;
25927 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25928 {
25929 rtx pro;
25930 pro = DEP_PRO (dep1);
25931 if (!NONDEBUG_INSN_P (pro))
25932 continue;
25933 if (pro != insn)
25934 index = -1;
25935 }
25936 if (index >= 0)
25937 break;
25938 }
25939 }
25940 if (index >= 0)
25941 break;
25942 }
25943 return index;
25944 }
25945
25946 /* Try to find the best candidate at the top of the ready list if two insns
25947 have the same priority - a candidate is best if its dependees were
25948 scheduled earlier. Applied for Silvermont only.
25949 Return true if the top 2 insns must be interchanged. */
25950 static bool
25951 swap_top_of_ready_list (rtx *ready, int n_ready)
25952 {
25953 rtx top = ready[n_ready - 1];
25954 rtx next = ready[n_ready - 2];
25955 rtx set;
25956 sd_iterator_def sd_it;
25957 dep_t dep;
25958 int clock1 = -1;
25959 int clock2 = -1;
25960 #define INSN_TICK(INSN) (HID (INSN)->tick)
25961
25962 if (!TARGET_SILVERMONT && !TARGET_INTEL)
25963 return false;
25964
25965 if (!NONDEBUG_INSN_P (top))
25966 return false;
25967 if (!NONJUMP_INSN_P (top))
25968 return false;
25969 if (!NONDEBUG_INSN_P (next))
25970 return false;
25971 if (!NONJUMP_INSN_P (next))
25972 return false;
25973 set = single_set (top);
25974 if (!set)
25975 return false;
25976 set = single_set (next);
25977 if (!set)
25978 return false;
25979
25980 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
25981 {
25982 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
25983 return false;
25984 /* Determine the winner more precisely. */
25985 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
25986 {
25987 rtx pro;
25988 pro = DEP_PRO (dep);
25989 if (!NONDEBUG_INSN_P (pro))
25990 continue;
25991 if (INSN_TICK (pro) > clock1)
25992 clock1 = INSN_TICK (pro);
25993 }
25994 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
25995 {
25996 rtx pro;
25997 pro = DEP_PRO (dep);
25998 if (!NONDEBUG_INSN_P (pro))
25999 continue;
26000 if (INSN_TICK (pro) > clock2)
26001 clock2 = INSN_TICK (pro);
26002 }
26003
26004 if (clock1 == clock2)
26005 {
26006 /* Determine winner - load must win. */
26007 enum attr_memory memory1, memory2;
26008 memory1 = get_attr_memory (top);
26009 memory2 = get_attr_memory (next);
26010 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26011 return true;
26012 }
26013 return (bool) (clock2 < clock1);
26014 }
26015 return false;
26016 #undef INSN_TICK
26017 }
26018
26019 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26020 Return the issue rate. */
26021 static int
26022 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26023 int clock_var)
26024 {
26025 int issue_rate = -1;
26026 int n_ready = *pn_ready;
26027 int i;
26028 rtx insn;
26029 int index = -1;
26030
26031 /* Set up issue rate. */
26032 issue_rate = ix86_issue_rate ();
26033
26034 /* Do reordering for BONNELL/SILVERMONT only. */
26035 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26036 return issue_rate;
26037
26038 /* Nothing to do if ready list contains only 1 instruction. */
26039 if (n_ready <= 1)
26040 return issue_rate;
26041
26042 /* Do reordering for the post-reload scheduler only. */
26043 if (!reload_completed)
26044 return issue_rate;
26045
26046 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26047 {
26048 if (sched_verbose > 1)
26049 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26050 INSN_UID (ready[index]));
26051
26052 /* Put IMUL producer (ready[index]) at the top of ready list. */
26053 insn = ready[index];
26054 for (i = index; i < n_ready - 1; i++)
26055 ready[i] = ready[i + 1];
26056 ready[n_ready - 1] = insn;
26057 return issue_rate;
26058 }
26059 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26060 {
26061 if (sched_verbose > 1)
26062 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26063 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26064 /* Swap 2 top elements of ready list. */
26065 insn = ready[n_ready - 1];
26066 ready[n_ready - 1] = ready[n_ready - 2];
26067 ready[n_ready - 2] = insn;
26068 }
26069 return issue_rate;
26070 }
26071
26072 static bool
26073 ix86_class_likely_spilled_p (reg_class_t);
26074
26075 /* Return true if the lhs of INSN is a HW function argument register; set
26076 *IS_SPILLED to true if it is a likely-spilled HW register. */
26077 static bool
26078 insn_is_function_arg (rtx insn, bool* is_spilled)
26079 {
26080 rtx dst;
26081
26082 if (!NONDEBUG_INSN_P (insn))
26083 return false;
26084 /* Call instructions are not movable; ignore them. */
26085 if (CALL_P (insn))
26086 return false;
26087 insn = PATTERN (insn);
26088 if (GET_CODE (insn) == PARALLEL)
26089 insn = XVECEXP (insn, 0, 0);
26090 if (GET_CODE (insn) != SET)
26091 return false;
26092 dst = SET_DEST (insn);
26093 if (REG_P (dst) && HARD_REGISTER_P (dst)
26094 && ix86_function_arg_regno_p (REGNO (dst)))
26095 {
26096 /* Is it likely spilled HW register? */
26097 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26098 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26099 *is_spilled = true;
26100 return true;
26101 }
26102 return false;
26103 }
26104
26105 /* Add output dependencies for a chain of adjacent function arguments, but only
26106 if there is a move to a likely-spilled HW register. Return the first argument
26107 if at least one dependence was added, or NULL otherwise. */
26108 static rtx
26109 add_parameter_dependencies (rtx call, rtx head)
26110 {
26111 rtx insn;
26112 rtx last = call;
26113 rtx first_arg = NULL;
26114 bool is_spilled = false;
26115
26116 head = PREV_INSN (head);
26117
26118 /* Find the argument-passing instruction nearest to the call. */
26119 while (true)
26120 {
26121 last = PREV_INSN (last);
26122 if (last == head)
26123 return NULL;
26124 if (!NONDEBUG_INSN_P (last))
26125 continue;
26126 if (insn_is_function_arg (last, &is_spilled))
26127 break;
26128 return NULL;
26129 }
26130
26131 first_arg = last;
26132 while (true)
26133 {
26134 insn = PREV_INSN (last);
26135 if (!INSN_P (insn))
26136 break;
26137 if (insn == head)
26138 break;
26139 if (!NONDEBUG_INSN_P (insn))
26140 {
26141 last = insn;
26142 continue;
26143 }
26144 if (insn_is_function_arg (insn, &is_spilled))
26145 {
26146 /* Add an output dependence between two function arguments if the chain
26147 of output arguments contains likely-spilled HW registers. */
26148 if (is_spilled)
26149 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26150 first_arg = last = insn;
26151 }
26152 else
26153 break;
26154 }
26155 if (!is_spilled)
26156 return NULL;
26157 return first_arg;
26158 }
26159
26160 /* Add output or anti dependency from insn to first_arg to restrict its code
26161 motion. */
26162 static void
26163 avoid_func_arg_motion (rtx first_arg, rtx insn)
26164 {
26165 rtx set;
26166 rtx tmp;
26167
26168 set = single_set (insn);
26169 if (!set)
26170 return;
26171 tmp = SET_DEST (set);
26172 if (REG_P (tmp))
26173 {
26174 /* Add output dependency to the first function argument. */
26175 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26176 return;
26177 }
26178 /* Add anti dependency. */
26179 add_dependence (first_arg, insn, REG_DEP_ANTI);
26180 }
26181
26182 /* Avoid cross-block motion of a function argument by adding a dependency
26183 from the first non-jump instruction in BB. */
26184 static void
26185 add_dependee_for_func_arg (rtx arg, basic_block bb)
26186 {
26187 rtx insn = BB_END (bb);
26188
26189 while (insn)
26190 {
26191 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26192 {
26193 rtx set = single_set (insn);
26194 if (set)
26195 {
26196 avoid_func_arg_motion (arg, insn);
26197 return;
26198 }
26199 }
26200 if (insn == BB_HEAD (bb))
26201 return;
26202 insn = PREV_INSN (insn);
26203 }
26204 }
26205
26206 /* Hook for pre-reload schedule - avoid motion of function arguments
26207 passed in likely spilled HW registers. */
26208 static void
26209 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26210 {
26211 rtx insn;
26212 rtx first_arg = NULL;
26213 if (reload_completed)
26214 return;
26215 while (head != tail && DEBUG_INSN_P (head))
26216 head = NEXT_INSN (head);
26217 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26218 if (INSN_P (insn) && CALL_P (insn))
26219 {
26220 first_arg = add_parameter_dependencies (insn, head);
26221 if (first_arg)
26222 {
26223 /* Add a dependee for the first argument to predecessors, but only if the
26224 region contains more than one block. */
26225 basic_block bb = BLOCK_FOR_INSN (insn);
26226 int rgn = CONTAINING_RGN (bb->index);
26227 int nr_blks = RGN_NR_BLOCKS (rgn);
26228 /* Skip trivial regions and region head blocks that can have
26229 predecessors outside of region. */
26230 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26231 {
26232 edge e;
26233 edge_iterator ei;
26234 /* Assume that region is SCC, i.e. all immediate predecessors
26235 of non-head block are in the same region. */
26236 FOR_EACH_EDGE (e, ei, bb->preds)
26237 {
26238 /* Avoid creating loop-carried dependencies by using
26239 the topological ordering in the region. */
26240 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26241 add_dependee_for_func_arg (first_arg, e->src);
26242 }
26243 }
26244 insn = first_arg;
26245 if (insn == head)
26246 break;
26247 }
26248 }
26249 else if (first_arg)
26250 avoid_func_arg_motion (first_arg, insn);
26251 }
26252
26253 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26254 HW registers to maximum, to schedule them as soon as possible. These are
26255 moves from function argument registers at the top of the function entry
26256 and moves from function return value registers after call. */
26257 static int
26258 ix86_adjust_priority (rtx insn, int priority)
26259 {
26260 rtx set;
26261
26262 if (reload_completed)
26263 return priority;
26264
26265 if (!NONDEBUG_INSN_P (insn))
26266 return priority;
26267
26268 set = single_set (insn);
26269 if (set)
26270 {
26271 rtx tmp = SET_SRC (set);
26272 if (REG_P (tmp)
26273 && HARD_REGISTER_P (tmp)
26274 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26275 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26276 return current_sched_info->sched_max_insns_priority;
26277 }
26278
26279 return priority;
26280 }
26281
26282 /* Model decoder of Core 2/i7.
26283 The hooks below, for multipass scheduling (see haifa-sched.c:max_issue),
26284 track the instruction fetch block boundaries and make sure that long
26285 (9+ bytes) instructions are assigned to D0. */
26286
26287 /* Maximum length of an insn that can be handled by
26288 a secondary decoder unit. '8' for Core 2/i7. */
26289 static int core2i7_secondary_decoder_max_insn_size;
26290
26291 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26292 '16' for Core 2/i7. */
26293 static int core2i7_ifetch_block_size;
26294
26295 /* Maximum number of instructions decoder can handle per cycle.
26296 '6' for Core 2/i7. */
26297 static int core2i7_ifetch_block_max_insns;
26298
26299 typedef struct ix86_first_cycle_multipass_data_ *
26300 ix86_first_cycle_multipass_data_t;
26301 typedef const struct ix86_first_cycle_multipass_data_ *
26302 const_ix86_first_cycle_multipass_data_t;
26303
26304 /* A variable to store target state across calls to max_issue within
26305 one cycle. */
26306 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26307 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26308
26309 /* Initialize DATA. */
26310 static void
26311 core2i7_first_cycle_multipass_init (void *_data)
26312 {
26313 ix86_first_cycle_multipass_data_t data
26314 = (ix86_first_cycle_multipass_data_t) _data;
26315
26316 data->ifetch_block_len = 0;
26317 data->ifetch_block_n_insns = 0;
26318 data->ready_try_change = NULL;
26319 data->ready_try_change_size = 0;
26320 }
26321
26322 /* Advancing the cycle; reset ifetch block counts. */
26323 static void
26324 core2i7_dfa_post_advance_cycle (void)
26325 {
26326 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26327
26328 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26329
26330 data->ifetch_block_len = 0;
26331 data->ifetch_block_n_insns = 0;
26332 }
26333
26334 static int min_insn_size (rtx);
26335
26336 /* Filter out insns from ready_try that the core will not be able to issue
26337 on current cycle due to decoder. */
26338 static void
26339 core2i7_first_cycle_multipass_filter_ready_try
26340 (const_ix86_first_cycle_multipass_data_t data,
26341 char *ready_try, int n_ready, bool first_cycle_insn_p)
26342 {
26343 while (n_ready--)
26344 {
26345 rtx insn;
26346 int insn_size;
26347
26348 if (ready_try[n_ready])
26349 continue;
26350
26351 insn = get_ready_element (n_ready);
26352 insn_size = min_insn_size (insn);
26353
26354 if (/* If this is too long an insn for a secondary decoder ... */
26355 (!first_cycle_insn_p
26356 && insn_size > core2i7_secondary_decoder_max_insn_size)
26357 /* ... or it would not fit into the ifetch block ... */
26358 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26359 /* ... or the decoder is full already ... */
26360 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26361 /* ... mask the insn out. */
26362 {
26363 ready_try[n_ready] = 1;
26364
26365 if (data->ready_try_change)
26366 bitmap_set_bit (data->ready_try_change, n_ready);
26367 }
26368 }
26369 }
26370
26371 /* Prepare for a new round of multipass lookahead scheduling. */
26372 static void
26373 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
26374 bool first_cycle_insn_p)
26375 {
26376 ix86_first_cycle_multipass_data_t data
26377 = (ix86_first_cycle_multipass_data_t) _data;
26378 const_ix86_first_cycle_multipass_data_t prev_data
26379 = ix86_first_cycle_multipass_data;
26380
26381 /* Restore the state from the end of the previous round. */
26382 data->ifetch_block_len = prev_data->ifetch_block_len;
26383 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26384
26385 /* Filter instructions that cannot be issued on current cycle due to
26386 decoder restrictions. */
26387 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26388 first_cycle_insn_p);
26389 }
26390
26391 /* INSN is being issued in current solution. Account for its impact on
26392 the decoder model. */
26393 static void
26394 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
26395 rtx insn, const void *_prev_data)
26396 {
26397 ix86_first_cycle_multipass_data_t data
26398 = (ix86_first_cycle_multipass_data_t) _data;
26399 const_ix86_first_cycle_multipass_data_t prev_data
26400 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26401
26402 int insn_size = min_insn_size (insn);
26403
26404 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26405 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26406 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26407 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26408
26409 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26410 if (!data->ready_try_change)
26411 {
26412 data->ready_try_change = sbitmap_alloc (n_ready);
26413 data->ready_try_change_size = n_ready;
26414 }
26415 else if (data->ready_try_change_size < n_ready)
26416 {
26417 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26418 n_ready, 0);
26419 data->ready_try_change_size = n_ready;
26420 }
26421 bitmap_clear (data->ready_try_change);
26422
26423 /* Filter out insns from ready_try that the core will not be able to issue
26424 on current cycle due to decoder. */
26425 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26426 false);
26427 }
26428
26429 /* Revert the effect on ready_try. */
26430 static void
26431 core2i7_first_cycle_multipass_backtrack (const void *_data,
26432 char *ready_try,
26433 int n_ready ATTRIBUTE_UNUSED)
26434 {
26435 const_ix86_first_cycle_multipass_data_t data
26436 = (const_ix86_first_cycle_multipass_data_t) _data;
26437 unsigned int i = 0;
26438 sbitmap_iterator sbi;
26439
26440 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26441 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26442 {
26443 ready_try[i] = 0;
26444 }
26445 }
26446
26447 /* Save the result of multipass lookahead scheduling for the next round. */
26448 static void
26449 core2i7_first_cycle_multipass_end (const void *_data)
26450 {
26451 const_ix86_first_cycle_multipass_data_t data
26452 = (const_ix86_first_cycle_multipass_data_t) _data;
26453 ix86_first_cycle_multipass_data_t next_data
26454 = ix86_first_cycle_multipass_data;
26455
26456 if (data != NULL)
26457 {
26458 next_data->ifetch_block_len = data->ifetch_block_len;
26459 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26460 }
26461 }
26462
26463 /* Deallocate target data. */
26464 static void
26465 core2i7_first_cycle_multipass_fini (void *_data)
26466 {
26467 ix86_first_cycle_multipass_data_t data
26468 = (ix86_first_cycle_multipass_data_t) _data;
26469
26470 if (data->ready_try_change)
26471 {
26472 sbitmap_free (data->ready_try_change);
26473 data->ready_try_change = NULL;
26474 data->ready_try_change_size = 0;
26475 }
26476 }
26477
26478 /* Prepare for scheduling pass. */
26479 static void
26480 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26481 int verbose ATTRIBUTE_UNUSED,
26482 int max_uid ATTRIBUTE_UNUSED)
26483 {
26484 /* Install scheduling hooks for current CPU. Some of these hooks are used
26485 in time-critical parts of the scheduler, so we only set them up when
26486 they are actually used. */
26487 switch (ix86_tune)
26488 {
26489 case PROCESSOR_CORE2:
26490 case PROCESSOR_NEHALEM:
26491 case PROCESSOR_SANDYBRIDGE:
26492 case PROCESSOR_HASWELL:
26493 /* Do not perform multipass scheduling for pre-reload schedule
26494 to save compile time. */
26495 if (reload_completed)
26496 {
26497 targetm.sched.dfa_post_advance_cycle
26498 = core2i7_dfa_post_advance_cycle;
26499 targetm.sched.first_cycle_multipass_init
26500 = core2i7_first_cycle_multipass_init;
26501 targetm.sched.first_cycle_multipass_begin
26502 = core2i7_first_cycle_multipass_begin;
26503 targetm.sched.first_cycle_multipass_issue
26504 = core2i7_first_cycle_multipass_issue;
26505 targetm.sched.first_cycle_multipass_backtrack
26506 = core2i7_first_cycle_multipass_backtrack;
26507 targetm.sched.first_cycle_multipass_end
26508 = core2i7_first_cycle_multipass_end;
26509 targetm.sched.first_cycle_multipass_fini
26510 = core2i7_first_cycle_multipass_fini;
26511
26512 /* Set decoder parameters. */
26513 core2i7_secondary_decoder_max_insn_size = 8;
26514 core2i7_ifetch_block_size = 16;
26515 core2i7_ifetch_block_max_insns = 6;
26516 break;
26517 }
26518 /* ... Fall through ... */
26519 default:
26520 targetm.sched.dfa_post_advance_cycle = NULL;
26521 targetm.sched.first_cycle_multipass_init = NULL;
26522 targetm.sched.first_cycle_multipass_begin = NULL;
26523 targetm.sched.first_cycle_multipass_issue = NULL;
26524 targetm.sched.first_cycle_multipass_backtrack = NULL;
26525 targetm.sched.first_cycle_multipass_end = NULL;
26526 targetm.sched.first_cycle_multipass_fini = NULL;
26527 break;
26528 }
26529 }
26530
26531 \f
26532 /* Compute the alignment given to a constant that is being placed in memory.
26533 EXP is the constant and ALIGN is the alignment that the object would
26534 ordinarily have.
26535 The value of this function is used instead of that alignment to align
26536 the object. */
26537
26538 int
26539 ix86_constant_alignment (tree exp, int align)
26540 {
26541 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26542 || TREE_CODE (exp) == INTEGER_CST)
26543 {
26544 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26545 return 64;
26546 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26547 return 128;
26548 }
26549 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26550 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26551 return BITS_PER_WORD;
26552
26553 return align;
26554 }
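
/* For example (illustrative only): a DFmode REAL_CST placed in memory is
   given at least 64-bit alignment, an XFmode or vector-mode constant at
   least 128-bit alignment, and a string constant of 31 characters or
   more is aligned to BITS_PER_WORD unless optimizing for size.  */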
26555
26556 /* Compute the alignment for a static variable.
26557 TYPE is the data type, and ALIGN is the alignment that
26558 the object would ordinarily have. The value of this function is used
26559 instead of that alignment to align the object. */
26560
26561 int
26562 ix86_data_alignment (tree type, int align, bool opt)
26563 {
26564 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26565 for symbols from other compilation units or symbols that don't need
26566 to bind locally. In order to preserve some ABI compatibility with
26567 those compilers, ensure we don't decrease alignment from what we
26568 used to assume. */
26569
26570 int max_align_compat
26571 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26572
26573 /* A data structure, equal to or greater than the size of a cache line
26574 (64 bytes in the Pentium 4 and other recent Intel processors, including
26575 processors based on Intel Core microarchitecture) should be aligned
26576 so that its base address is a multiple of a cache line size. */
26577
26578 int max_align
26579 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26580
26581 if (max_align < BITS_PER_WORD)
26582 max_align = BITS_PER_WORD;
26583
26584 if (opt
26585 && AGGREGATE_TYPE_P (type)
26586 && TYPE_SIZE (type)
26587 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26588 {
26589 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align_compat
26590 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26591 && align < max_align_compat)
26592 align = max_align_compat;
26593 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
26594 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
26595 && align < max_align)
26596 align = max_align;
26597 }
26598
26599 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26600 to a 16-byte boundary. */
26601 if (TARGET_64BIT)
26602 {
26603 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26604 && TYPE_SIZE (type)
26605 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26606 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
26607 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26608 return 128;
26609 }
26610
26611 if (!opt)
26612 return align;
26613
26614 if (TREE_CODE (type) == ARRAY_TYPE)
26615 {
26616 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26617 return 64;
26618 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26619 return 128;
26620 }
26621 else if (TREE_CODE (type) == COMPLEX_TYPE)
26622 {
26623
26624 if (TYPE_MODE (type) == DCmode && align < 64)
26625 return 64;
26626 if ((TYPE_MODE (type) == XCmode
26627 || TYPE_MODE (type) == TCmode) && align < 128)
26628 return 128;
26629 }
26630 else if ((TREE_CODE (type) == RECORD_TYPE
26631 || TREE_CODE (type) == UNION_TYPE
26632 || TREE_CODE (type) == QUAL_UNION_TYPE)
26633 && TYPE_FIELDS (type))
26634 {
26635 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26636 return 64;
26637 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26638 return 128;
26639 }
26640 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26641 || TREE_CODE (type) == INTEGER_TYPE)
26642 {
26643 if (TYPE_MODE (type) == DFmode && align < 64)
26644 return 64;
26645 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26646 return 128;
26647 }
26648
26649 return align;
26650 }
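
/* A few illustrative results on x86-64, assuming -O2 and the default
   64-byte prefetch block (so max_align is 512 bits):

     static char a[8];     -> unchanged (below every threshold)
     static char b[20];    -> 128-bit alignment (x86-64 ABI array rule)
     static char c[40];    -> 256-bit alignment (GCC 4.8 compatibility cap)
     static char d[100];   -> 512-bit, i.e. cache-line, alignment  */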
26651
26652 /* Compute the alignment for a local variable or a stack slot. EXP is
26653 the data type or decl itself, MODE is the widest mode available and
26654 ALIGN is the alignment that the object would ordinarily have. The
26655 value of this macro is used instead of that alignment to align the
26656 object. */
26657
26658 unsigned int
26659 ix86_local_alignment (tree exp, enum machine_mode mode,
26660 unsigned int align)
26661 {
26662 tree type, decl;
26663
26664 if (exp && DECL_P (exp))
26665 {
26666 type = TREE_TYPE (exp);
26667 decl = exp;
26668 }
26669 else
26670 {
26671 type = exp;
26672 decl = NULL;
26673 }
26674
26675 /* Don't do dynamic stack realignment for long long objects with
26676 -mpreferred-stack-boundary=2. */
26677 if (!TARGET_64BIT
26678 && align == 64
26679 && ix86_preferred_stack_boundary < 64
26680 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26681 && (!type || !TYPE_USER_ALIGN (type))
26682 && (!decl || !DECL_USER_ALIGN (decl)))
26683 align = 32;
26684
26685 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26686 register in MODE. We will return the largest alignment of XF
26687 and DF. */
26688 if (!type)
26689 {
26690 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26691 align = GET_MODE_ALIGNMENT (DFmode);
26692 return align;
26693 }
26694
26695 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
26696 to a 16-byte boundary. Exact wording is:
26697
26698 An array uses the same alignment as its elements, except that a local or
26699 global array variable of length at least 16 bytes or
26700 a C99 variable-length array variable always has alignment of at least 16 bytes.
26701
26702 This was added to allow use of aligned SSE instructions on arrays. This
26703 rule is meant for static storage (where the compiler cannot do the analysis
26704 by itself). We follow it for automatic variables only when convenient.
26705 We fully control everything in the function being compiled, and functions from
26706 other units cannot rely on the alignment.
26707 
26708 Exclude the va_list type. It is the common case of a local array where
26709 we cannot benefit from the alignment.
26710
26711 TODO: Probably one should optimize for size only when var is not escaping. */
26712 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26713 && TARGET_SSE)
26714 {
26715 if (AGGREGATE_TYPE_P (type)
26716 && (va_list_type_node == NULL_TREE
26717 || (TYPE_MAIN_VARIANT (type)
26718 != TYPE_MAIN_VARIANT (va_list_type_node)))
26719 && TYPE_SIZE (type)
26720 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26721 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
26722 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
26723 return 128;
26724 }
26725 if (TREE_CODE (type) == ARRAY_TYPE)
26726 {
26727 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26728 return 64;
26729 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26730 return 128;
26731 }
26732 else if (TREE_CODE (type) == COMPLEX_TYPE)
26733 {
26734 if (TYPE_MODE (type) == DCmode && align < 64)
26735 return 64;
26736 if ((TYPE_MODE (type) == XCmode
26737 || TYPE_MODE (type) == TCmode) && align < 128)
26738 return 128;
26739 }
26740 else if ((TREE_CODE (type) == RECORD_TYPE
26741 || TREE_CODE (type) == UNION_TYPE
26742 || TREE_CODE (type) == QUAL_UNION_TYPE)
26743 && TYPE_FIELDS (type))
26744 {
26745 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26746 return 64;
26747 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26748 return 128;
26749 }
26750 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26751 || TREE_CODE (type) == INTEGER_TYPE)
26752 {
26753
26754 if (TYPE_MODE (type) == DFmode && align < 64)
26755 return 64;
26756 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26757 return 128;
26758 }
26759 return align;
26760 }
26761
26762 /* Compute the minimum required alignment for dynamic stack realignment
26763 purposes for a local variable, parameter or a stack slot. EXP is
26764 the data type or decl itself, MODE is its mode and ALIGN is the
26765 alignment that the object would ordinarily have. */
26766
26767 unsigned int
26768 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26769 unsigned int align)
26770 {
26771 tree type, decl;
26772
26773 if (exp && DECL_P (exp))
26774 {
26775 type = TREE_TYPE (exp);
26776 decl = exp;
26777 }
26778 else
26779 {
26780 type = exp;
26781 decl = NULL;
26782 }
26783
26784 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26785 return align;
26786
26787 /* Don't do dynamic stack realignment for long long objects with
26788 -mpreferred-stack-boundary=2. */
26789 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26790 && (!type || !TYPE_USER_ALIGN (type))
26791 && (!decl || !DECL_USER_ALIGN (decl)))
26792 return 32;
26793
26794 return align;
26795 }
26796 \f
26797 /* Find a location for the static chain incoming to a nested function.
26798 This is a register, unless all free registers are used by arguments. */
26799
26800 static rtx
26801 ix86_static_chain (const_tree fndecl, bool incoming_p)
26802 {
26803 unsigned regno;
26804
26805 if (!DECL_STATIC_CHAIN (fndecl))
26806 return NULL;
26807
26808 if (TARGET_64BIT)
26809 {
26810 /* We always use R10 in 64-bit mode. */
26811 regno = R10_REG;
26812 }
26813 else
26814 {
26815 tree fntype;
26816 unsigned int ccvt;
26817
26818 /* By default in 32-bit mode we use ECX to pass the static chain. */
26819 regno = CX_REG;
26820
26821 fntype = TREE_TYPE (fndecl);
26822 ccvt = ix86_get_callcvt (fntype);
26823 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26824 {
26825 /* Fastcall functions use ecx/edx for arguments, which leaves
26826 us with EAX for the static chain.
26827 Thiscall functions use ecx for arguments, which also
26828 leaves us with EAX for the static chain. */
26829 regno = AX_REG;
26830 }
26831 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26832 {
26833 /* Thiscall functions use ecx for arguments, which leaves
26834 us with EAX and EDX for the static chain.
26835 For ABI compatibility we use EAX. */
26836 regno = AX_REG;
26837 }
26838 else if (ix86_function_regparm (fntype, fndecl) == 3)
26839 {
26840 /* For regparm 3, we have no free call-clobbered registers in
26841 which to store the static chain. In order to implement this,
26842 we have the trampoline push the static chain to the stack.
26843 However, we can't push a value below the return address when
26844 we call the nested function directly, so we have to use an
26845 alternate entry point. For this we use ESI, and have the
26846 alternate entry point push ESI, so that things appear the
26847 same once we're executing the nested function. */
26848 if (incoming_p)
26849 {
26850 if (fndecl == current_function_decl)
26851 ix86_static_chain_on_stack = true;
26852 return gen_frame_mem (SImode,
26853 plus_constant (Pmode,
26854 arg_pointer_rtx, -8));
26855 }
26856 regno = SI_REG;
26857 }
26858 }
26859
26860 return gen_rtx_REG (Pmode, regno);
26861 }
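/* Example of code that needs a static chain (a GNU C nested function);
   the chain register chosen above is R10 in 64-bit mode and, by default,
   ECX in 32-bit mode:

       int outer (int i)
       {
         int inner (int j) { return i + j; }
         return inner (1);
       }

   INNER reads I from OUTER's frame, so every call to it is made with the
   static chain pointing at that frame.  (OUTER/INNER are illustrative
   names only.)  */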
26862
26863 /* Emit RTL insns to initialize the variable parts of a trampoline.
26864 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26865 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26866 to be passed to the target function. */
26867
26868 static void
26869 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26870 {
26871 rtx mem, fnaddr;
26872 int opcode;
26873 int offset = 0;
26874
26875 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26876
26877 if (TARGET_64BIT)
26878 {
26879 int size;
26880
26881 /* Load the function address into r11.  Try to load the address using
26882 the shorter movl instead of movabs.  We may want to support
26883 movq for kernel mode, but the kernel does not use trampolines at
26884 the moment.  FNADDR is a 32-bit address and may not be in
26885 DImode when ptr_mode == SImode.  Always use movl in this
26886 case. */
26887 if (ptr_mode == SImode
26888 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26889 {
26890 fnaddr = copy_addr_to_reg (fnaddr);
26891
26892 mem = adjust_address (m_tramp, HImode, offset);
26893 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26894
26895 mem = adjust_address (m_tramp, SImode, offset + 2);
26896 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26897 offset += 6;
26898 }
26899 else
26900 {
26901 mem = adjust_address (m_tramp, HImode, offset);
26902 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26903
26904 mem = adjust_address (m_tramp, DImode, offset + 2);
26905 emit_move_insn (mem, fnaddr);
26906 offset += 10;
26907 }
26908
26909 /* Load the static chain into r10 using movabs.  Use the shorter movl
26910 instead of movabs when ptr_mode == SImode. */
26911 if (ptr_mode == SImode)
26912 {
26913 opcode = 0xba41;
26914 size = 6;
26915 }
26916 else
26917 {
26918 opcode = 0xba49;
26919 size = 10;
26920 }
26921
26922 mem = adjust_address (m_tramp, HImode, offset);
26923 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26924
26925 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26926 emit_move_insn (mem, chain_value);
26927 offset += size;
26928
26929 /* Jump to r11; the last (unused) byte is a nop, only there to
26930 pad the write out to a single 32-bit store. */
26931 mem = adjust_address (m_tramp, SImode, offset);
26932 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26933 offset += 4;
26934 }
26935 else
26936 {
26937 rtx disp, chain;
26938
26939 /* Depending on the static chain location, either load a register
26940 with a constant, or push the constant to the stack. All of the
26941 instructions are the same size. */
26942 chain = ix86_static_chain (fndecl, true);
26943 if (REG_P (chain))
26944 {
26945 switch (REGNO (chain))
26946 {
26947 case AX_REG:
26948 opcode = 0xb8; break;
26949 case CX_REG:
26950 opcode = 0xb9; break;
26951 default:
26952 gcc_unreachable ();
26953 }
26954 }
26955 else
26956 opcode = 0x68;
26957
26958 mem = adjust_address (m_tramp, QImode, offset);
26959 emit_move_insn (mem, gen_int_mode (opcode, QImode));
26960
26961 mem = adjust_address (m_tramp, SImode, offset + 1);
26962 emit_move_insn (mem, chain_value);
26963 offset += 5;
26964
26965 mem = adjust_address (m_tramp, QImode, offset);
26966 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
26967
26968 mem = adjust_address (m_tramp, SImode, offset + 1);
26969
26970 /* Compute offset from the end of the jmp to the target function.
26971 In the case in which the trampoline stores the static chain on
26972 the stack, we need to skip the first insn which pushes the
26973 (call-saved) register static chain; this push is 1 byte. */
26974 offset += 5;
26975 disp = expand_binop (SImode, sub_optab, fnaddr,
26976 plus_constant (Pmode, XEXP (m_tramp, 0),
26977 offset - (MEM_P (chain) ? 1 : 0)),
26978 NULL_RTX, 1, OPTAB_DIRECT);
26979 emit_move_insn (mem, disp);
26980 }
26981
26982 gcc_assert (offset <= TRAMPOLINE_SIZE);
26983
26984 #ifdef HAVE_ENABLE_EXECUTE_STACK
26985 #ifdef CHECK_EXECUTE_STACK_ENABLED
26986 if (CHECK_EXECUTE_STACK_ENABLED)
26987 #endif
26988 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
26989 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
26990 #endif
26991 }
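/* For reference, the 64-bit trampoline written above decodes as

       49 bb <8-byte fnaddr>   movabs $fnaddr, %r11
       49 ba <8-byte chain>    movabs $chain,  %r10
       49 ff e3                jmp    *%r11
       90                      nop (pads the final 32-bit store)

   (41 bb / 41 ba with 4-byte immediates when movl suffices).  The
   32-bit trampoline is simply "mov $chain, %eax/%ecx" (0xb8/0xb9) or
   "push $chain" (0x68), followed by a "jmp rel32" (0xe9) to the target.  */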
26992 \f
26993 /* The following file contains several enumerations and data structures
26994 built from the definitions in i386-builtin-types.def. */
26995
26996 #include "i386-builtin-types.inc"
26997
26998 /* Table for the ix86 builtin non-function types. */
26999 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27000
27001 /* Retrieve an element from the above table, building some of
27002 the types lazily. */
27003
27004 static tree
27005 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27006 {
27007 unsigned int index;
27008 tree type, itype;
27009
27010 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27011
27012 type = ix86_builtin_type_tab[(int) tcode];
27013 if (type != NULL)
27014 return type;
27015
27016 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27017 if (tcode <= IX86_BT_LAST_VECT)
27018 {
27019 enum machine_mode mode;
27020
27021 index = tcode - IX86_BT_LAST_PRIM - 1;
27022 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27023 mode = ix86_builtin_type_vect_mode[index];
27024
27025 type = build_vector_type_for_mode (itype, mode);
27026 }
27027 else
27028 {
27029 int quals;
27030
27031 index = tcode - IX86_BT_LAST_VECT - 1;
27032 if (tcode <= IX86_BT_LAST_PTR)
27033 quals = TYPE_UNQUALIFIED;
27034 else
27035 quals = TYPE_QUAL_CONST;
27036
27037 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27038 if (quals != TYPE_UNQUALIFIED)
27039 itype = build_qualified_type (itype, quals);
27040
27041 type = build_pointer_type (itype);
27042 }
27043
27044 ix86_builtin_type_tab[(int) tcode] = type;
27045 return type;
27046 }
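/* For example, the first request for a 128-bit float vector code looks up
   its element code and mode in ix86_builtin_type_vect_base and
   ix86_builtin_type_vect_mode, builds the tree with
   build_vector_type_for_mode (float type, V4SFmode), and caches it;
   pointer codes are built the same way from ix86_builtin_type_ptr_base,
   with a const qualifier added for codes past IX86_BT_LAST_PTR.  (The
   exact enumerator names come from i386-builtin-types.inc.)  */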
27047
27048 /* Table for the ix86 builtin function types. */
27049 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27050
27051 /* Retrieve an element from the above table, building some of
27052 the types lazily. */
27053
27054 static tree
27055 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27056 {
27057 tree type;
27058
27059 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27060
27061 type = ix86_builtin_func_type_tab[(int) tcode];
27062 if (type != NULL)
27063 return type;
27064
27065 if (tcode <= IX86_BT_LAST_FUNC)
27066 {
27067 unsigned start = ix86_builtin_func_start[(int) tcode];
27068 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27069 tree rtype, atype, args = void_list_node;
27070 unsigned i;
27071
27072 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27073 for (i = after - 1; i > start; --i)
27074 {
27075 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27076 args = tree_cons (NULL, atype, args);
27077 }
27078
27079 type = build_function_type (rtype, args);
27080 }
27081 else
27082 {
27083 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27084 enum ix86_builtin_func_type icode;
27085
27086 icode = ix86_builtin_func_alias_base[index];
27087 type = ix86_get_builtin_func_type (icode);
27088 }
27089
27090 ix86_builtin_func_type_tab[(int) tcode] = type;
27091 return type;
27092 }
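/* Note that the loop above walks the argument codes backwards, consing
   each one onto void_list_node, so the resulting TREE_LIST ends up in
   declaration order; the return type is taken from the first slot of the
   ix86_builtin_func_args range.  */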
27093
27094
27095 /* Codes for all the SSE/MMX builtins. */
27096 enum ix86_builtins
27097 {
27098 IX86_BUILTIN_ADDPS,
27099 IX86_BUILTIN_ADDSS,
27100 IX86_BUILTIN_DIVPS,
27101 IX86_BUILTIN_DIVSS,
27102 IX86_BUILTIN_MULPS,
27103 IX86_BUILTIN_MULSS,
27104 IX86_BUILTIN_SUBPS,
27105 IX86_BUILTIN_SUBSS,
27106
27107 IX86_BUILTIN_CMPEQPS,
27108 IX86_BUILTIN_CMPLTPS,
27109 IX86_BUILTIN_CMPLEPS,
27110 IX86_BUILTIN_CMPGTPS,
27111 IX86_BUILTIN_CMPGEPS,
27112 IX86_BUILTIN_CMPNEQPS,
27113 IX86_BUILTIN_CMPNLTPS,
27114 IX86_BUILTIN_CMPNLEPS,
27115 IX86_BUILTIN_CMPNGTPS,
27116 IX86_BUILTIN_CMPNGEPS,
27117 IX86_BUILTIN_CMPORDPS,
27118 IX86_BUILTIN_CMPUNORDPS,
27119 IX86_BUILTIN_CMPEQSS,
27120 IX86_BUILTIN_CMPLTSS,
27121 IX86_BUILTIN_CMPLESS,
27122 IX86_BUILTIN_CMPNEQSS,
27123 IX86_BUILTIN_CMPNLTSS,
27124 IX86_BUILTIN_CMPNLESS,
27125 IX86_BUILTIN_CMPORDSS,
27126 IX86_BUILTIN_CMPUNORDSS,
27127
27128 IX86_BUILTIN_COMIEQSS,
27129 IX86_BUILTIN_COMILTSS,
27130 IX86_BUILTIN_COMILESS,
27131 IX86_BUILTIN_COMIGTSS,
27132 IX86_BUILTIN_COMIGESS,
27133 IX86_BUILTIN_COMINEQSS,
27134 IX86_BUILTIN_UCOMIEQSS,
27135 IX86_BUILTIN_UCOMILTSS,
27136 IX86_BUILTIN_UCOMILESS,
27137 IX86_BUILTIN_UCOMIGTSS,
27138 IX86_BUILTIN_UCOMIGESS,
27139 IX86_BUILTIN_UCOMINEQSS,
27140
27141 IX86_BUILTIN_CVTPI2PS,
27142 IX86_BUILTIN_CVTPS2PI,
27143 IX86_BUILTIN_CVTSI2SS,
27144 IX86_BUILTIN_CVTSI642SS,
27145 IX86_BUILTIN_CVTSS2SI,
27146 IX86_BUILTIN_CVTSS2SI64,
27147 IX86_BUILTIN_CVTTPS2PI,
27148 IX86_BUILTIN_CVTTSS2SI,
27149 IX86_BUILTIN_CVTTSS2SI64,
27150
27151 IX86_BUILTIN_MAXPS,
27152 IX86_BUILTIN_MAXSS,
27153 IX86_BUILTIN_MINPS,
27154 IX86_BUILTIN_MINSS,
27155
27156 IX86_BUILTIN_LOADUPS,
27157 IX86_BUILTIN_STOREUPS,
27158 IX86_BUILTIN_MOVSS,
27159
27160 IX86_BUILTIN_MOVHLPS,
27161 IX86_BUILTIN_MOVLHPS,
27162 IX86_BUILTIN_LOADHPS,
27163 IX86_BUILTIN_LOADLPS,
27164 IX86_BUILTIN_STOREHPS,
27165 IX86_BUILTIN_STORELPS,
27166
27167 IX86_BUILTIN_MASKMOVQ,
27168 IX86_BUILTIN_MOVMSKPS,
27169 IX86_BUILTIN_PMOVMSKB,
27170
27171 IX86_BUILTIN_MOVNTPS,
27172 IX86_BUILTIN_MOVNTQ,
27173
27174 IX86_BUILTIN_LOADDQU,
27175 IX86_BUILTIN_STOREDQU,
27176
27177 IX86_BUILTIN_PACKSSWB,
27178 IX86_BUILTIN_PACKSSDW,
27179 IX86_BUILTIN_PACKUSWB,
27180
27181 IX86_BUILTIN_PADDB,
27182 IX86_BUILTIN_PADDW,
27183 IX86_BUILTIN_PADDD,
27184 IX86_BUILTIN_PADDQ,
27185 IX86_BUILTIN_PADDSB,
27186 IX86_BUILTIN_PADDSW,
27187 IX86_BUILTIN_PADDUSB,
27188 IX86_BUILTIN_PADDUSW,
27189 IX86_BUILTIN_PSUBB,
27190 IX86_BUILTIN_PSUBW,
27191 IX86_BUILTIN_PSUBD,
27192 IX86_BUILTIN_PSUBQ,
27193 IX86_BUILTIN_PSUBSB,
27194 IX86_BUILTIN_PSUBSW,
27195 IX86_BUILTIN_PSUBUSB,
27196 IX86_BUILTIN_PSUBUSW,
27197
27198 IX86_BUILTIN_PAND,
27199 IX86_BUILTIN_PANDN,
27200 IX86_BUILTIN_POR,
27201 IX86_BUILTIN_PXOR,
27202
27203 IX86_BUILTIN_PAVGB,
27204 IX86_BUILTIN_PAVGW,
27205
27206 IX86_BUILTIN_PCMPEQB,
27207 IX86_BUILTIN_PCMPEQW,
27208 IX86_BUILTIN_PCMPEQD,
27209 IX86_BUILTIN_PCMPGTB,
27210 IX86_BUILTIN_PCMPGTW,
27211 IX86_BUILTIN_PCMPGTD,
27212
27213 IX86_BUILTIN_PMADDWD,
27214
27215 IX86_BUILTIN_PMAXSW,
27216 IX86_BUILTIN_PMAXUB,
27217 IX86_BUILTIN_PMINSW,
27218 IX86_BUILTIN_PMINUB,
27219
27220 IX86_BUILTIN_PMULHUW,
27221 IX86_BUILTIN_PMULHW,
27222 IX86_BUILTIN_PMULLW,
27223
27224 IX86_BUILTIN_PSADBW,
27225 IX86_BUILTIN_PSHUFW,
27226
27227 IX86_BUILTIN_PSLLW,
27228 IX86_BUILTIN_PSLLD,
27229 IX86_BUILTIN_PSLLQ,
27230 IX86_BUILTIN_PSRAW,
27231 IX86_BUILTIN_PSRAD,
27232 IX86_BUILTIN_PSRLW,
27233 IX86_BUILTIN_PSRLD,
27234 IX86_BUILTIN_PSRLQ,
27235 IX86_BUILTIN_PSLLWI,
27236 IX86_BUILTIN_PSLLDI,
27237 IX86_BUILTIN_PSLLQI,
27238 IX86_BUILTIN_PSRAWI,
27239 IX86_BUILTIN_PSRADI,
27240 IX86_BUILTIN_PSRLWI,
27241 IX86_BUILTIN_PSRLDI,
27242 IX86_BUILTIN_PSRLQI,
27243
27244 IX86_BUILTIN_PUNPCKHBW,
27245 IX86_BUILTIN_PUNPCKHWD,
27246 IX86_BUILTIN_PUNPCKHDQ,
27247 IX86_BUILTIN_PUNPCKLBW,
27248 IX86_BUILTIN_PUNPCKLWD,
27249 IX86_BUILTIN_PUNPCKLDQ,
27250
27251 IX86_BUILTIN_SHUFPS,
27252
27253 IX86_BUILTIN_RCPPS,
27254 IX86_BUILTIN_RCPSS,
27255 IX86_BUILTIN_RSQRTPS,
27256 IX86_BUILTIN_RSQRTPS_NR,
27257 IX86_BUILTIN_RSQRTSS,
27258 IX86_BUILTIN_RSQRTF,
27259 IX86_BUILTIN_SQRTPS,
27260 IX86_BUILTIN_SQRTPS_NR,
27261 IX86_BUILTIN_SQRTSS,
27262
27263 IX86_BUILTIN_UNPCKHPS,
27264 IX86_BUILTIN_UNPCKLPS,
27265
27266 IX86_BUILTIN_ANDPS,
27267 IX86_BUILTIN_ANDNPS,
27268 IX86_BUILTIN_ORPS,
27269 IX86_BUILTIN_XORPS,
27270
27271 IX86_BUILTIN_EMMS,
27272 IX86_BUILTIN_LDMXCSR,
27273 IX86_BUILTIN_STMXCSR,
27274 IX86_BUILTIN_SFENCE,
27275
27276 IX86_BUILTIN_FXSAVE,
27277 IX86_BUILTIN_FXRSTOR,
27278 IX86_BUILTIN_FXSAVE64,
27279 IX86_BUILTIN_FXRSTOR64,
27280
27281 IX86_BUILTIN_XSAVE,
27282 IX86_BUILTIN_XRSTOR,
27283 IX86_BUILTIN_XSAVE64,
27284 IX86_BUILTIN_XRSTOR64,
27285
27286 IX86_BUILTIN_XSAVEOPT,
27287 IX86_BUILTIN_XSAVEOPT64,
27288
27289 /* 3DNow! Original */
27290 IX86_BUILTIN_FEMMS,
27291 IX86_BUILTIN_PAVGUSB,
27292 IX86_BUILTIN_PF2ID,
27293 IX86_BUILTIN_PFACC,
27294 IX86_BUILTIN_PFADD,
27295 IX86_BUILTIN_PFCMPEQ,
27296 IX86_BUILTIN_PFCMPGE,
27297 IX86_BUILTIN_PFCMPGT,
27298 IX86_BUILTIN_PFMAX,
27299 IX86_BUILTIN_PFMIN,
27300 IX86_BUILTIN_PFMUL,
27301 IX86_BUILTIN_PFRCP,
27302 IX86_BUILTIN_PFRCPIT1,
27303 IX86_BUILTIN_PFRCPIT2,
27304 IX86_BUILTIN_PFRSQIT1,
27305 IX86_BUILTIN_PFRSQRT,
27306 IX86_BUILTIN_PFSUB,
27307 IX86_BUILTIN_PFSUBR,
27308 IX86_BUILTIN_PI2FD,
27309 IX86_BUILTIN_PMULHRW,
27310
27311 /* 3DNow! Athlon Extensions */
27312 IX86_BUILTIN_PF2IW,
27313 IX86_BUILTIN_PFNACC,
27314 IX86_BUILTIN_PFPNACC,
27315 IX86_BUILTIN_PI2FW,
27316 IX86_BUILTIN_PSWAPDSI,
27317 IX86_BUILTIN_PSWAPDSF,
27318
27319 /* SSE2 */
27320 IX86_BUILTIN_ADDPD,
27321 IX86_BUILTIN_ADDSD,
27322 IX86_BUILTIN_DIVPD,
27323 IX86_BUILTIN_DIVSD,
27324 IX86_BUILTIN_MULPD,
27325 IX86_BUILTIN_MULSD,
27326 IX86_BUILTIN_SUBPD,
27327 IX86_BUILTIN_SUBSD,
27328
27329 IX86_BUILTIN_CMPEQPD,
27330 IX86_BUILTIN_CMPLTPD,
27331 IX86_BUILTIN_CMPLEPD,
27332 IX86_BUILTIN_CMPGTPD,
27333 IX86_BUILTIN_CMPGEPD,
27334 IX86_BUILTIN_CMPNEQPD,
27335 IX86_BUILTIN_CMPNLTPD,
27336 IX86_BUILTIN_CMPNLEPD,
27337 IX86_BUILTIN_CMPNGTPD,
27338 IX86_BUILTIN_CMPNGEPD,
27339 IX86_BUILTIN_CMPORDPD,
27340 IX86_BUILTIN_CMPUNORDPD,
27341 IX86_BUILTIN_CMPEQSD,
27342 IX86_BUILTIN_CMPLTSD,
27343 IX86_BUILTIN_CMPLESD,
27344 IX86_BUILTIN_CMPNEQSD,
27345 IX86_BUILTIN_CMPNLTSD,
27346 IX86_BUILTIN_CMPNLESD,
27347 IX86_BUILTIN_CMPORDSD,
27348 IX86_BUILTIN_CMPUNORDSD,
27349
27350 IX86_BUILTIN_COMIEQSD,
27351 IX86_BUILTIN_COMILTSD,
27352 IX86_BUILTIN_COMILESD,
27353 IX86_BUILTIN_COMIGTSD,
27354 IX86_BUILTIN_COMIGESD,
27355 IX86_BUILTIN_COMINEQSD,
27356 IX86_BUILTIN_UCOMIEQSD,
27357 IX86_BUILTIN_UCOMILTSD,
27358 IX86_BUILTIN_UCOMILESD,
27359 IX86_BUILTIN_UCOMIGTSD,
27360 IX86_BUILTIN_UCOMIGESD,
27361 IX86_BUILTIN_UCOMINEQSD,
27362
27363 IX86_BUILTIN_MAXPD,
27364 IX86_BUILTIN_MAXSD,
27365 IX86_BUILTIN_MINPD,
27366 IX86_BUILTIN_MINSD,
27367
27368 IX86_BUILTIN_ANDPD,
27369 IX86_BUILTIN_ANDNPD,
27370 IX86_BUILTIN_ORPD,
27371 IX86_BUILTIN_XORPD,
27372
27373 IX86_BUILTIN_SQRTPD,
27374 IX86_BUILTIN_SQRTSD,
27375
27376 IX86_BUILTIN_UNPCKHPD,
27377 IX86_BUILTIN_UNPCKLPD,
27378
27379 IX86_BUILTIN_SHUFPD,
27380
27381 IX86_BUILTIN_LOADUPD,
27382 IX86_BUILTIN_STOREUPD,
27383 IX86_BUILTIN_MOVSD,
27384
27385 IX86_BUILTIN_LOADHPD,
27386 IX86_BUILTIN_LOADLPD,
27387
27388 IX86_BUILTIN_CVTDQ2PD,
27389 IX86_BUILTIN_CVTDQ2PS,
27390
27391 IX86_BUILTIN_CVTPD2DQ,
27392 IX86_BUILTIN_CVTPD2PI,
27393 IX86_BUILTIN_CVTPD2PS,
27394 IX86_BUILTIN_CVTTPD2DQ,
27395 IX86_BUILTIN_CVTTPD2PI,
27396
27397 IX86_BUILTIN_CVTPI2PD,
27398 IX86_BUILTIN_CVTSI2SD,
27399 IX86_BUILTIN_CVTSI642SD,
27400
27401 IX86_BUILTIN_CVTSD2SI,
27402 IX86_BUILTIN_CVTSD2SI64,
27403 IX86_BUILTIN_CVTSD2SS,
27404 IX86_BUILTIN_CVTSS2SD,
27405 IX86_BUILTIN_CVTTSD2SI,
27406 IX86_BUILTIN_CVTTSD2SI64,
27407
27408 IX86_BUILTIN_CVTPS2DQ,
27409 IX86_BUILTIN_CVTPS2PD,
27410 IX86_BUILTIN_CVTTPS2DQ,
27411
27412 IX86_BUILTIN_MOVNTI,
27413 IX86_BUILTIN_MOVNTI64,
27414 IX86_BUILTIN_MOVNTPD,
27415 IX86_BUILTIN_MOVNTDQ,
27416
27417 IX86_BUILTIN_MOVQ128,
27418
27419 /* SSE2 MMX */
27420 IX86_BUILTIN_MASKMOVDQU,
27421 IX86_BUILTIN_MOVMSKPD,
27422 IX86_BUILTIN_PMOVMSKB128,
27423
27424 IX86_BUILTIN_PACKSSWB128,
27425 IX86_BUILTIN_PACKSSDW128,
27426 IX86_BUILTIN_PACKUSWB128,
27427
27428 IX86_BUILTIN_PADDB128,
27429 IX86_BUILTIN_PADDW128,
27430 IX86_BUILTIN_PADDD128,
27431 IX86_BUILTIN_PADDQ128,
27432 IX86_BUILTIN_PADDSB128,
27433 IX86_BUILTIN_PADDSW128,
27434 IX86_BUILTIN_PADDUSB128,
27435 IX86_BUILTIN_PADDUSW128,
27436 IX86_BUILTIN_PSUBB128,
27437 IX86_BUILTIN_PSUBW128,
27438 IX86_BUILTIN_PSUBD128,
27439 IX86_BUILTIN_PSUBQ128,
27440 IX86_BUILTIN_PSUBSB128,
27441 IX86_BUILTIN_PSUBSW128,
27442 IX86_BUILTIN_PSUBUSB128,
27443 IX86_BUILTIN_PSUBUSW128,
27444
27445 IX86_BUILTIN_PAND128,
27446 IX86_BUILTIN_PANDN128,
27447 IX86_BUILTIN_POR128,
27448 IX86_BUILTIN_PXOR128,
27449
27450 IX86_BUILTIN_PAVGB128,
27451 IX86_BUILTIN_PAVGW128,
27452
27453 IX86_BUILTIN_PCMPEQB128,
27454 IX86_BUILTIN_PCMPEQW128,
27455 IX86_BUILTIN_PCMPEQD128,
27456 IX86_BUILTIN_PCMPGTB128,
27457 IX86_BUILTIN_PCMPGTW128,
27458 IX86_BUILTIN_PCMPGTD128,
27459
27460 IX86_BUILTIN_PMADDWD128,
27461
27462 IX86_BUILTIN_PMAXSW128,
27463 IX86_BUILTIN_PMAXUB128,
27464 IX86_BUILTIN_PMINSW128,
27465 IX86_BUILTIN_PMINUB128,
27466
27467 IX86_BUILTIN_PMULUDQ,
27468 IX86_BUILTIN_PMULUDQ128,
27469 IX86_BUILTIN_PMULHUW128,
27470 IX86_BUILTIN_PMULHW128,
27471 IX86_BUILTIN_PMULLW128,
27472
27473 IX86_BUILTIN_PSADBW128,
27474 IX86_BUILTIN_PSHUFHW,
27475 IX86_BUILTIN_PSHUFLW,
27476 IX86_BUILTIN_PSHUFD,
27477
27478 IX86_BUILTIN_PSLLDQI128,
27479 IX86_BUILTIN_PSLLWI128,
27480 IX86_BUILTIN_PSLLDI128,
27481 IX86_BUILTIN_PSLLQI128,
27482 IX86_BUILTIN_PSRAWI128,
27483 IX86_BUILTIN_PSRADI128,
27484 IX86_BUILTIN_PSRLDQI128,
27485 IX86_BUILTIN_PSRLWI128,
27486 IX86_BUILTIN_PSRLDI128,
27487 IX86_BUILTIN_PSRLQI128,
27488
27489 IX86_BUILTIN_PSLLDQ128,
27490 IX86_BUILTIN_PSLLW128,
27491 IX86_BUILTIN_PSLLD128,
27492 IX86_BUILTIN_PSLLQ128,
27493 IX86_BUILTIN_PSRAW128,
27494 IX86_BUILTIN_PSRAD128,
27495 IX86_BUILTIN_PSRLW128,
27496 IX86_BUILTIN_PSRLD128,
27497 IX86_BUILTIN_PSRLQ128,
27498
27499 IX86_BUILTIN_PUNPCKHBW128,
27500 IX86_BUILTIN_PUNPCKHWD128,
27501 IX86_BUILTIN_PUNPCKHDQ128,
27502 IX86_BUILTIN_PUNPCKHQDQ128,
27503 IX86_BUILTIN_PUNPCKLBW128,
27504 IX86_BUILTIN_PUNPCKLWD128,
27505 IX86_BUILTIN_PUNPCKLDQ128,
27506 IX86_BUILTIN_PUNPCKLQDQ128,
27507
27508 IX86_BUILTIN_CLFLUSH,
27509 IX86_BUILTIN_MFENCE,
27510 IX86_BUILTIN_LFENCE,
27511 IX86_BUILTIN_PAUSE,
27512
27513 IX86_BUILTIN_FNSTENV,
27514 IX86_BUILTIN_FLDENV,
27515 IX86_BUILTIN_FNSTSW,
27516 IX86_BUILTIN_FNCLEX,
27517
27518 IX86_BUILTIN_BSRSI,
27519 IX86_BUILTIN_BSRDI,
27520 IX86_BUILTIN_RDPMC,
27521 IX86_BUILTIN_RDTSC,
27522 IX86_BUILTIN_RDTSCP,
27523 IX86_BUILTIN_ROLQI,
27524 IX86_BUILTIN_ROLHI,
27525 IX86_BUILTIN_RORQI,
27526 IX86_BUILTIN_RORHI,
27527
27528 /* SSE3. */
27529 IX86_BUILTIN_ADDSUBPS,
27530 IX86_BUILTIN_HADDPS,
27531 IX86_BUILTIN_HSUBPS,
27532 IX86_BUILTIN_MOVSHDUP,
27533 IX86_BUILTIN_MOVSLDUP,
27534 IX86_BUILTIN_ADDSUBPD,
27535 IX86_BUILTIN_HADDPD,
27536 IX86_BUILTIN_HSUBPD,
27537 IX86_BUILTIN_LDDQU,
27538
27539 IX86_BUILTIN_MONITOR,
27540 IX86_BUILTIN_MWAIT,
27541
27542 /* SSSE3. */
27543 IX86_BUILTIN_PHADDW,
27544 IX86_BUILTIN_PHADDD,
27545 IX86_BUILTIN_PHADDSW,
27546 IX86_BUILTIN_PHSUBW,
27547 IX86_BUILTIN_PHSUBD,
27548 IX86_BUILTIN_PHSUBSW,
27549 IX86_BUILTIN_PMADDUBSW,
27550 IX86_BUILTIN_PMULHRSW,
27551 IX86_BUILTIN_PSHUFB,
27552 IX86_BUILTIN_PSIGNB,
27553 IX86_BUILTIN_PSIGNW,
27554 IX86_BUILTIN_PSIGND,
27555 IX86_BUILTIN_PALIGNR,
27556 IX86_BUILTIN_PABSB,
27557 IX86_BUILTIN_PABSW,
27558 IX86_BUILTIN_PABSD,
27559
27560 IX86_BUILTIN_PHADDW128,
27561 IX86_BUILTIN_PHADDD128,
27562 IX86_BUILTIN_PHADDSW128,
27563 IX86_BUILTIN_PHSUBW128,
27564 IX86_BUILTIN_PHSUBD128,
27565 IX86_BUILTIN_PHSUBSW128,
27566 IX86_BUILTIN_PMADDUBSW128,
27567 IX86_BUILTIN_PMULHRSW128,
27568 IX86_BUILTIN_PSHUFB128,
27569 IX86_BUILTIN_PSIGNB128,
27570 IX86_BUILTIN_PSIGNW128,
27571 IX86_BUILTIN_PSIGND128,
27572 IX86_BUILTIN_PALIGNR128,
27573 IX86_BUILTIN_PABSB128,
27574 IX86_BUILTIN_PABSW128,
27575 IX86_BUILTIN_PABSD128,
27576
27577 /* AMDFAM10 - SSE4A New Instructions. */
27578 IX86_BUILTIN_MOVNTSD,
27579 IX86_BUILTIN_MOVNTSS,
27580 IX86_BUILTIN_EXTRQI,
27581 IX86_BUILTIN_EXTRQ,
27582 IX86_BUILTIN_INSERTQI,
27583 IX86_BUILTIN_INSERTQ,
27584
27585 /* SSE4.1. */
27586 IX86_BUILTIN_BLENDPD,
27587 IX86_BUILTIN_BLENDPS,
27588 IX86_BUILTIN_BLENDVPD,
27589 IX86_BUILTIN_BLENDVPS,
27590 IX86_BUILTIN_PBLENDVB128,
27591 IX86_BUILTIN_PBLENDW128,
27592
27593 IX86_BUILTIN_DPPD,
27594 IX86_BUILTIN_DPPS,
27595
27596 IX86_BUILTIN_INSERTPS128,
27597
27598 IX86_BUILTIN_MOVNTDQA,
27599 IX86_BUILTIN_MPSADBW128,
27600 IX86_BUILTIN_PACKUSDW128,
27601 IX86_BUILTIN_PCMPEQQ,
27602 IX86_BUILTIN_PHMINPOSUW128,
27603
27604 IX86_BUILTIN_PMAXSB128,
27605 IX86_BUILTIN_PMAXSD128,
27606 IX86_BUILTIN_PMAXUD128,
27607 IX86_BUILTIN_PMAXUW128,
27608
27609 IX86_BUILTIN_PMINSB128,
27610 IX86_BUILTIN_PMINSD128,
27611 IX86_BUILTIN_PMINUD128,
27612 IX86_BUILTIN_PMINUW128,
27613
27614 IX86_BUILTIN_PMOVSXBW128,
27615 IX86_BUILTIN_PMOVSXBD128,
27616 IX86_BUILTIN_PMOVSXBQ128,
27617 IX86_BUILTIN_PMOVSXWD128,
27618 IX86_BUILTIN_PMOVSXWQ128,
27619 IX86_BUILTIN_PMOVSXDQ128,
27620
27621 IX86_BUILTIN_PMOVZXBW128,
27622 IX86_BUILTIN_PMOVZXBD128,
27623 IX86_BUILTIN_PMOVZXBQ128,
27624 IX86_BUILTIN_PMOVZXWD128,
27625 IX86_BUILTIN_PMOVZXWQ128,
27626 IX86_BUILTIN_PMOVZXDQ128,
27627
27628 IX86_BUILTIN_PMULDQ128,
27629 IX86_BUILTIN_PMULLD128,
27630
27631 IX86_BUILTIN_ROUNDSD,
27632 IX86_BUILTIN_ROUNDSS,
27633
27634 IX86_BUILTIN_ROUNDPD,
27635 IX86_BUILTIN_ROUNDPS,
27636
27637 IX86_BUILTIN_FLOORPD,
27638 IX86_BUILTIN_CEILPD,
27639 IX86_BUILTIN_TRUNCPD,
27640 IX86_BUILTIN_RINTPD,
27641 IX86_BUILTIN_ROUNDPD_AZ,
27642
27643 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27644 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27645 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27646
27647 IX86_BUILTIN_FLOORPS,
27648 IX86_BUILTIN_CEILPS,
27649 IX86_BUILTIN_TRUNCPS,
27650 IX86_BUILTIN_RINTPS,
27651 IX86_BUILTIN_ROUNDPS_AZ,
27652
27653 IX86_BUILTIN_FLOORPS_SFIX,
27654 IX86_BUILTIN_CEILPS_SFIX,
27655 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27656
27657 IX86_BUILTIN_PTESTZ,
27658 IX86_BUILTIN_PTESTC,
27659 IX86_BUILTIN_PTESTNZC,
27660
27661 IX86_BUILTIN_VEC_INIT_V2SI,
27662 IX86_BUILTIN_VEC_INIT_V4HI,
27663 IX86_BUILTIN_VEC_INIT_V8QI,
27664 IX86_BUILTIN_VEC_EXT_V2DF,
27665 IX86_BUILTIN_VEC_EXT_V2DI,
27666 IX86_BUILTIN_VEC_EXT_V4SF,
27667 IX86_BUILTIN_VEC_EXT_V4SI,
27668 IX86_BUILTIN_VEC_EXT_V8HI,
27669 IX86_BUILTIN_VEC_EXT_V2SI,
27670 IX86_BUILTIN_VEC_EXT_V4HI,
27671 IX86_BUILTIN_VEC_EXT_V16QI,
27672 IX86_BUILTIN_VEC_SET_V2DI,
27673 IX86_BUILTIN_VEC_SET_V4SF,
27674 IX86_BUILTIN_VEC_SET_V4SI,
27675 IX86_BUILTIN_VEC_SET_V8HI,
27676 IX86_BUILTIN_VEC_SET_V4HI,
27677 IX86_BUILTIN_VEC_SET_V16QI,
27678
27679 IX86_BUILTIN_VEC_PACK_SFIX,
27680 IX86_BUILTIN_VEC_PACK_SFIX256,
27681
27682 /* SSE4.2. */
27683 IX86_BUILTIN_CRC32QI,
27684 IX86_BUILTIN_CRC32HI,
27685 IX86_BUILTIN_CRC32SI,
27686 IX86_BUILTIN_CRC32DI,
27687
27688 IX86_BUILTIN_PCMPESTRI128,
27689 IX86_BUILTIN_PCMPESTRM128,
27690 IX86_BUILTIN_PCMPESTRA128,
27691 IX86_BUILTIN_PCMPESTRC128,
27692 IX86_BUILTIN_PCMPESTRO128,
27693 IX86_BUILTIN_PCMPESTRS128,
27694 IX86_BUILTIN_PCMPESTRZ128,
27695 IX86_BUILTIN_PCMPISTRI128,
27696 IX86_BUILTIN_PCMPISTRM128,
27697 IX86_BUILTIN_PCMPISTRA128,
27698 IX86_BUILTIN_PCMPISTRC128,
27699 IX86_BUILTIN_PCMPISTRO128,
27700 IX86_BUILTIN_PCMPISTRS128,
27701 IX86_BUILTIN_PCMPISTRZ128,
27702
27703 IX86_BUILTIN_PCMPGTQ,
27704
27705 /* AES instructions */
27706 IX86_BUILTIN_AESENC128,
27707 IX86_BUILTIN_AESENCLAST128,
27708 IX86_BUILTIN_AESDEC128,
27709 IX86_BUILTIN_AESDECLAST128,
27710 IX86_BUILTIN_AESIMC128,
27711 IX86_BUILTIN_AESKEYGENASSIST128,
27712
27713 /* PCLMUL instruction */
27714 IX86_BUILTIN_PCLMULQDQ128,
27715
27716 /* AVX */
27717 IX86_BUILTIN_ADDPD256,
27718 IX86_BUILTIN_ADDPS256,
27719 IX86_BUILTIN_ADDSUBPD256,
27720 IX86_BUILTIN_ADDSUBPS256,
27721 IX86_BUILTIN_ANDPD256,
27722 IX86_BUILTIN_ANDPS256,
27723 IX86_BUILTIN_ANDNPD256,
27724 IX86_BUILTIN_ANDNPS256,
27725 IX86_BUILTIN_BLENDPD256,
27726 IX86_BUILTIN_BLENDPS256,
27727 IX86_BUILTIN_BLENDVPD256,
27728 IX86_BUILTIN_BLENDVPS256,
27729 IX86_BUILTIN_DIVPD256,
27730 IX86_BUILTIN_DIVPS256,
27731 IX86_BUILTIN_DPPS256,
27732 IX86_BUILTIN_HADDPD256,
27733 IX86_BUILTIN_HADDPS256,
27734 IX86_BUILTIN_HSUBPD256,
27735 IX86_BUILTIN_HSUBPS256,
27736 IX86_BUILTIN_MAXPD256,
27737 IX86_BUILTIN_MAXPS256,
27738 IX86_BUILTIN_MINPD256,
27739 IX86_BUILTIN_MINPS256,
27740 IX86_BUILTIN_MULPD256,
27741 IX86_BUILTIN_MULPS256,
27742 IX86_BUILTIN_ORPD256,
27743 IX86_BUILTIN_ORPS256,
27744 IX86_BUILTIN_SHUFPD256,
27745 IX86_BUILTIN_SHUFPS256,
27746 IX86_BUILTIN_SUBPD256,
27747 IX86_BUILTIN_SUBPS256,
27748 IX86_BUILTIN_XORPD256,
27749 IX86_BUILTIN_XORPS256,
27750 IX86_BUILTIN_CMPSD,
27751 IX86_BUILTIN_CMPSS,
27752 IX86_BUILTIN_CMPPD,
27753 IX86_BUILTIN_CMPPS,
27754 IX86_BUILTIN_CMPPD256,
27755 IX86_BUILTIN_CMPPS256,
27756 IX86_BUILTIN_CVTDQ2PD256,
27757 IX86_BUILTIN_CVTDQ2PS256,
27758 IX86_BUILTIN_CVTPD2PS256,
27759 IX86_BUILTIN_CVTPS2DQ256,
27760 IX86_BUILTIN_CVTPS2PD256,
27761 IX86_BUILTIN_CVTTPD2DQ256,
27762 IX86_BUILTIN_CVTPD2DQ256,
27763 IX86_BUILTIN_CVTTPS2DQ256,
27764 IX86_BUILTIN_EXTRACTF128PD256,
27765 IX86_BUILTIN_EXTRACTF128PS256,
27766 IX86_BUILTIN_EXTRACTF128SI256,
27767 IX86_BUILTIN_VZEROALL,
27768 IX86_BUILTIN_VZEROUPPER,
27769 IX86_BUILTIN_VPERMILVARPD,
27770 IX86_BUILTIN_VPERMILVARPS,
27771 IX86_BUILTIN_VPERMILVARPD256,
27772 IX86_BUILTIN_VPERMILVARPS256,
27773 IX86_BUILTIN_VPERMILPD,
27774 IX86_BUILTIN_VPERMILPS,
27775 IX86_BUILTIN_VPERMILPD256,
27776 IX86_BUILTIN_VPERMILPS256,
27777 IX86_BUILTIN_VPERMIL2PD,
27778 IX86_BUILTIN_VPERMIL2PS,
27779 IX86_BUILTIN_VPERMIL2PD256,
27780 IX86_BUILTIN_VPERMIL2PS256,
27781 IX86_BUILTIN_VPERM2F128PD256,
27782 IX86_BUILTIN_VPERM2F128PS256,
27783 IX86_BUILTIN_VPERM2F128SI256,
27784 IX86_BUILTIN_VBROADCASTSS,
27785 IX86_BUILTIN_VBROADCASTSD256,
27786 IX86_BUILTIN_VBROADCASTSS256,
27787 IX86_BUILTIN_VBROADCASTPD256,
27788 IX86_BUILTIN_VBROADCASTPS256,
27789 IX86_BUILTIN_VINSERTF128PD256,
27790 IX86_BUILTIN_VINSERTF128PS256,
27791 IX86_BUILTIN_VINSERTF128SI256,
27792 IX86_BUILTIN_LOADUPD256,
27793 IX86_BUILTIN_LOADUPS256,
27794 IX86_BUILTIN_STOREUPD256,
27795 IX86_BUILTIN_STOREUPS256,
27796 IX86_BUILTIN_LDDQU256,
27797 IX86_BUILTIN_MOVNTDQ256,
27798 IX86_BUILTIN_MOVNTPD256,
27799 IX86_BUILTIN_MOVNTPS256,
27800 IX86_BUILTIN_LOADDQU256,
27801 IX86_BUILTIN_STOREDQU256,
27802 IX86_BUILTIN_MASKLOADPD,
27803 IX86_BUILTIN_MASKLOADPS,
27804 IX86_BUILTIN_MASKSTOREPD,
27805 IX86_BUILTIN_MASKSTOREPS,
27806 IX86_BUILTIN_MASKLOADPD256,
27807 IX86_BUILTIN_MASKLOADPS256,
27808 IX86_BUILTIN_MASKSTOREPD256,
27809 IX86_BUILTIN_MASKSTOREPS256,
27810 IX86_BUILTIN_MOVSHDUP256,
27811 IX86_BUILTIN_MOVSLDUP256,
27812 IX86_BUILTIN_MOVDDUP256,
27813
27814 IX86_BUILTIN_SQRTPD256,
27815 IX86_BUILTIN_SQRTPS256,
27816 IX86_BUILTIN_SQRTPS_NR256,
27817 IX86_BUILTIN_RSQRTPS256,
27818 IX86_BUILTIN_RSQRTPS_NR256,
27819
27820 IX86_BUILTIN_RCPPS256,
27821
27822 IX86_BUILTIN_ROUNDPD256,
27823 IX86_BUILTIN_ROUNDPS256,
27824
27825 IX86_BUILTIN_FLOORPD256,
27826 IX86_BUILTIN_CEILPD256,
27827 IX86_BUILTIN_TRUNCPD256,
27828 IX86_BUILTIN_RINTPD256,
27829 IX86_BUILTIN_ROUNDPD_AZ256,
27830
27831 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27832 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27833 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27834
27835 IX86_BUILTIN_FLOORPS256,
27836 IX86_BUILTIN_CEILPS256,
27837 IX86_BUILTIN_TRUNCPS256,
27838 IX86_BUILTIN_RINTPS256,
27839 IX86_BUILTIN_ROUNDPS_AZ256,
27840
27841 IX86_BUILTIN_FLOORPS_SFIX256,
27842 IX86_BUILTIN_CEILPS_SFIX256,
27843 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27844
27845 IX86_BUILTIN_UNPCKHPD256,
27846 IX86_BUILTIN_UNPCKLPD256,
27847 IX86_BUILTIN_UNPCKHPS256,
27848 IX86_BUILTIN_UNPCKLPS256,
27849
27850 IX86_BUILTIN_SI256_SI,
27851 IX86_BUILTIN_PS256_PS,
27852 IX86_BUILTIN_PD256_PD,
27853 IX86_BUILTIN_SI_SI256,
27854 IX86_BUILTIN_PS_PS256,
27855 IX86_BUILTIN_PD_PD256,
27856
27857 IX86_BUILTIN_VTESTZPD,
27858 IX86_BUILTIN_VTESTCPD,
27859 IX86_BUILTIN_VTESTNZCPD,
27860 IX86_BUILTIN_VTESTZPS,
27861 IX86_BUILTIN_VTESTCPS,
27862 IX86_BUILTIN_VTESTNZCPS,
27863 IX86_BUILTIN_VTESTZPD256,
27864 IX86_BUILTIN_VTESTCPD256,
27865 IX86_BUILTIN_VTESTNZCPD256,
27866 IX86_BUILTIN_VTESTZPS256,
27867 IX86_BUILTIN_VTESTCPS256,
27868 IX86_BUILTIN_VTESTNZCPS256,
27869 IX86_BUILTIN_PTESTZ256,
27870 IX86_BUILTIN_PTESTC256,
27871 IX86_BUILTIN_PTESTNZC256,
27872
27873 IX86_BUILTIN_MOVMSKPD256,
27874 IX86_BUILTIN_MOVMSKPS256,
27875
27876 /* AVX2 */
27877 IX86_BUILTIN_MPSADBW256,
27878 IX86_BUILTIN_PABSB256,
27879 IX86_BUILTIN_PABSW256,
27880 IX86_BUILTIN_PABSD256,
27881 IX86_BUILTIN_PACKSSDW256,
27882 IX86_BUILTIN_PACKSSWB256,
27883 IX86_BUILTIN_PACKUSDW256,
27884 IX86_BUILTIN_PACKUSWB256,
27885 IX86_BUILTIN_PADDB256,
27886 IX86_BUILTIN_PADDW256,
27887 IX86_BUILTIN_PADDD256,
27888 IX86_BUILTIN_PADDQ256,
27889 IX86_BUILTIN_PADDSB256,
27890 IX86_BUILTIN_PADDSW256,
27891 IX86_BUILTIN_PADDUSB256,
27892 IX86_BUILTIN_PADDUSW256,
27893 IX86_BUILTIN_PALIGNR256,
27894 IX86_BUILTIN_AND256I,
27895 IX86_BUILTIN_ANDNOT256I,
27896 IX86_BUILTIN_PAVGB256,
27897 IX86_BUILTIN_PAVGW256,
27898 IX86_BUILTIN_PBLENDVB256,
27899 IX86_BUILTIN_PBLENDVW256,
27900 IX86_BUILTIN_PCMPEQB256,
27901 IX86_BUILTIN_PCMPEQW256,
27902 IX86_BUILTIN_PCMPEQD256,
27903 IX86_BUILTIN_PCMPEQQ256,
27904 IX86_BUILTIN_PCMPGTB256,
27905 IX86_BUILTIN_PCMPGTW256,
27906 IX86_BUILTIN_PCMPGTD256,
27907 IX86_BUILTIN_PCMPGTQ256,
27908 IX86_BUILTIN_PHADDW256,
27909 IX86_BUILTIN_PHADDD256,
27910 IX86_BUILTIN_PHADDSW256,
27911 IX86_BUILTIN_PHSUBW256,
27912 IX86_BUILTIN_PHSUBD256,
27913 IX86_BUILTIN_PHSUBSW256,
27914 IX86_BUILTIN_PMADDUBSW256,
27915 IX86_BUILTIN_PMADDWD256,
27916 IX86_BUILTIN_PMAXSB256,
27917 IX86_BUILTIN_PMAXSW256,
27918 IX86_BUILTIN_PMAXSD256,
27919 IX86_BUILTIN_PMAXUB256,
27920 IX86_BUILTIN_PMAXUW256,
27921 IX86_BUILTIN_PMAXUD256,
27922 IX86_BUILTIN_PMINSB256,
27923 IX86_BUILTIN_PMINSW256,
27924 IX86_BUILTIN_PMINSD256,
27925 IX86_BUILTIN_PMINUB256,
27926 IX86_BUILTIN_PMINUW256,
27927 IX86_BUILTIN_PMINUD256,
27928 IX86_BUILTIN_PMOVMSKB256,
27929 IX86_BUILTIN_PMOVSXBW256,
27930 IX86_BUILTIN_PMOVSXBD256,
27931 IX86_BUILTIN_PMOVSXBQ256,
27932 IX86_BUILTIN_PMOVSXWD256,
27933 IX86_BUILTIN_PMOVSXWQ256,
27934 IX86_BUILTIN_PMOVSXDQ256,
27935 IX86_BUILTIN_PMOVZXBW256,
27936 IX86_BUILTIN_PMOVZXBD256,
27937 IX86_BUILTIN_PMOVZXBQ256,
27938 IX86_BUILTIN_PMOVZXWD256,
27939 IX86_BUILTIN_PMOVZXWQ256,
27940 IX86_BUILTIN_PMOVZXDQ256,
27941 IX86_BUILTIN_PMULDQ256,
27942 IX86_BUILTIN_PMULHRSW256,
27943 IX86_BUILTIN_PMULHUW256,
27944 IX86_BUILTIN_PMULHW256,
27945 IX86_BUILTIN_PMULLW256,
27946 IX86_BUILTIN_PMULLD256,
27947 IX86_BUILTIN_PMULUDQ256,
27948 IX86_BUILTIN_POR256,
27949 IX86_BUILTIN_PSADBW256,
27950 IX86_BUILTIN_PSHUFB256,
27951 IX86_BUILTIN_PSHUFD256,
27952 IX86_BUILTIN_PSHUFHW256,
27953 IX86_BUILTIN_PSHUFLW256,
27954 IX86_BUILTIN_PSIGNB256,
27955 IX86_BUILTIN_PSIGNW256,
27956 IX86_BUILTIN_PSIGND256,
27957 IX86_BUILTIN_PSLLDQI256,
27958 IX86_BUILTIN_PSLLWI256,
27959 IX86_BUILTIN_PSLLW256,
27960 IX86_BUILTIN_PSLLDI256,
27961 IX86_BUILTIN_PSLLD256,
27962 IX86_BUILTIN_PSLLQI256,
27963 IX86_BUILTIN_PSLLQ256,
27964 IX86_BUILTIN_PSRAWI256,
27965 IX86_BUILTIN_PSRAW256,
27966 IX86_BUILTIN_PSRADI256,
27967 IX86_BUILTIN_PSRAD256,
27968 IX86_BUILTIN_PSRLDQI256,
27969 IX86_BUILTIN_PSRLWI256,
27970 IX86_BUILTIN_PSRLW256,
27971 IX86_BUILTIN_PSRLDI256,
27972 IX86_BUILTIN_PSRLD256,
27973 IX86_BUILTIN_PSRLQI256,
27974 IX86_BUILTIN_PSRLQ256,
27975 IX86_BUILTIN_PSUBB256,
27976 IX86_BUILTIN_PSUBW256,
27977 IX86_BUILTIN_PSUBD256,
27978 IX86_BUILTIN_PSUBQ256,
27979 IX86_BUILTIN_PSUBSB256,
27980 IX86_BUILTIN_PSUBSW256,
27981 IX86_BUILTIN_PSUBUSB256,
27982 IX86_BUILTIN_PSUBUSW256,
27983 IX86_BUILTIN_PUNPCKHBW256,
27984 IX86_BUILTIN_PUNPCKHWD256,
27985 IX86_BUILTIN_PUNPCKHDQ256,
27986 IX86_BUILTIN_PUNPCKHQDQ256,
27987 IX86_BUILTIN_PUNPCKLBW256,
27988 IX86_BUILTIN_PUNPCKLWD256,
27989 IX86_BUILTIN_PUNPCKLDQ256,
27990 IX86_BUILTIN_PUNPCKLQDQ256,
27991 IX86_BUILTIN_PXOR256,
27992 IX86_BUILTIN_MOVNTDQA256,
27993 IX86_BUILTIN_VBROADCASTSS_PS,
27994 IX86_BUILTIN_VBROADCASTSS_PS256,
27995 IX86_BUILTIN_VBROADCASTSD_PD256,
27996 IX86_BUILTIN_VBROADCASTSI256,
27997 IX86_BUILTIN_PBLENDD256,
27998 IX86_BUILTIN_PBLENDD128,
27999 IX86_BUILTIN_PBROADCASTB256,
28000 IX86_BUILTIN_PBROADCASTW256,
28001 IX86_BUILTIN_PBROADCASTD256,
28002 IX86_BUILTIN_PBROADCASTQ256,
28003 IX86_BUILTIN_PBROADCASTB128,
28004 IX86_BUILTIN_PBROADCASTW128,
28005 IX86_BUILTIN_PBROADCASTD128,
28006 IX86_BUILTIN_PBROADCASTQ128,
28007 IX86_BUILTIN_VPERMVARSI256,
28008 IX86_BUILTIN_VPERMDF256,
28009 IX86_BUILTIN_VPERMVARSF256,
28010 IX86_BUILTIN_VPERMDI256,
28011 IX86_BUILTIN_VPERMTI256,
28012 IX86_BUILTIN_VEXTRACT128I256,
28013 IX86_BUILTIN_VINSERT128I256,
28014 IX86_BUILTIN_MASKLOADD,
28015 IX86_BUILTIN_MASKLOADQ,
28016 IX86_BUILTIN_MASKLOADD256,
28017 IX86_BUILTIN_MASKLOADQ256,
28018 IX86_BUILTIN_MASKSTORED,
28019 IX86_BUILTIN_MASKSTOREQ,
28020 IX86_BUILTIN_MASKSTORED256,
28021 IX86_BUILTIN_MASKSTOREQ256,
28022 IX86_BUILTIN_PSLLVV4DI,
28023 IX86_BUILTIN_PSLLVV2DI,
28024 IX86_BUILTIN_PSLLVV8SI,
28025 IX86_BUILTIN_PSLLVV4SI,
28026 IX86_BUILTIN_PSRAVV8SI,
28027 IX86_BUILTIN_PSRAVV4SI,
28028 IX86_BUILTIN_PSRLVV4DI,
28029 IX86_BUILTIN_PSRLVV2DI,
28030 IX86_BUILTIN_PSRLVV8SI,
28031 IX86_BUILTIN_PSRLVV4SI,
28032
28033 IX86_BUILTIN_GATHERSIV2DF,
28034 IX86_BUILTIN_GATHERSIV4DF,
28035 IX86_BUILTIN_GATHERDIV2DF,
28036 IX86_BUILTIN_GATHERDIV4DF,
28037 IX86_BUILTIN_GATHERSIV4SF,
28038 IX86_BUILTIN_GATHERSIV8SF,
28039 IX86_BUILTIN_GATHERDIV4SF,
28040 IX86_BUILTIN_GATHERDIV8SF,
28041 IX86_BUILTIN_GATHERSIV2DI,
28042 IX86_BUILTIN_GATHERSIV4DI,
28043 IX86_BUILTIN_GATHERDIV2DI,
28044 IX86_BUILTIN_GATHERDIV4DI,
28045 IX86_BUILTIN_GATHERSIV4SI,
28046 IX86_BUILTIN_GATHERSIV8SI,
28047 IX86_BUILTIN_GATHERDIV4SI,
28048 IX86_BUILTIN_GATHERDIV8SI,
28049
28050 /* AVX512F */
28051 IX86_BUILTIN_ADDPD512,
28052 IX86_BUILTIN_ADDPS512,
28053 IX86_BUILTIN_ADDSD_ROUND,
28054 IX86_BUILTIN_ADDSS_ROUND,
28055 IX86_BUILTIN_ALIGND512,
28056 IX86_BUILTIN_ALIGNQ512,
28057 IX86_BUILTIN_BLENDMD512,
28058 IX86_BUILTIN_BLENDMPD512,
28059 IX86_BUILTIN_BLENDMPS512,
28060 IX86_BUILTIN_BLENDMQ512,
28061 IX86_BUILTIN_BROADCASTF32X4_512,
28062 IX86_BUILTIN_BROADCASTF64X4_512,
28063 IX86_BUILTIN_BROADCASTI32X4_512,
28064 IX86_BUILTIN_BROADCASTI64X4_512,
28065 IX86_BUILTIN_BROADCASTSD512,
28066 IX86_BUILTIN_BROADCASTSS512,
28067 IX86_BUILTIN_CMPD512,
28068 IX86_BUILTIN_CMPPD512,
28069 IX86_BUILTIN_CMPPS512,
28070 IX86_BUILTIN_CMPQ512,
28071 IX86_BUILTIN_CMPSD_MASK,
28072 IX86_BUILTIN_CMPSS_MASK,
28073 IX86_BUILTIN_COMIDF,
28074 IX86_BUILTIN_COMISF,
28075 IX86_BUILTIN_COMPRESSPD512,
28076 IX86_BUILTIN_COMPRESSPDSTORE512,
28077 IX86_BUILTIN_COMPRESSPS512,
28078 IX86_BUILTIN_COMPRESSPSSTORE512,
28079 IX86_BUILTIN_CVTDQ2PD512,
28080 IX86_BUILTIN_CVTDQ2PS512,
28081 IX86_BUILTIN_CVTPD2DQ512,
28082 IX86_BUILTIN_CVTPD2PS512,
28083 IX86_BUILTIN_CVTPD2UDQ512,
28084 IX86_BUILTIN_CVTPH2PS512,
28085 IX86_BUILTIN_CVTPS2DQ512,
28086 IX86_BUILTIN_CVTPS2PD512,
28087 IX86_BUILTIN_CVTPS2PH512,
28088 IX86_BUILTIN_CVTPS2UDQ512,
28089 IX86_BUILTIN_CVTSD2SS_ROUND,
28090 IX86_BUILTIN_CVTSI2SD64,
28091 IX86_BUILTIN_CVTSI2SS32,
28092 IX86_BUILTIN_CVTSI2SS64,
28093 IX86_BUILTIN_CVTSS2SD_ROUND,
28094 IX86_BUILTIN_CVTTPD2DQ512,
28095 IX86_BUILTIN_CVTTPD2UDQ512,
28096 IX86_BUILTIN_CVTTPS2DQ512,
28097 IX86_BUILTIN_CVTTPS2UDQ512,
28098 IX86_BUILTIN_CVTUDQ2PD512,
28099 IX86_BUILTIN_CVTUDQ2PS512,
28100 IX86_BUILTIN_CVTUSI2SD32,
28101 IX86_BUILTIN_CVTUSI2SD64,
28102 IX86_BUILTIN_CVTUSI2SS32,
28103 IX86_BUILTIN_CVTUSI2SS64,
28104 IX86_BUILTIN_DIVPD512,
28105 IX86_BUILTIN_DIVPS512,
28106 IX86_BUILTIN_DIVSD_ROUND,
28107 IX86_BUILTIN_DIVSS_ROUND,
28108 IX86_BUILTIN_EXPANDPD512,
28109 IX86_BUILTIN_EXPANDPD512Z,
28110 IX86_BUILTIN_EXPANDPDLOAD512,
28111 IX86_BUILTIN_EXPANDPDLOAD512Z,
28112 IX86_BUILTIN_EXPANDPS512,
28113 IX86_BUILTIN_EXPANDPS512Z,
28114 IX86_BUILTIN_EXPANDPSLOAD512,
28115 IX86_BUILTIN_EXPANDPSLOAD512Z,
28116 IX86_BUILTIN_EXTRACTF32X4,
28117 IX86_BUILTIN_EXTRACTF64X4,
28118 IX86_BUILTIN_EXTRACTI32X4,
28119 IX86_BUILTIN_EXTRACTI64X4,
28120 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28121 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28122 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28123 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28124 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28125 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28126 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28127 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28128 IX86_BUILTIN_GETEXPPD512,
28129 IX86_BUILTIN_GETEXPPS512,
28130 IX86_BUILTIN_GETEXPSD128,
28131 IX86_BUILTIN_GETEXPSS128,
28132 IX86_BUILTIN_GETMANTPD512,
28133 IX86_BUILTIN_GETMANTPS512,
28134 IX86_BUILTIN_GETMANTSD128,
28135 IX86_BUILTIN_GETMANTSS128,
28136 IX86_BUILTIN_INSERTF32X4,
28137 IX86_BUILTIN_INSERTF64X4,
28138 IX86_BUILTIN_INSERTI32X4,
28139 IX86_BUILTIN_INSERTI64X4,
28140 IX86_BUILTIN_LOADAPD512,
28141 IX86_BUILTIN_LOADAPS512,
28142 IX86_BUILTIN_LOADDQUDI512,
28143 IX86_BUILTIN_LOADDQUSI512,
28144 IX86_BUILTIN_LOADUPD512,
28145 IX86_BUILTIN_LOADUPS512,
28146 IX86_BUILTIN_MAXPD512,
28147 IX86_BUILTIN_MAXPS512,
28148 IX86_BUILTIN_MAXSD_ROUND,
28149 IX86_BUILTIN_MAXSS_ROUND,
28150 IX86_BUILTIN_MINPD512,
28151 IX86_BUILTIN_MINPS512,
28152 IX86_BUILTIN_MINSD_ROUND,
28153 IX86_BUILTIN_MINSS_ROUND,
28154 IX86_BUILTIN_MOVAPD512,
28155 IX86_BUILTIN_MOVAPS512,
28156 IX86_BUILTIN_MOVDDUP512,
28157 IX86_BUILTIN_MOVDQA32LOAD512,
28158 IX86_BUILTIN_MOVDQA32STORE512,
28159 IX86_BUILTIN_MOVDQA32_512,
28160 IX86_BUILTIN_MOVDQA64LOAD512,
28161 IX86_BUILTIN_MOVDQA64STORE512,
28162 IX86_BUILTIN_MOVDQA64_512,
28163 IX86_BUILTIN_MOVNTDQ512,
28164 IX86_BUILTIN_MOVNTDQA512,
28165 IX86_BUILTIN_MOVNTPD512,
28166 IX86_BUILTIN_MOVNTPS512,
28167 IX86_BUILTIN_MOVSHDUP512,
28168 IX86_BUILTIN_MOVSLDUP512,
28169 IX86_BUILTIN_MULPD512,
28170 IX86_BUILTIN_MULPS512,
28171 IX86_BUILTIN_MULSD_ROUND,
28172 IX86_BUILTIN_MULSS_ROUND,
28173 IX86_BUILTIN_PABSD512,
28174 IX86_BUILTIN_PABSQ512,
28175 IX86_BUILTIN_PADDD512,
28176 IX86_BUILTIN_PADDQ512,
28177 IX86_BUILTIN_PANDD512,
28178 IX86_BUILTIN_PANDND512,
28179 IX86_BUILTIN_PANDNQ512,
28180 IX86_BUILTIN_PANDQ512,
28181 IX86_BUILTIN_PBROADCASTD512,
28182 IX86_BUILTIN_PBROADCASTD512_GPR,
28183 IX86_BUILTIN_PBROADCASTMB512,
28184 IX86_BUILTIN_PBROADCASTMW512,
28185 IX86_BUILTIN_PBROADCASTQ512,
28186 IX86_BUILTIN_PBROADCASTQ512_GPR,
28187 IX86_BUILTIN_PBROADCASTQ512_MEM,
28188 IX86_BUILTIN_PCMPEQD512_MASK,
28189 IX86_BUILTIN_PCMPEQQ512_MASK,
28190 IX86_BUILTIN_PCMPGTD512_MASK,
28191 IX86_BUILTIN_PCMPGTQ512_MASK,
28192 IX86_BUILTIN_PCOMPRESSD512,
28193 IX86_BUILTIN_PCOMPRESSDSTORE512,
28194 IX86_BUILTIN_PCOMPRESSQ512,
28195 IX86_BUILTIN_PCOMPRESSQSTORE512,
28196 IX86_BUILTIN_PEXPANDD512,
28197 IX86_BUILTIN_PEXPANDD512Z,
28198 IX86_BUILTIN_PEXPANDDLOAD512,
28199 IX86_BUILTIN_PEXPANDDLOAD512Z,
28200 IX86_BUILTIN_PEXPANDQ512,
28201 IX86_BUILTIN_PEXPANDQ512Z,
28202 IX86_BUILTIN_PEXPANDQLOAD512,
28203 IX86_BUILTIN_PEXPANDQLOAD512Z,
28204 IX86_BUILTIN_PMAXSD512,
28205 IX86_BUILTIN_PMAXSQ512,
28206 IX86_BUILTIN_PMAXUD512,
28207 IX86_BUILTIN_PMAXUQ512,
28208 IX86_BUILTIN_PMINSD512,
28209 IX86_BUILTIN_PMINSQ512,
28210 IX86_BUILTIN_PMINUD512,
28211 IX86_BUILTIN_PMINUQ512,
28212 IX86_BUILTIN_PMOVDB512,
28213 IX86_BUILTIN_PMOVDB512_MEM,
28214 IX86_BUILTIN_PMOVDW512,
28215 IX86_BUILTIN_PMOVDW512_MEM,
28216 IX86_BUILTIN_PMOVQB512,
28217 IX86_BUILTIN_PMOVQB512_MEM,
28218 IX86_BUILTIN_PMOVQD512,
28219 IX86_BUILTIN_PMOVQD512_MEM,
28220 IX86_BUILTIN_PMOVQW512,
28221 IX86_BUILTIN_PMOVQW512_MEM,
28222 IX86_BUILTIN_PMOVSDB512,
28223 IX86_BUILTIN_PMOVSDB512_MEM,
28224 IX86_BUILTIN_PMOVSDW512,
28225 IX86_BUILTIN_PMOVSDW512_MEM,
28226 IX86_BUILTIN_PMOVSQB512,
28227 IX86_BUILTIN_PMOVSQB512_MEM,
28228 IX86_BUILTIN_PMOVSQD512,
28229 IX86_BUILTIN_PMOVSQD512_MEM,
28230 IX86_BUILTIN_PMOVSQW512,
28231 IX86_BUILTIN_PMOVSQW512_MEM,
28232 IX86_BUILTIN_PMOVSXBD512,
28233 IX86_BUILTIN_PMOVSXBQ512,
28234 IX86_BUILTIN_PMOVSXDQ512,
28235 IX86_BUILTIN_PMOVSXWD512,
28236 IX86_BUILTIN_PMOVSXWQ512,
28237 IX86_BUILTIN_PMOVUSDB512,
28238 IX86_BUILTIN_PMOVUSDB512_MEM,
28239 IX86_BUILTIN_PMOVUSDW512,
28240 IX86_BUILTIN_PMOVUSDW512_MEM,
28241 IX86_BUILTIN_PMOVUSQB512,
28242 IX86_BUILTIN_PMOVUSQB512_MEM,
28243 IX86_BUILTIN_PMOVUSQD512,
28244 IX86_BUILTIN_PMOVUSQD512_MEM,
28245 IX86_BUILTIN_PMOVUSQW512,
28246 IX86_BUILTIN_PMOVUSQW512_MEM,
28247 IX86_BUILTIN_PMOVZXBD512,
28248 IX86_BUILTIN_PMOVZXBQ512,
28249 IX86_BUILTIN_PMOVZXDQ512,
28250 IX86_BUILTIN_PMOVZXWD512,
28251 IX86_BUILTIN_PMOVZXWQ512,
28252 IX86_BUILTIN_PMULDQ512,
28253 IX86_BUILTIN_PMULLD512,
28254 IX86_BUILTIN_PMULUDQ512,
28255 IX86_BUILTIN_PORD512,
28256 IX86_BUILTIN_PORQ512,
28257 IX86_BUILTIN_PROLD512,
28258 IX86_BUILTIN_PROLQ512,
28259 IX86_BUILTIN_PROLVD512,
28260 IX86_BUILTIN_PROLVQ512,
28261 IX86_BUILTIN_PRORD512,
28262 IX86_BUILTIN_PRORQ512,
28263 IX86_BUILTIN_PRORVD512,
28264 IX86_BUILTIN_PRORVQ512,
28265 IX86_BUILTIN_PSHUFD512,
28266 IX86_BUILTIN_PSLLD512,
28267 IX86_BUILTIN_PSLLDI512,
28268 IX86_BUILTIN_PSLLQ512,
28269 IX86_BUILTIN_PSLLQI512,
28270 IX86_BUILTIN_PSLLVV16SI,
28271 IX86_BUILTIN_PSLLVV8DI,
28272 IX86_BUILTIN_PSRAD512,
28273 IX86_BUILTIN_PSRADI512,
28274 IX86_BUILTIN_PSRAQ512,
28275 IX86_BUILTIN_PSRAQI512,
28276 IX86_BUILTIN_PSRAVV16SI,
28277 IX86_BUILTIN_PSRAVV8DI,
28278 IX86_BUILTIN_PSRLD512,
28279 IX86_BUILTIN_PSRLDI512,
28280 IX86_BUILTIN_PSRLQ512,
28281 IX86_BUILTIN_PSRLQI512,
28282 IX86_BUILTIN_PSRLVV16SI,
28283 IX86_BUILTIN_PSRLVV8DI,
28284 IX86_BUILTIN_PSUBD512,
28285 IX86_BUILTIN_PSUBQ512,
28286 IX86_BUILTIN_PTESTMD512,
28287 IX86_BUILTIN_PTESTMQ512,
28288 IX86_BUILTIN_PTESTNMD512,
28289 IX86_BUILTIN_PTESTNMQ512,
28290 IX86_BUILTIN_PUNPCKHDQ512,
28291 IX86_BUILTIN_PUNPCKHQDQ512,
28292 IX86_BUILTIN_PUNPCKLDQ512,
28293 IX86_BUILTIN_PUNPCKLQDQ512,
28294 IX86_BUILTIN_PXORD512,
28295 IX86_BUILTIN_PXORQ512,
28296 IX86_BUILTIN_RCP14PD512,
28297 IX86_BUILTIN_RCP14PS512,
28298 IX86_BUILTIN_RCP14SD,
28299 IX86_BUILTIN_RCP14SS,
28300 IX86_BUILTIN_RNDSCALEPD,
28301 IX86_BUILTIN_RNDSCALEPS,
28302 IX86_BUILTIN_RNDSCALESD,
28303 IX86_BUILTIN_RNDSCALESS,
28304 IX86_BUILTIN_RSQRT14PD512,
28305 IX86_BUILTIN_RSQRT14PS512,
28306 IX86_BUILTIN_RSQRT14SD,
28307 IX86_BUILTIN_RSQRT14SS,
28308 IX86_BUILTIN_SCALEFPD512,
28309 IX86_BUILTIN_SCALEFPS512,
28310 IX86_BUILTIN_SCALEFSD,
28311 IX86_BUILTIN_SCALEFSS,
28312 IX86_BUILTIN_SHUFPD512,
28313 IX86_BUILTIN_SHUFPS512,
28314 IX86_BUILTIN_SHUF_F32x4,
28315 IX86_BUILTIN_SHUF_F64x2,
28316 IX86_BUILTIN_SHUF_I32x4,
28317 IX86_BUILTIN_SHUF_I64x2,
28318 IX86_BUILTIN_SQRTPD512,
28319 IX86_BUILTIN_SQRTPD512_MASK,
28320 IX86_BUILTIN_SQRTPS512_MASK,
28321 IX86_BUILTIN_SQRTPS_NR512,
28322 IX86_BUILTIN_SQRTSD_ROUND,
28323 IX86_BUILTIN_SQRTSS_ROUND,
28324 IX86_BUILTIN_STOREAPD512,
28325 IX86_BUILTIN_STOREAPS512,
28326 IX86_BUILTIN_STOREDQUDI512,
28327 IX86_BUILTIN_STOREDQUSI512,
28328 IX86_BUILTIN_STOREUPD512,
28329 IX86_BUILTIN_STOREUPS512,
28330 IX86_BUILTIN_SUBPD512,
28331 IX86_BUILTIN_SUBPS512,
28332 IX86_BUILTIN_SUBSD_ROUND,
28333 IX86_BUILTIN_SUBSS_ROUND,
28334 IX86_BUILTIN_UCMPD512,
28335 IX86_BUILTIN_UCMPQ512,
28336 IX86_BUILTIN_UNPCKHPD512,
28337 IX86_BUILTIN_UNPCKHPS512,
28338 IX86_BUILTIN_UNPCKLPD512,
28339 IX86_BUILTIN_UNPCKLPS512,
28340 IX86_BUILTIN_VCVTSD2SI32,
28341 IX86_BUILTIN_VCVTSD2SI64,
28342 IX86_BUILTIN_VCVTSD2USI32,
28343 IX86_BUILTIN_VCVTSD2USI64,
28344 IX86_BUILTIN_VCVTSS2SI32,
28345 IX86_BUILTIN_VCVTSS2SI64,
28346 IX86_BUILTIN_VCVTSS2USI32,
28347 IX86_BUILTIN_VCVTSS2USI64,
28348 IX86_BUILTIN_VCVTTSD2SI32,
28349 IX86_BUILTIN_VCVTTSD2SI64,
28350 IX86_BUILTIN_VCVTTSD2USI32,
28351 IX86_BUILTIN_VCVTTSD2USI64,
28352 IX86_BUILTIN_VCVTTSS2SI32,
28353 IX86_BUILTIN_VCVTTSS2SI64,
28354 IX86_BUILTIN_VCVTTSS2USI32,
28355 IX86_BUILTIN_VCVTTSS2USI64,
28356 IX86_BUILTIN_VFMADDPD512_MASK,
28357 IX86_BUILTIN_VFMADDPD512_MASK3,
28358 IX86_BUILTIN_VFMADDPD512_MASKZ,
28359 IX86_BUILTIN_VFMADDPS512_MASK,
28360 IX86_BUILTIN_VFMADDPS512_MASK3,
28361 IX86_BUILTIN_VFMADDPS512_MASKZ,
28362 IX86_BUILTIN_VFMADDSD3_ROUND,
28363 IX86_BUILTIN_VFMADDSS3_ROUND,
28364 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28365 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28366 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28367 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28368 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28369 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28370 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28371 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28372 IX86_BUILTIN_VFMSUBPD512_MASK3,
28373 IX86_BUILTIN_VFMSUBPS512_MASK3,
28374 IX86_BUILTIN_VFMSUBSD3_MASK3,
28375 IX86_BUILTIN_VFMSUBSS3_MASK3,
28376 IX86_BUILTIN_VFNMADDPD512_MASK,
28377 IX86_BUILTIN_VFNMADDPS512_MASK,
28378 IX86_BUILTIN_VFNMSUBPD512_MASK,
28379 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28380 IX86_BUILTIN_VFNMSUBPS512_MASK,
28381 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28382 IX86_BUILTIN_VPCLZCNTD512,
28383 IX86_BUILTIN_VPCLZCNTQ512,
28384 IX86_BUILTIN_VPCONFLICTD512,
28385 IX86_BUILTIN_VPCONFLICTQ512,
28386 IX86_BUILTIN_VPERMDF512,
28387 IX86_BUILTIN_VPERMDI512,
28388 IX86_BUILTIN_VPERMI2VARD512,
28389 IX86_BUILTIN_VPERMI2VARPD512,
28390 IX86_BUILTIN_VPERMI2VARPS512,
28391 IX86_BUILTIN_VPERMI2VARQ512,
28392 IX86_BUILTIN_VPERMILPD512,
28393 IX86_BUILTIN_VPERMILPS512,
28394 IX86_BUILTIN_VPERMILVARPD512,
28395 IX86_BUILTIN_VPERMILVARPS512,
28396 IX86_BUILTIN_VPERMT2VARD512,
28397 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28398 IX86_BUILTIN_VPERMT2VARPD512,
28399 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28400 IX86_BUILTIN_VPERMT2VARPS512,
28401 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28402 IX86_BUILTIN_VPERMT2VARQ512,
28403 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28404 IX86_BUILTIN_VPERMVARDF512,
28405 IX86_BUILTIN_VPERMVARDI512,
28406 IX86_BUILTIN_VPERMVARSF512,
28407 IX86_BUILTIN_VPERMVARSI512,
28408 IX86_BUILTIN_VTERNLOGD512_MASK,
28409 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28410 IX86_BUILTIN_VTERNLOGQ512_MASK,
28411 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28412
28413 /* Mask arithmetic operations */
28414 IX86_BUILTIN_KAND16,
28415 IX86_BUILTIN_KANDN16,
28416 IX86_BUILTIN_KNOT16,
28417 IX86_BUILTIN_KOR16,
28418 IX86_BUILTIN_KORTESTC16,
28419 IX86_BUILTIN_KORTESTZ16,
28420 IX86_BUILTIN_KUNPCKBW,
28421 IX86_BUILTIN_KXNOR16,
28422 IX86_BUILTIN_KXOR16,
28423 IX86_BUILTIN_KMOV16,
28424
28425 /* Alternate 4- and 8-element gather/scatter builtins for the vectorizer,
28426 where all operands are 32-byte or 64-byte wide respectively. */
28427 IX86_BUILTIN_GATHERALTSIV4DF,
28428 IX86_BUILTIN_GATHERALTDIV8SF,
28429 IX86_BUILTIN_GATHERALTSIV4DI,
28430 IX86_BUILTIN_GATHERALTDIV8SI,
28431 IX86_BUILTIN_GATHER3ALTDIV16SF,
28432 IX86_BUILTIN_GATHER3ALTDIV16SI,
28433 IX86_BUILTIN_GATHER3ALTSIV8DF,
28434 IX86_BUILTIN_GATHER3ALTSIV8DI,
28435 IX86_BUILTIN_GATHER3DIV16SF,
28436 IX86_BUILTIN_GATHER3DIV16SI,
28437 IX86_BUILTIN_GATHER3DIV8DF,
28438 IX86_BUILTIN_GATHER3DIV8DI,
28439 IX86_BUILTIN_GATHER3SIV16SF,
28440 IX86_BUILTIN_GATHER3SIV16SI,
28441 IX86_BUILTIN_GATHER3SIV8DF,
28442 IX86_BUILTIN_GATHER3SIV8DI,
28443 IX86_BUILTIN_SCATTERDIV16SF,
28444 IX86_BUILTIN_SCATTERDIV16SI,
28445 IX86_BUILTIN_SCATTERDIV8DF,
28446 IX86_BUILTIN_SCATTERDIV8DI,
28447 IX86_BUILTIN_SCATTERSIV16SF,
28448 IX86_BUILTIN_SCATTERSIV16SI,
28449 IX86_BUILTIN_SCATTERSIV8DF,
28450 IX86_BUILTIN_SCATTERSIV8DI,
28451
28452 /* AVX512PF */
28453 IX86_BUILTIN_GATHERPFQPD,
28454 IX86_BUILTIN_GATHERPFDPS,
28455 IX86_BUILTIN_GATHERPFDPD,
28456 IX86_BUILTIN_GATHERPFQPS,
28457 IX86_BUILTIN_SCATTERPFDPD,
28458 IX86_BUILTIN_SCATTERPFDPS,
28459 IX86_BUILTIN_SCATTERPFQPD,
28460 IX86_BUILTIN_SCATTERPFQPS,
28461
28462 /* AVX-512ER */
28463 IX86_BUILTIN_EXP2PD_MASK,
28464 IX86_BUILTIN_EXP2PS_MASK,
28465 IX86_BUILTIN_EXP2PS,
28466 IX86_BUILTIN_RCP28PD,
28467 IX86_BUILTIN_RCP28PS,
28468 IX86_BUILTIN_RCP28SD,
28469 IX86_BUILTIN_RCP28SS,
28470 IX86_BUILTIN_RSQRT28PD,
28471 IX86_BUILTIN_RSQRT28PS,
28472 IX86_BUILTIN_RSQRT28SD,
28473 IX86_BUILTIN_RSQRT28SS,
28474
28475 /* SHA builtins. */
28476 IX86_BUILTIN_SHA1MSG1,
28477 IX86_BUILTIN_SHA1MSG2,
28478 IX86_BUILTIN_SHA1NEXTE,
28479 IX86_BUILTIN_SHA1RNDS4,
28480 IX86_BUILTIN_SHA256MSG1,
28481 IX86_BUILTIN_SHA256MSG2,
28482 IX86_BUILTIN_SHA256RNDS2,
28483
28484 /* TFmode support builtins. */
28485 IX86_BUILTIN_INFQ,
28486 IX86_BUILTIN_HUGE_VALQ,
28487 IX86_BUILTIN_FABSQ,
28488 IX86_BUILTIN_COPYSIGNQ,
28489
28490 /* Vectorizer support builtins. */
28491 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28492 IX86_BUILTIN_CPYSGNPS,
28493 IX86_BUILTIN_CPYSGNPD,
28494 IX86_BUILTIN_CPYSGNPS256,
28495 IX86_BUILTIN_CPYSGNPS512,
28496 IX86_BUILTIN_CPYSGNPD256,
28497 IX86_BUILTIN_CPYSGNPD512,
28498 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28499 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28500
28501
28502 /* FMA4 instructions. */
28503 IX86_BUILTIN_VFMADDSS,
28504 IX86_BUILTIN_VFMADDSD,
28505 IX86_BUILTIN_VFMADDPS,
28506 IX86_BUILTIN_VFMADDPD,
28507 IX86_BUILTIN_VFMADDPS256,
28508 IX86_BUILTIN_VFMADDPD256,
28509 IX86_BUILTIN_VFMADDSUBPS,
28510 IX86_BUILTIN_VFMADDSUBPD,
28511 IX86_BUILTIN_VFMADDSUBPS256,
28512 IX86_BUILTIN_VFMADDSUBPD256,
28513
28514 /* FMA3 instructions. */
28515 IX86_BUILTIN_VFMADDSS3,
28516 IX86_BUILTIN_VFMADDSD3,
28517
28518 /* XOP instructions. */
28519 IX86_BUILTIN_VPCMOV,
28520 IX86_BUILTIN_VPCMOV_V2DI,
28521 IX86_BUILTIN_VPCMOV_V4SI,
28522 IX86_BUILTIN_VPCMOV_V8HI,
28523 IX86_BUILTIN_VPCMOV_V16QI,
28524 IX86_BUILTIN_VPCMOV_V4SF,
28525 IX86_BUILTIN_VPCMOV_V2DF,
28526 IX86_BUILTIN_VPCMOV256,
28527 IX86_BUILTIN_VPCMOV_V4DI256,
28528 IX86_BUILTIN_VPCMOV_V8SI256,
28529 IX86_BUILTIN_VPCMOV_V16HI256,
28530 IX86_BUILTIN_VPCMOV_V32QI256,
28531 IX86_BUILTIN_VPCMOV_V8SF256,
28532 IX86_BUILTIN_VPCMOV_V4DF256,
28533
28534 IX86_BUILTIN_VPPERM,
28535
28536 IX86_BUILTIN_VPMACSSWW,
28537 IX86_BUILTIN_VPMACSWW,
28538 IX86_BUILTIN_VPMACSSWD,
28539 IX86_BUILTIN_VPMACSWD,
28540 IX86_BUILTIN_VPMACSSDD,
28541 IX86_BUILTIN_VPMACSDD,
28542 IX86_BUILTIN_VPMACSSDQL,
28543 IX86_BUILTIN_VPMACSSDQH,
28544 IX86_BUILTIN_VPMACSDQL,
28545 IX86_BUILTIN_VPMACSDQH,
28546 IX86_BUILTIN_VPMADCSSWD,
28547 IX86_BUILTIN_VPMADCSWD,
28548
28549 IX86_BUILTIN_VPHADDBW,
28550 IX86_BUILTIN_VPHADDBD,
28551 IX86_BUILTIN_VPHADDBQ,
28552 IX86_BUILTIN_VPHADDWD,
28553 IX86_BUILTIN_VPHADDWQ,
28554 IX86_BUILTIN_VPHADDDQ,
28555 IX86_BUILTIN_VPHADDUBW,
28556 IX86_BUILTIN_VPHADDUBD,
28557 IX86_BUILTIN_VPHADDUBQ,
28558 IX86_BUILTIN_VPHADDUWD,
28559 IX86_BUILTIN_VPHADDUWQ,
28560 IX86_BUILTIN_VPHADDUDQ,
28561 IX86_BUILTIN_VPHSUBBW,
28562 IX86_BUILTIN_VPHSUBWD,
28563 IX86_BUILTIN_VPHSUBDQ,
28564
28565 IX86_BUILTIN_VPROTB,
28566 IX86_BUILTIN_VPROTW,
28567 IX86_BUILTIN_VPROTD,
28568 IX86_BUILTIN_VPROTQ,
28569 IX86_BUILTIN_VPROTB_IMM,
28570 IX86_BUILTIN_VPROTW_IMM,
28571 IX86_BUILTIN_VPROTD_IMM,
28572 IX86_BUILTIN_VPROTQ_IMM,
28573
28574 IX86_BUILTIN_VPSHLB,
28575 IX86_BUILTIN_VPSHLW,
28576 IX86_BUILTIN_VPSHLD,
28577 IX86_BUILTIN_VPSHLQ,
28578 IX86_BUILTIN_VPSHAB,
28579 IX86_BUILTIN_VPSHAW,
28580 IX86_BUILTIN_VPSHAD,
28581 IX86_BUILTIN_VPSHAQ,
28582
28583 IX86_BUILTIN_VFRCZSS,
28584 IX86_BUILTIN_VFRCZSD,
28585 IX86_BUILTIN_VFRCZPS,
28586 IX86_BUILTIN_VFRCZPD,
28587 IX86_BUILTIN_VFRCZPS256,
28588 IX86_BUILTIN_VFRCZPD256,
28589
28590 IX86_BUILTIN_VPCOMEQUB,
28591 IX86_BUILTIN_VPCOMNEUB,
28592 IX86_BUILTIN_VPCOMLTUB,
28593 IX86_BUILTIN_VPCOMLEUB,
28594 IX86_BUILTIN_VPCOMGTUB,
28595 IX86_BUILTIN_VPCOMGEUB,
28596 IX86_BUILTIN_VPCOMFALSEUB,
28597 IX86_BUILTIN_VPCOMTRUEUB,
28598
28599 IX86_BUILTIN_VPCOMEQUW,
28600 IX86_BUILTIN_VPCOMNEUW,
28601 IX86_BUILTIN_VPCOMLTUW,
28602 IX86_BUILTIN_VPCOMLEUW,
28603 IX86_BUILTIN_VPCOMGTUW,
28604 IX86_BUILTIN_VPCOMGEUW,
28605 IX86_BUILTIN_VPCOMFALSEUW,
28606 IX86_BUILTIN_VPCOMTRUEUW,
28607
28608 IX86_BUILTIN_VPCOMEQUD,
28609 IX86_BUILTIN_VPCOMNEUD,
28610 IX86_BUILTIN_VPCOMLTUD,
28611 IX86_BUILTIN_VPCOMLEUD,
28612 IX86_BUILTIN_VPCOMGTUD,
28613 IX86_BUILTIN_VPCOMGEUD,
28614 IX86_BUILTIN_VPCOMFALSEUD,
28615 IX86_BUILTIN_VPCOMTRUEUD,
28616
28617 IX86_BUILTIN_VPCOMEQUQ,
28618 IX86_BUILTIN_VPCOMNEUQ,
28619 IX86_BUILTIN_VPCOMLTUQ,
28620 IX86_BUILTIN_VPCOMLEUQ,
28621 IX86_BUILTIN_VPCOMGTUQ,
28622 IX86_BUILTIN_VPCOMGEUQ,
28623 IX86_BUILTIN_VPCOMFALSEUQ,
28624 IX86_BUILTIN_VPCOMTRUEUQ,
28625
28626 IX86_BUILTIN_VPCOMEQB,
28627 IX86_BUILTIN_VPCOMNEB,
28628 IX86_BUILTIN_VPCOMLTB,
28629 IX86_BUILTIN_VPCOMLEB,
28630 IX86_BUILTIN_VPCOMGTB,
28631 IX86_BUILTIN_VPCOMGEB,
28632 IX86_BUILTIN_VPCOMFALSEB,
28633 IX86_BUILTIN_VPCOMTRUEB,
28634
28635 IX86_BUILTIN_VPCOMEQW,
28636 IX86_BUILTIN_VPCOMNEW,
28637 IX86_BUILTIN_VPCOMLTW,
28638 IX86_BUILTIN_VPCOMLEW,
28639 IX86_BUILTIN_VPCOMGTW,
28640 IX86_BUILTIN_VPCOMGEW,
28641 IX86_BUILTIN_VPCOMFALSEW,
28642 IX86_BUILTIN_VPCOMTRUEW,
28643
28644 IX86_BUILTIN_VPCOMEQD,
28645 IX86_BUILTIN_VPCOMNED,
28646 IX86_BUILTIN_VPCOMLTD,
28647 IX86_BUILTIN_VPCOMLED,
28648 IX86_BUILTIN_VPCOMGTD,
28649 IX86_BUILTIN_VPCOMGED,
28650 IX86_BUILTIN_VPCOMFALSED,
28651 IX86_BUILTIN_VPCOMTRUED,
28652
28653 IX86_BUILTIN_VPCOMEQQ,
28654 IX86_BUILTIN_VPCOMNEQ,
28655 IX86_BUILTIN_VPCOMLTQ,
28656 IX86_BUILTIN_VPCOMLEQ,
28657 IX86_BUILTIN_VPCOMGTQ,
28658 IX86_BUILTIN_VPCOMGEQ,
28659 IX86_BUILTIN_VPCOMFALSEQ,
28660 IX86_BUILTIN_VPCOMTRUEQ,
28661
28662 /* LWP instructions. */
28663 IX86_BUILTIN_LLWPCB,
28664 IX86_BUILTIN_SLWPCB,
28665 IX86_BUILTIN_LWPVAL32,
28666 IX86_BUILTIN_LWPVAL64,
28667 IX86_BUILTIN_LWPINS32,
28668 IX86_BUILTIN_LWPINS64,
28669
28670 IX86_BUILTIN_CLZS,
28671
28672 /* RTM */
28673 IX86_BUILTIN_XBEGIN,
28674 IX86_BUILTIN_XEND,
28675 IX86_BUILTIN_XABORT,
28676 IX86_BUILTIN_XTEST,
28677
28678 /* BMI instructions. */
28679 IX86_BUILTIN_BEXTR32,
28680 IX86_BUILTIN_BEXTR64,
28681 IX86_BUILTIN_CTZS,
28682
28683 /* TBM instructions. */
28684 IX86_BUILTIN_BEXTRI32,
28685 IX86_BUILTIN_BEXTRI64,
28686
28687 /* BMI2 instructions. */
28688 IX86_BUILTIN_BZHI32,
28689 IX86_BUILTIN_BZHI64,
28690 IX86_BUILTIN_PDEP32,
28691 IX86_BUILTIN_PDEP64,
28692 IX86_BUILTIN_PEXT32,
28693 IX86_BUILTIN_PEXT64,
28694
28695 /* ADX instructions. */
28696 IX86_BUILTIN_ADDCARRYX32,
28697 IX86_BUILTIN_ADDCARRYX64,
28698
28699 /* FSGSBASE instructions. */
28700 IX86_BUILTIN_RDFSBASE32,
28701 IX86_BUILTIN_RDFSBASE64,
28702 IX86_BUILTIN_RDGSBASE32,
28703 IX86_BUILTIN_RDGSBASE64,
28704 IX86_BUILTIN_WRFSBASE32,
28705 IX86_BUILTIN_WRFSBASE64,
28706 IX86_BUILTIN_WRGSBASE32,
28707 IX86_BUILTIN_WRGSBASE64,
28708
28709 /* RDRND instructions. */
28710 IX86_BUILTIN_RDRAND16_STEP,
28711 IX86_BUILTIN_RDRAND32_STEP,
28712 IX86_BUILTIN_RDRAND64_STEP,
28713
28714 /* RDSEED instructions. */
28715 IX86_BUILTIN_RDSEED16_STEP,
28716 IX86_BUILTIN_RDSEED32_STEP,
28717 IX86_BUILTIN_RDSEED64_STEP,
28718
28719 /* F16C instructions. */
28720 IX86_BUILTIN_CVTPH2PS,
28721 IX86_BUILTIN_CVTPH2PS256,
28722 IX86_BUILTIN_CVTPS2PH,
28723 IX86_BUILTIN_CVTPS2PH256,
28724
28725 /* CFString built-in for darwin */
28726 IX86_BUILTIN_CFSTRING,
28727
28728 /* Builtins to get CPU type and supported features. */
28729 IX86_BUILTIN_CPU_INIT,
28730 IX86_BUILTIN_CPU_IS,
28731 IX86_BUILTIN_CPU_SUPPORTS,
28732
28733 /* Read/write FLAGS register built-ins. */
28734 IX86_BUILTIN_READ_FLAGS,
28735 IX86_BUILTIN_WRITE_FLAGS,
28736
28737 IX86_BUILTIN_MAX
28738 };
28739
28740 /* Table for the ix86 builtin decls. */
28741 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28742
28743 /* Table of all of the builtin functions that are possible with different ISAs
28744 but are waiting to be built until a function is declared to use that
28745 ISA. */
28746 struct builtin_isa {
28747 const char *name; /* function name */
28748 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28749 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28750 bool const_p; /* true if the declaration is constant */
28751 bool set_and_not_built_p; /* true if the decl is deferred and not yet built */
28752 };
28753
28754 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28755
28756
28757 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
28758 of which isa_flags to use in the ix86_builtins_isa array.  Stores the
28759 function decl in the ix86_builtins array.  Returns the function decl,
28760 or NULL_TREE if the builtin was not added.
28761
28762 If the front end has a special hook for builtin functions, delay adding
28763 builtin functions that aren't in the current ISA until the ISA is changed
28764 with function specific optimization.  Doing so can save about 300K for the
28765 default compiler.  When the builtin is expanded, check at that time whether
28766 it is valid.
28767
28768 If the front end doesn't have a special hook, record all builtins, even
28769 those that aren't in the current ISA, in case the user uses function
28770 specific options for a different ISA, so that we don't get scope errors
28771 if a builtin is added in the middle of a function scope. */
28772
28773 static inline tree
28774 def_builtin (HOST_WIDE_INT mask, const char *name,
28775 enum ix86_builtin_func_type tcode,
28776 enum ix86_builtins code)
28777 {
28778 tree decl = NULL_TREE;
28779
28780 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28781 {
28782 ix86_builtins_isa[(int) code].isa = mask;
28783
28784 mask &= ~OPTION_MASK_ISA_64BIT;
28785 if (mask == 0
28786 || (mask & ix86_isa_flags) != 0
28787 || (lang_hooks.builtin_function
28788 == lang_hooks.builtin_function_ext_scope))
28789
28790 {
28791 tree type = ix86_get_builtin_func_type (tcode);
28792 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28793 NULL, NULL_TREE);
28794 ix86_builtins[(int) code] = decl;
28795 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28796 }
28797 else
28798 {
28799 ix86_builtins[(int) code] = NULL_TREE;
28800 ix86_builtins_isa[(int) code].tcode = tcode;
28801 ix86_builtins_isa[(int) code].name = name;
28802 ix86_builtins_isa[(int) code].const_p = false;
28803 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28804 }
28805 }
28806
28807 return decl;
28808 }
28809
28810 /* Like def_builtin, but also marks the function decl "const". */
28811
28812 static inline tree
28813 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28814 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28815 {
28816 tree decl = def_builtin (mask, name, tcode, code);
28817 if (decl)
28818 TREE_READONLY (decl) = 1;
28819 else
28820 ix86_builtins_isa[(int) code].const_p = true;
28821
28822 return decl;
28823 }
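
/* Illustration only, not part of the compiler proper: a sketch of how the
   deferral implemented by def_builtin plays out.  The builtin name and enum
   value below are hypothetical stand-ins; real callers live in
   ix86_init_mmx_sse_builtins and friends.  */
#if 0
  /* Suppose AVX2 is not in the current ix86_isa_flags and the front end's
     builtin_function hook is not the extended-scope one.  Then this call
     does not build a decl yet: it only records the name, type code and mask
     in ix86_builtins_isa[] and sets set_and_not_built_p.  Because
     def_builtin_const is used, const_p is recorded too, so the decl will be
     marked TREE_READONLY when ix86_add_new_builtins finally builds it.  */
  def_builtin_const (OPTION_MASK_ISA_AVX2, "__builtin_ia32_example",
                     V8SI_FTYPE_V8SI_V8SI, IX86_BUILTIN_EXAMPLE);
#endif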
28824
28825 /* Add any new builtin functions for a given ISA that may not have been
28826 declared yet. This saves a bit of space compared to adding all of the
28827 declarations to the tree up front, whether or not they are ever used. */
28828
28829 static void
28830 ix86_add_new_builtins (HOST_WIDE_INT isa)
28831 {
28832 int i;
28833
28834 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28835 {
28836 if ((ix86_builtins_isa[i].isa & isa) != 0
28837 && ix86_builtins_isa[i].set_and_not_built_p)
28838 {
28839 tree decl, type;
28840
28841 /* Don't define the builtin again. */
28842 ix86_builtins_isa[i].set_and_not_built_p = false;
28843
28844 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28845 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28846 type, i, BUILT_IN_MD, NULL,
28847 NULL_TREE);
28848
28849 ix86_builtins[i] = decl;
28850 if (ix86_builtins_isa[i].const_p)
28851 TREE_READONLY (decl) = 1;
28852 }
28853 }
28854 }
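
/* Illustration only: what the deferred path looks like from the user's side.
   The function below is a hypothetical sketch, not a GCC testcase.  When a
   function-specific ISA switch (e.g. the target attribute) enables new
   isa_flags, ix86_add_new_builtins is invoked for those flags, and the AVX
   builtins behind the intrinsics used here are created at that point instead
   of at startup.  */
#if 0
__attribute__ ((target ("avx")))
static void
copy_8_floats (float *dst, const float *src)
{
  __builtin_ia32_storeups256 (dst, __builtin_ia32_loadups256 (src));
}
#endif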
28855
28856 /* Bits for builtin_description.flag. */
28857
28858 /* Set when we don't support the comparison natively, and should
28859 swap the comparison operands in order to support it. */
28860 #define BUILTIN_DESC_SWAP_OPERANDS 1
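
/* Worked example (informal): SSE cmpps/cmppd encode LT and LE predicates but
   not GT or GE, so a "greater than" builtin is described as the swapped form
   of "less than", i.e. GT (a, b) is emitted as LT (b, a).  The same idea
   shows up below as the ..._SWAP variants of the function type codes, e.g.
   __builtin_ia32_cmpgtps pairs LT with V4SF_FTYPE_V4SF_V4SF_SWAP.  */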
28861
28862 struct builtin_description
28863 {
28864 const HOST_WIDE_INT mask;
28865 const enum insn_code icode;
28866 const char *const name;
28867 const enum ix86_builtins code;
28868 const enum rtx_code comparison;
28869 const int flag;
28870 };
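
/* Illustration only: how one row of the bdesc_* tables below is read.  This
   block merely restates the first bdesc_comi entry field by field; it is not
   an additional builtin.  */
#if 0
  { OPTION_MASK_ISA_SSE,          /* mask: ISA required for the builtin */
    CODE_FOR_sse_comi,            /* icode: insn pattern used to expand it */
    "__builtin_ia32_comieq",      /* name exposed to the front end */
    IX86_BUILTIN_COMIEQSS,        /* code: index into ix86_builtins[] */
    UNEQ,                         /* comparison rtx code, if any */
    0 },                          /* flag, e.g. BUILTIN_DESC_SWAP_OPERANDS */
#endif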
28871
28872 static const struct builtin_description bdesc_comi[] =
28873 {
28874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28881 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28882 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28883 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28884 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28885 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28886 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28888 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28889 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28890 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28891 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28892 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28893 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28894 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28895 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28896 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28897 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28898 };
28899
28900 static const struct builtin_description bdesc_pcmpestr[] =
28901 {
28902 /* SSE4.2 */
28903 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28904 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28905 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28906 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28907 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28908 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28909 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28910 };
28911
28912 static const struct builtin_description bdesc_pcmpistr[] =
28913 {
28914 /* SSE4.2 */
28915 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28916 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28917 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28918 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28919 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28920 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28921 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28922 };
28923
28924 /* Special builtins with variable number of arguments. */
28925 static const struct builtin_description bdesc_special_args[] =
28926 {
28927 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28928 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28929 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28930
28931 /* 80387 (used internally for atomic compound assignment). */
28932 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28933 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28934 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28935 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28936
28937 /* MMX */
28938 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28939
28940 /* 3DNow! */
28941 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28942
28943 /* FXSR, XSAVE and XSAVEOPT */
28944 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
28945 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
28946 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28947 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28948 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28949
28950 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28951 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28952 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28953 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28954 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28955
28956 /* SSE */
28957 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28958 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28959 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28960
28961 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28962 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28963 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28964 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28965
28966 /* SSE or 3DNow!A */
28967 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28968 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
28969
28970 /* SSE2 */
28971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
28975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
28977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
28978 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
28979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
28980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28981
28982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
28984
28985 /* SSE3 */
28986 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
28987
28988 /* SSE4.1 */
28989 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
28990
28991 /* SSE4A */
28992 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28993 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28994
28995 /* AVX */
28996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
28997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
28998
28999 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29000 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29001 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29004
29005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29012
29013 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29016
29017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29018 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29019 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29020 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29021 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29022 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29023 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29024 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29025
29026 /* AVX2 */
29027 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29028 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29029 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29030 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29031 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29032 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29033 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29034 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29035 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29036
29037 /* AVX512F */
29038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29085
29086 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29087 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29088 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29089 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29090 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29091 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29092
29093 /* FSGSBASE */
29094 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29095 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29096 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29097 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29098 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29099 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29100 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29101 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29102
29103 /* RTM */
29104 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29105 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29106 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29107 };
29108
29109 /* Builtins with variable number of arguments. */
29110 static const struct builtin_description bdesc_args[] =
29111 {
29112 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29113 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29114 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29115 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29116 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29117 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29118 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29119
29120 /* MMX */
29121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29127
29128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29132 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29134 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29136
29137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29139
29140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29144
29145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29151
29152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29158
29159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29162
29163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29164
29165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29171
29172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29176 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29178
29179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29183
29184 /* 3DNow! */
29185 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29186 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29187 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29188 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29189
29190 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29191 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29192 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29193 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29194 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29195 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29196 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29197 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29198 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29199 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29200 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29201 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29202 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29203 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29204 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29205
29206 /* 3DNow!A */
29207 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29208 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29209 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29210 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29211 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29212 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29213
29214 /* SSE */
29215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29217 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29219 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29221 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29223 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29224 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29225 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29226 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29227
29228 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29229
29230 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29231 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29232 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29233 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29238
29239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29252 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29259
29260 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29261 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29264
29265 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29267 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29268 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29269
29270 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29271
29272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29275 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29276 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29277
29278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29280 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29281
29282 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29283
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29287
29288 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29289 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29290
29291 /* MMX extensions available with SSE or 3DNow!A */
29292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29293 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29294 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29295
29296 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29297 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29298 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29299 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29300
29301 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29302 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29303
29304 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29305
29306 /* SSE2 */
29307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29308
29309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29313 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29314
29315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29320
29321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29322
29323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29325 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29326 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29327
29328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29330 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29331
29332 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29333 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29334 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29335 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29340
29341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
29346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29361
29362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29363 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29366
29367 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29369 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29370 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29371
29372 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29373
29374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29377
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29379
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29384 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29388
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29397
29398 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29400
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29405
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29408
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29415
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29420
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29429
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29433
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29436
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29439
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29441
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29443 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29446
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29454
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29462
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
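/* The shift rows above use suffixes of the function-type enum to tell the
   operand-expansion code (ix86_expand_args_builtin) how to treat the count:
   ..._SI_COUNT forms take the count as a scalar integer (the "...i" builtins),
   ..._V*_COUNT forms take it from the low quadword of a vector operand, and
   the pslldqi/psrldqi rows use ..._INT_CONVERT because the whole-register
   byte shift is expanded through the V1TImode shift pattern with the byte
   count adjusted for that mode.  (Summary of the conventions as read from
   this table and the expander, not normative documentation.)  */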
29467
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29471
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29473
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29475
29476 /* SSE2 MMX */
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29479
29480 /* SSE3 */
29481 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29482 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29483
29484 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29485 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29486 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29487 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29488 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29489 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29490
29491 /* SSSE3 */
29492 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29493 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29495 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29496 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29497 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29498
29499 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29500 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29501 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29503 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29510 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29522 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29523
29524 /* SSSE3. */
29525 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29526 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29527
29528 /* SSE4.1 */
29529 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29530 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29531 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29532 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29533 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29534 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29535 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29536 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29537 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29538 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29539
29540 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29553
29554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29561 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29562 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29563 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29564 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29565 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29566
29567 /* SSE4.1 */
29568 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29569 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29570 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29571 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29572
29573 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29574 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29575 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29576 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
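/* For the floor/ceil/trunc/rint rows the rtx_code slot does not hold a
   comparison at all: the ROUND_FLOOR/ROUND_CEIL/ROUND_TRUNC/ROUND_MXCSR
   values (cast to enum rtx_code) carry the rounding-mode immediate that the
   expander plugs into the underlying roundpd/roundps pattern, and the
   ..._ROUND function-type suffix marks entries whose immediate comes from
   this field rather than from a user-supplied argument.  */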
29577
29578 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29579 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29580
29581 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29582 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29583
29584 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29585 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29586 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29587 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29588
29589 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29590 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29591
29592 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29593 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29594
29595 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29596 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29597 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
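/* The three ptest rows share one insn; the comparison code selects which
   flag the result is read from: EQ tests ZF (ptestz), LTU tests CF (ptestc)
   and GTU tests that neither flag is set (ptestnzc).  A minimal sketch of
   the usual smmintrin.h-style wrapper for the first one:

     int testz (__m128i a, __m128i b)
     {
       return __builtin_ia32_ptestz128 ((__v2di) a, (__v2di) b);
     }
*/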
29598
29599 /* SSE4.2 */
29600 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29601 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29602 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29603 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29604 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29605
29606 /* SSE4A */
29607 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29608 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29609 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29610 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29611
29612 /* AES */
29613 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29614 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29615
29616 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29617 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29618 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29619 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29620
29621 /* PCLMUL */
29622 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
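/* The AES and PCLMUL rows above have a null name field; these builtins
   appear to be declared elsewhere under their own -maes / -mpclmul guards,
   and the entries here are presumably only consulted when expanding the
   already-declared builtins, which is why a plain SSE2 mask suffices.  */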
29623
29624 /* AVX */
29625 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29626 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29629 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29630 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29633 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29639 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29640 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29641 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29642 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29643 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29644 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29645 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29646 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29647 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29648 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29649 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29650 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29651
29652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29656
29657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29673 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29674 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29678 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29680 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29691
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29695
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29701
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29703
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29706
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29711
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29714
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29717
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29722
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29725
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29728
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29733
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29740
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29756
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29759
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29762
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29764
29765 /* AVX2 */
29766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29767 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29768 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29769 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29774 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29775 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29776 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29777 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29783 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29802 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29803 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29804 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29805 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29806 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29807 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29808 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29809 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29810 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29811 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29812 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29813 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29814 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29815 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29816 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29820 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29823 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29824 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29825 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29826 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29827 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29828 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29829 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29912
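/* LZCNT */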
29913 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29914
29915 /* BMI */
29916 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29917 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29918 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29919
29920 /* TBM */
29921 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29922 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29923
29924 /* F16C */
29925 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29926 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29927 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29928 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29929
29930 /* BMI2 */
29931 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29932 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29933 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29934 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29935 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29936 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29937
29938 /* AVX512F */
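/* Naming note: entries whose pattern and builtin name end in "_mask" append
   a merge source and a write-mask operand (QI for 8-element, HI for
   16-element vectors) to the argument list, as reflected in the *_FTYPE_*
   codes; "_maskz" variants zero the masked-off destination elements instead
   of merging them.  */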
29939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
29940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
29941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
29947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
29949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
29950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
29952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
29953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
29957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
29959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
29964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
29965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
29966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
29967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
29968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
29969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
29970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
29971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
29984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
29986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
29988 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
29989 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
29990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
29991 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
29992 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
29993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
29996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
29997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30072 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30073 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30100 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30101 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30102 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30103 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30130
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30135 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30139
30140 /* Mask arithmetic operations */
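/* These operate on 16-bit opmask (k-register) values, hence the
   HI_FTYPE_* prototypes.  */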
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30151
30152 /* SHA */
30153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30160 };
30161
30162 /* Builtins with rounding support. */
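/* Every entry below carries a trailing integer operand: the embedded
   rounding-mode / suppress-all-exceptions immediate that the expander
   forwards to the corresponding "*_round" insn pattern, which is why
   each function-type suffix ends in _INT.  */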
30163 static const struct builtin_description bdesc_round_args[] =
30164 {
30165 /* AVX512F */
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30185 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30187 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30194 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30196 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30246 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30248 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30250 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30252 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30254 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30256 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30258 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30260 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30285
30286 /* AVX512ER */
30287 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30288 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30289 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30290 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30291 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30292 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30293 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30294 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30295 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30296 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30297 };
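
/* Illustrative sketch only (not quoted from avx512fintrin.h): the unmasked
   intrinsic wrappers are assumed to pass an all-ones mask and the rounding
   immediate straight through to the builtins above, roughly

     _mm512_add_round_pd (a, b, r)
       => __builtin_ia32_addpd512_mask (a, b, <src>, (__mmask8) -1, r)

   where <src> stands for the pass-through operand selected where mask bits
   are clear.  */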
30298
30299 /* FMA4 and XOP. */
30300 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30301 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30302 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30303 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30304 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30305 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30306 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30307 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30308 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30309 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30310 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30311 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30312 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30313 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30314 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30315 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30316 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30317 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30318 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30319 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30320 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30321 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30322 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30323 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30324 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30325 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30326 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30327 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30328 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30329 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30330 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30331 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30332 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30333 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30334 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30335 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30336 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30337 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30338 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30339 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30340 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30341 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30342 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30343 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30344 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30345 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30346 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30347 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30348 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30349 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30350 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30351 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30352
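/* The MULTI_ARG_* names above are shorthand aliases for
   ix86_builtin_func_type values used by bdesc_multi_arg below: the digit
   gives the number of vector operands, the mode letters give the element
   type, and the _IMM, _CMP and _TF tails mark the forms that take an
   immediate count, a comparison code, or a PCOM_TRUE/PCOM_FALSE code
   respectively.  */
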
30353 static const struct builtin_description bdesc_multi_arg[] =
30354 {
30355 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30356 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30357 UNKNOWN, (int)MULTI_ARG_3_SF },
30358 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30359 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30360 UNKNOWN, (int)MULTI_ARG_3_DF },
30361
30362 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30363 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30364 UNKNOWN, (int)MULTI_ARG_3_SF },
30365 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30366 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30367 UNKNOWN, (int)MULTI_ARG_3_DF },
30368
30369 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30370 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30371 UNKNOWN, (int)MULTI_ARG_3_SF },
30372 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30373 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30374 UNKNOWN, (int)MULTI_ARG_3_DF },
30375 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30376 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30377 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30378 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30379 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30380 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30381
30382 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30383 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30384 UNKNOWN, (int)MULTI_ARG_3_SF },
30385 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30386 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30387 UNKNOWN, (int)MULTI_ARG_3_DF },
30388 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30389 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30390 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30391 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30392 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30393 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30394
30395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
30400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30402
30403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30410
30411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30412
30413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30425
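  /* XOP vector rotates (vprot*, variable and immediate-count forms) and
     per-element arithmetic/logical shifts (vpsha*, vpshl*).  */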
30426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30442
30443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30449
30450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30465
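  /* XOP vpcom* integer comparisons: the rtx code in each entry (EQ, NE,
     LT, ... or LTU, GEU, ... for the unsigned forms) selects the
     comparison, and the vpcomfalse / vpcomtrue entries use the PCOM_FALSE
     and PCOM_TRUE pseudo codes.  */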
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30473
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30481
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30489
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30497
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30505
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30513
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30521
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30529
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30538
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30547
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30552
30553 };
30554 \f
30555 /* TM vector builtins. */
30556
30557 /* Reuse the existing x86-specific `struct builtin_description' because
30558    it is convenient; casts are added to make the TM entries fit.  */
30559 static const struct builtin_description bdesc_tm[] =
30560 {
30561 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30562 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30563 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30564 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30565 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30566 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30567 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30568
30569 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30570 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30571 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30572 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30573 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30574 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30575 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30576
30577 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30578 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30579 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30580 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30581 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30582 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30583 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30584
30585 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30586 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30587 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30588 };
30589
30590 /* TM callbacks. */
30591
30592 /* Return the builtin decl needed to load a vector of TYPE. */
30593
30594 static tree
30595 ix86_builtin_tm_load (tree type)
30596 {
30597 if (TREE_CODE (type) == VECTOR_TYPE)
30598 {
30599 switch (tree_to_uhwi (TYPE_SIZE (type)))
30600 {
30601 case 64:
30602 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30603 case 128:
30604 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30605 case 256:
30606 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30607 }
30608 }
30609 return NULL_TREE;
30610 }
30611
30612 /* Return the builtin decl needed to store a vector of TYPE. */
30613
30614 static tree
30615 ix86_builtin_tm_store (tree type)
30616 {
30617 if (TREE_CODE (type) == VECTOR_TYPE)
30618 {
30619 switch (tree_to_uhwi (TYPE_SIZE (type)))
30620 {
30621 case 64:
30622 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30623 case 128:
30624 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30625 case 256:
30626 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30627 }
30628 }
30629 return NULL_TREE;
30630 }
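
/* A minimal usage sketch (assumed, not verbatim): with the two helpers
   above wired to the vectorizer's builtin_tm_load/builtin_tm_store target
   hooks, trans-mem lowering can replace the element-wise write barriers
   for a transactional __m128 store with a single call such as

     __builtin__ITM_WM128 (&dst, src);  */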
30631 \f
30632 /* Initialize the transactional memory vector load/store builtins. */
30633
30634 static void
30635 ix86_init_tm_builtins (void)
30636 {
30637 enum ix86_builtin_func_type ftype;
30638 const struct builtin_description *d;
30639 size_t i;
30640 tree decl;
30641 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30642 tree attrs_log, attrs_type_log;
30643
30644 if (!flag_tm)
30645 return;
30646
30647 /* If there are no builtins defined, we must be compiling in a
30648 language without trans-mem support. */
30649 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30650 return;
30651
30652 /* Use whatever attributes a normal TM load has. */
30653 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30654 attrs_load = DECL_ATTRIBUTES (decl);
30655 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30656 /* Use whatever attributes a normal TM store has. */
30657 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30658 attrs_store = DECL_ATTRIBUTES (decl);
30659 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30660 /* Use whatever attributes a normal TM log has. */
30661 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30662 attrs_log = DECL_ATTRIBUTES (decl);
30663 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30664
30665 for (i = 0, d = bdesc_tm;
30666 i < ARRAY_SIZE (bdesc_tm);
30667 i++, d++)
30668 {
30669 if ((d->mask & ix86_isa_flags) != 0
30670 || (lang_hooks.builtin_function
30671 == lang_hooks.builtin_function_ext_scope))
30672 {
30673 tree type, attrs, attrs_type;
30674 enum built_in_function code = (enum built_in_function) d->code;
30675
30676 ftype = (enum ix86_builtin_func_type) d->flag;
30677 type = ix86_get_builtin_func_type (ftype);
30678
30679 if (BUILTIN_TM_LOAD_P (code))
30680 {
30681 attrs = attrs_load;
30682 attrs_type = attrs_type_load;
30683 }
30684 else if (BUILTIN_TM_STORE_P (code))
30685 {
30686 attrs = attrs_store;
30687 attrs_type = attrs_type_store;
30688 }
30689 else
30690 {
30691 attrs = attrs_log;
30692 attrs_type = attrs_type_log;
30693 }
30694 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30695 			   /* The builtin name without the "__builtin_"
30696 			      prefix, for calling it directly.  */
30697 d->name + strlen ("__builtin_"),
30698 attrs);
30699 	  /* add_builtin_function () has already set the DECL_ATTRIBUTES;
30700 	     now set the TYPE_ATTRIBUTES.  */
30701 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30702
30703 set_builtin_decl (code, decl, false);
30704 }
30705 }
30706 }
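
/* For example, the bdesc_tm entry for "__builtin__ITM_WM128" is registered
   above under the generic code BUILT_IN_TM_STORE_M128, with the prefix-less
   name "_ITM_WM128" used when the call is emitted directly.  */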
30707
30708 /* Set up all the MMX/SSE builtins, including builtins for instructions
30709    that are not in the current target ISA, so that the user can compile
30710    particular modules with target-specific options that differ from the
30711    command-line options.  */
30712 static void
30713 ix86_init_mmx_sse_builtins (void)
30714 {
30715 const struct builtin_description * d;
30716 enum ix86_builtin_func_type ftype;
30717 size_t i;
30718
30719 /* Add all special builtins with variable number of operands. */
30720 for (i = 0, d = bdesc_special_args;
30721 i < ARRAY_SIZE (bdesc_special_args);
30722 i++, d++)
30723 {
30724 if (d->name == 0)
30725 continue;
30726
30727 ftype = (enum ix86_builtin_func_type) d->flag;
30728 def_builtin (d->mask, d->name, ftype, d->code);
30729 }
30730
30731 /* Add all builtins with variable number of operands. */
30732 for (i = 0, d = bdesc_args;
30733 i < ARRAY_SIZE (bdesc_args);
30734 i++, d++)
30735 {
30736 if (d->name == 0)
30737 continue;
30738
30739 ftype = (enum ix86_builtin_func_type) d->flag;
30740 def_builtin_const (d->mask, d->name, ftype, d->code);
30741 }
30742
30743 /* Add all builtins with rounding. */
30744 for (i = 0, d = bdesc_round_args;
30745 i < ARRAY_SIZE (bdesc_round_args);
30746 i++, d++)
30747 {
30748 if (d->name == 0)
30749 continue;
30750
30751 ftype = (enum ix86_builtin_func_type) d->flag;
30752 def_builtin_const (d->mask, d->name, ftype, d->code);
30753 }
30754
30755 /* pcmpestr[im] insns. */
30756 for (i = 0, d = bdesc_pcmpestr;
30757 i < ARRAY_SIZE (bdesc_pcmpestr);
30758 i++, d++)
30759 {
30760 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30761 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30762 else
30763 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30764 def_builtin_const (d->mask, d->name, ftype, d->code);
30765 }
30766
30767 /* pcmpistr[im] insns. */
30768 for (i = 0, d = bdesc_pcmpistr;
30769 i < ARRAY_SIZE (bdesc_pcmpistr);
30770 i++, d++)
30771 {
30772 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30773 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30774 else
30775 ftype = INT_FTYPE_V16QI_V16QI_INT;
30776 def_builtin_const (d->mask, d->name, ftype, d->code);
30777 }
30778
30779 /* comi/ucomi insns. */
30780 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30781 {
30782 if (d->mask == OPTION_MASK_ISA_SSE2)
30783 ftype = INT_FTYPE_V2DF_V2DF;
30784 else
30785 ftype = INT_FTYPE_V4SF_V4SF;
30786 def_builtin_const (d->mask, d->name, ftype, d->code);
30787 }
30788
30789 /* SSE */
30790 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30791 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30792 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30793 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30794
30795 /* SSE or 3DNow!A */
30796 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30797 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30798 IX86_BUILTIN_MASKMOVQ);
30799
30800 /* SSE2 */
30801 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30802 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30803
30804 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30805 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30806 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30807 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30808
30809 /* SSE3. */
30810 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30811 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30812 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30813 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30814
30815 /* AES */
30816 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30817 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30818 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30819 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30820 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30821 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30822 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30823 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30824 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30825 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30826 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30827 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30828
30829 /* PCLMUL */
30830 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30831 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30832
30833 /* RDRND */
30834 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30835 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30836 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30837 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30838 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30839 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30840 IX86_BUILTIN_RDRAND64_STEP);
30841
30842 /* AVX2 */
30843 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30844 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30845 IX86_BUILTIN_GATHERSIV2DF);
30846
30847 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30848 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30849 IX86_BUILTIN_GATHERSIV4DF);
30850
30851 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30852 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30853 IX86_BUILTIN_GATHERDIV2DF);
30854
30855 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30856 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30857 IX86_BUILTIN_GATHERDIV4DF);
30858
30859 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30860 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30861 IX86_BUILTIN_GATHERSIV4SF);
30862
30863 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30864 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30865 IX86_BUILTIN_GATHERSIV8SF);
30866
30867 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30868 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30869 IX86_BUILTIN_GATHERDIV4SF);
30870
30871 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30872 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30873 IX86_BUILTIN_GATHERDIV8SF);
30874
30875 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30876 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30877 IX86_BUILTIN_GATHERSIV2DI);
30878
30879 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30880 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30881 IX86_BUILTIN_GATHERSIV4DI);
30882
30883 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30884 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30885 IX86_BUILTIN_GATHERDIV2DI);
30886
30887 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30888 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30889 IX86_BUILTIN_GATHERDIV4DI);
30890
30891 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30892 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30893 IX86_BUILTIN_GATHERSIV4SI);
30894
30895 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30896 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30897 IX86_BUILTIN_GATHERSIV8SI);
30898
30899 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30900 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30901 IX86_BUILTIN_GATHERDIV4SI);
30902
30903 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30904 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30905 IX86_BUILTIN_GATHERDIV8SI);
30906
30907 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30908 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30909 IX86_BUILTIN_GATHERALTSIV4DF);
30910
30911 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30912 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30913 IX86_BUILTIN_GATHERALTDIV8SF);
30914
30915 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30916 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30917 IX86_BUILTIN_GATHERALTSIV4DI);
30918
30919 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30920 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30921 IX86_BUILTIN_GATHERALTDIV8SI);
30922
30923 /* AVX512F */
30924 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30925 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30926 IX86_BUILTIN_GATHER3SIV16SF);
30927
30928 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30929 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30930 IX86_BUILTIN_GATHER3SIV8DF);
30931
30932 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30933 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30934 IX86_BUILTIN_GATHER3DIV16SF);
30935
30936 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30937 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
30938 IX86_BUILTIN_GATHER3DIV8DF);
30939
30940 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30941 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
30942 IX86_BUILTIN_GATHER3SIV16SI);
30943
30944 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30945 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
30946 IX86_BUILTIN_GATHER3SIV8DI);
30947
30948 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30949 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
30950 IX86_BUILTIN_GATHER3DIV16SI);
30951
30952 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30953 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
30954 IX86_BUILTIN_GATHER3DIV8DI);
30955
30956 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
30957 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30958 IX86_BUILTIN_GATHER3ALTSIV8DF);
30959
30960 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
30961 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30962 IX86_BUILTIN_GATHER3ALTDIV16SF);
30963
30964 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
30965 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30966 IX86_BUILTIN_GATHER3ALTSIV8DI);
30967
30968 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
30969 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30970 IX86_BUILTIN_GATHER3ALTDIV16SI);
30971
30972 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30973 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
30974 IX86_BUILTIN_SCATTERSIV16SF);
30975
30976 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30977 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
30978 IX86_BUILTIN_SCATTERSIV8DF);
30979
30980 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
30981 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
30982 IX86_BUILTIN_SCATTERDIV16SF);
30983
30984 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
30985 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
30986 IX86_BUILTIN_SCATTERDIV8DF);
30987
30988 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
30989 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
30990 IX86_BUILTIN_SCATTERSIV16SI);
30991
30992 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
30993 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
30994 IX86_BUILTIN_SCATTERSIV8DI);
30995
30996 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
30997 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
30998 IX86_BUILTIN_SCATTERDIV16SI);
30999
31000 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31001 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31002 IX86_BUILTIN_SCATTERDIV8DI);
31003
31004 /* AVX512PF */
31005 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31006 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31007 IX86_BUILTIN_GATHERPFDPD);
31008 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31009 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31010 IX86_BUILTIN_GATHERPFDPS);
31011 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31012 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31013 IX86_BUILTIN_GATHERPFQPD);
31014 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31015 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31016 IX86_BUILTIN_GATHERPFQPS);
31017 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31018 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31019 IX86_BUILTIN_SCATTERPFDPD);
31020 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31021 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31022 IX86_BUILTIN_SCATTERPFDPS);
31023 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31024 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31025 IX86_BUILTIN_SCATTERPFQPD);
31026 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31027 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31028 IX86_BUILTIN_SCATTERPFQPS);
31029
31030 /* SHA */
31031 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31032 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31033 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31034 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31035 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31036 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31037 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31038 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31039 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31040 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31041 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31042 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31043 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31044 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31045
31046 /* RTM. */
31047 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31048 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31049
31050 /* MMX access to the vec_init patterns. */
31051 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31052 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31053
31054 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31055 V4HI_FTYPE_HI_HI_HI_HI,
31056 IX86_BUILTIN_VEC_INIT_V4HI);
31057
31058 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31059 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31060 IX86_BUILTIN_VEC_INIT_V8QI);
31061
31062 /* Access to the vec_extract patterns. */
31063 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31064 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31065 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31066 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31067 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31068 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31069 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31070 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31071 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31072 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31073
31074 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31075 "__builtin_ia32_vec_ext_v4hi",
31076 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31077
31078 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31079 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31080
31081 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31082 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31083
31084 /* Access to the vec_set patterns. */
31085 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31086 "__builtin_ia32_vec_set_v2di",
31087 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31088
31089 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31090 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31091
31092 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31093 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31094
31095 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31096 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31097
31098 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31099 "__builtin_ia32_vec_set_v4hi",
31100 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31101
31102 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31103 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31104
31105 /* RDSEED */
31106 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31107 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31108 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31109 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31110 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31111 "__builtin_ia32_rdseed_di_step",
31112 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31113
31114 /* ADCX */
31115 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31116 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31117 def_builtin (OPTION_MASK_ISA_64BIT,
31118 "__builtin_ia32_addcarryx_u64",
31119 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31120 IX86_BUILTIN_ADDCARRYX64);
31121
31122 /* Read/write FLAGS. */
31123 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31124 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31125 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31126 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31127 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31128 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31129 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31130 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31131
31132
31133 /* Add FMA4 multi-arg instructions. */
31134 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31135 {
31136 if (d->name == 0)
31137 continue;
31138
31139 ftype = (enum ix86_builtin_func_type) d->flag;
31140 def_builtin_const (d->mask, d->name, ftype, d->code);
31141 }
31142 }
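/* [Editor's illustration -- not part of i386.c.]  Because the builtins above
   are registered even when the corresponding ISA is not enabled on the
   command line, a module built for plain x86-64 can still provide an AVX2
   path guarded by the "target" attribute; the intrinsic expands to one of
   the __builtin_ia32_* builtins defined here.  A minimal sketch (the
   function name is hypothetical):  */
#if 0
#include <immintrin.h>

__attribute__ ((target ("avx2")))
__m256i
add_avx2 (__m256i a, __m256i b)
{
  return _mm256_add_epi32 (a, b);
}
#endif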
31143
31144 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31145 to return a pointer to VERSION_DECL if the outcome of the expression
31146 formed by PREDICATE_CHAIN is true. This function will be called during
31147 version dispatch to decide which function version to execute. It returns
31148 the basic block at the end, to which more conditions can be added. */
31149
31150 static basic_block
31151 add_condition_to_bb (tree function_decl, tree version_decl,
31152 tree predicate_chain, basic_block new_bb)
31153 {
31154 gimple return_stmt;
31155 tree convert_expr, result_var;
31156 gimple convert_stmt;
31157 gimple call_cond_stmt;
31158 gimple if_else_stmt;
31159
31160 basic_block bb1, bb2, bb3;
31161 edge e12, e23;
31162
31163 tree cond_var, and_expr_var = NULL_TREE;
31164 gimple_seq gseq;
31165
31166 tree predicate_decl, predicate_arg;
31167
31168 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31169
31170 gcc_assert (new_bb != NULL);
31171 gseq = bb_seq (new_bb);
31172
31173
31174 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31175 build_fold_addr_expr (version_decl));
31176 result_var = create_tmp_var (ptr_type_node, NULL);
31177 convert_stmt = gimple_build_assign (result_var, convert_expr);
31178 return_stmt = gimple_build_return (result_var);
31179
31180 if (predicate_chain == NULL_TREE)
31181 {
31182 gimple_seq_add_stmt (&gseq, convert_stmt);
31183 gimple_seq_add_stmt (&gseq, return_stmt);
31184 set_bb_seq (new_bb, gseq);
31185 gimple_set_bb (convert_stmt, new_bb);
31186 gimple_set_bb (return_stmt, new_bb);
31187 pop_cfun ();
31188 return new_bb;
31189 }
31190
31191 while (predicate_chain != NULL)
31192 {
31193 cond_var = create_tmp_var (integer_type_node, NULL);
31194 predicate_decl = TREE_PURPOSE (predicate_chain);
31195 predicate_arg = TREE_VALUE (predicate_chain);
31196 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31197 gimple_call_set_lhs (call_cond_stmt, cond_var);
31198
31199 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31200 gimple_set_bb (call_cond_stmt, new_bb);
31201 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31202
31203 predicate_chain = TREE_CHAIN (predicate_chain);
31204
31205 if (and_expr_var == NULL)
31206 and_expr_var = cond_var;
31207 else
31208 {
31209 gimple assign_stmt;
31210 /* Use MIN_EXPR to check if any integer is zero, i.e.
31211 and_expr_var = min_expr <cond_var, and_expr_var>. */
31212 assign_stmt = gimple_build_assign (and_expr_var,
31213 build2 (MIN_EXPR, integer_type_node,
31214 cond_var, and_expr_var));
31215
31216 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31217 gimple_set_bb (assign_stmt, new_bb);
31218 gimple_seq_add_stmt (&gseq, assign_stmt);
31219 }
31220 }
31221
31222 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31223 integer_zero_node,
31224 NULL_TREE, NULL_TREE);
31225 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31226 gimple_set_bb (if_else_stmt, new_bb);
31227 gimple_seq_add_stmt (&gseq, if_else_stmt);
31228
31229 gimple_seq_add_stmt (&gseq, convert_stmt);
31230 gimple_seq_add_stmt (&gseq, return_stmt);
31231 set_bb_seq (new_bb, gseq);
31232
31233 bb1 = new_bb;
31234 e12 = split_block (bb1, if_else_stmt);
31235 bb2 = e12->dest;
31236 e12->flags &= ~EDGE_FALLTHRU;
31237 e12->flags |= EDGE_TRUE_VALUE;
31238
31239 e23 = split_block (bb2, return_stmt);
31240
31241 gimple_set_bb (convert_stmt, bb2);
31242 gimple_set_bb (return_stmt, bb2);
31243
31244 bb3 = e23->dest;
31245 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31246
31247 remove_edge (e23);
31248 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31249
31250 pop_cfun ();
31251
31252 return bb3;
31253 }
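/* [Editor's illustration -- not part of i386.c.]  Rough C rendering of the
   code that one call to add_condition_to_bb emits for a version guarded by
   two predicates (function and version names are hypothetical).  The
   MIN_EXPR over the integer predicate results acts as a logical AND, and the
   GT_EXPR test against zero decides whether this version's address is
   returned.  */
#if 0
extern void foo_sse42 (void);

static void *
resolver_fragment (void)
{
  int c1 = __builtin_cpu_is ("corei7");
  int c2 = __builtin_cpu_supports ("sse4.2");
  int all = c1 < c2 ? c1 : c2;          /* MIN_EXPR: zero if either is zero */
  if (all > 0)                          /* GT_EXPR against integer_zero_node */
    return (void *) &foo_sse42;         /* CONVERT_EXPR of &VERSION_DECL */
  return 0;                             /* further versions would follow here */
}
#endif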
31254
31255 /* This parses the attribute arguments to target in DECL and determines
31256 the right builtin to use to match the platform specification.
31257 It returns the priority value for this version decl. If PREDICATE_LIST
31258 is not NULL, it stores the list of cpu features that need to be checked
31259 before dispatching this function. */
31260
31261 static unsigned int
31262 get_builtin_code_for_version (tree decl, tree *predicate_list)
31263 {
31264 tree attrs;
31265 struct cl_target_option cur_target;
31266 tree target_node;
31267 struct cl_target_option *new_target;
31268 const char *arg_str = NULL;
31269 const char *attrs_str = NULL;
31270 char *tok_str = NULL;
31271 char *token;
31272
31273 /* Priority of i386 features, greater value is higher priority. This is
31274 used to decide the order in which function dispatch must happen. For
31275 instance, a version specialized for SSE4.2 should be checked for dispatch
31276 before a version for SSE3, as SSE4.2 implies SSE3. */
31277 enum feature_priority
31278 {
31279 P_ZERO = 0,
31280 P_MMX,
31281 P_SSE,
31282 P_SSE2,
31283 P_SSE3,
31284 P_SSSE3,
31285 P_PROC_SSSE3,
31286 P_SSE4_A,
31287 P_PROC_SSE4_A,
31288 P_SSE4_1,
31289 P_SSE4_2,
31290 P_PROC_SSE4_2,
31291 P_POPCNT,
31292 P_AVX,
31293 P_PROC_AVX,
31294 P_FMA4,
31295 P_XOP,
31296 P_PROC_XOP,
31297 P_FMA,
31298 P_PROC_FMA,
31299 P_AVX2,
31300 P_PROC_AVX2
31301 };
31302
31303 enum feature_priority priority = P_ZERO;
31304
31305 /* These are the target attribute strings for which a dispatcher is
31306 available, from fold_builtin_cpu. */
31307
31308 static struct _feature_list
31309 {
31310 const char *const name;
31311 const enum feature_priority priority;
31312 }
31313 const feature_list[] =
31314 {
31315 {"mmx", P_MMX},
31316 {"sse", P_SSE},
31317 {"sse2", P_SSE2},
31318 {"sse3", P_SSE3},
31319 {"sse4a", P_SSE4_A},
31320 {"ssse3", P_SSSE3},
31321 {"sse4.1", P_SSE4_1},
31322 {"sse4.2", P_SSE4_2},
31323 {"popcnt", P_POPCNT},
31324 {"avx", P_AVX},
31325 {"fma4", P_FMA4},
31326 {"xop", P_XOP},
31327 {"fma", P_FMA},
31328 {"avx2", P_AVX2}
31329 };
31330
31331
31332 static unsigned int NUM_FEATURES
31333 = sizeof (feature_list) / sizeof (struct _feature_list);
31334
31335 unsigned int i;
31336
31337 tree predicate_chain = NULL_TREE;
31338 tree predicate_decl, predicate_arg;
31339
31340 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31341 gcc_assert (attrs != NULL);
31342
31343 attrs = TREE_VALUE (TREE_VALUE (attrs));
31344
31345 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31346 attrs_str = TREE_STRING_POINTER (attrs);
31347
31348 /* Return priority zero for default function. */
31349 if (strcmp (attrs_str, "default") == 0)
31350 return 0;
31351
31352 /* Handle arch= if specified. For priority, set it to be 1 more than
31353 the best instruction set the processor can handle. For instance, if
31354 there is a version for atom and a version for ssse3 (the highest ISA
31355 priority for atom), the atom version must be checked for dispatch
31356 before the ssse3 version. */
31357 if (strstr (attrs_str, "arch=") != NULL)
31358 {
31359 cl_target_option_save (&cur_target, &global_options);
31360 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31361 &global_options_set);
31362
31363 gcc_assert (target_node);
31364 new_target = TREE_TARGET_OPTION (target_node);
31365 gcc_assert (new_target);
31366
31367 if (new_target->arch_specified && new_target->arch > 0)
31368 {
31369 switch (new_target->arch)
31370 {
31371 case PROCESSOR_CORE2:
31372 arg_str = "core2";
31373 priority = P_PROC_SSSE3;
31374 break;
31375 case PROCESSOR_NEHALEM:
31376 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31377 arg_str = "westmere";
31378 else
31379 /* We translate "arch=corei7" and "arch=nehalem" to
31380 "corei7" so that it will be mapped to M_INTEL_COREI7
31381 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31382 arg_str = "corei7";
31383 priority = P_PROC_SSE4_2;
31384 break;
31385 case PROCESSOR_SANDYBRIDGE:
31386 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31387 arg_str = "ivybridge";
31388 else
31389 arg_str = "sandybridge";
31390 priority = P_PROC_AVX;
31391 break;
31392 case PROCESSOR_HASWELL:
31393 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31394 arg_str = "broadwell";
31395 else
31396 arg_str = "haswell";
31397 priority = P_PROC_AVX2;
31398 break;
31399 case PROCESSOR_BONNELL:
31400 arg_str = "bonnell";
31401 priority = P_PROC_SSSE3;
31402 break;
31403 case PROCESSOR_SILVERMONT:
31404 arg_str = "silvermont";
31405 priority = P_PROC_SSE4_2;
31406 break;
31407 case PROCESSOR_AMDFAM10:
31408 arg_str = "amdfam10h";
31409 priority = P_PROC_SSE4_A;
31410 break;
31411 case PROCESSOR_BTVER1:
31412 arg_str = "btver1";
31413 priority = P_PROC_SSE4_A;
31414 break;
31415 case PROCESSOR_BTVER2:
31416 arg_str = "btver2";
31417 priority = P_PROC_AVX;
31418 break;
31419 case PROCESSOR_BDVER1:
31420 arg_str = "bdver1";
31421 priority = P_PROC_XOP;
31422 break;
31423 case PROCESSOR_BDVER2:
31424 arg_str = "bdver2";
31425 priority = P_PROC_FMA;
31426 break;
31427 case PROCESSOR_BDVER3:
31428 arg_str = "bdver3";
31429 priority = P_PROC_FMA;
31430 break;
31431 case PROCESSOR_BDVER4:
31432 arg_str = "bdver4";
31433 priority = P_PROC_AVX2;
31434 break;
31435 }
31436 }
31437
31438 cl_target_option_restore (&global_options, &cur_target);
31439
31440 if (predicate_list && arg_str == NULL)
31441 {
31442 error_at (DECL_SOURCE_LOCATION (decl),
31443 "No dispatcher found for the versioning attributes");
31444 return 0;
31445 }
31446
31447 if (predicate_list)
31448 {
31449 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31450 /* For a C string literal the length includes the trailing NULL. */
31451 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31452 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31453 predicate_chain);
31454 }
31455 }
31456
31457 /* Process feature name. */
31458 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31459 strcpy (tok_str, attrs_str);
31460 token = strtok (tok_str, ",");
31461 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31462
31463 while (token != NULL)
31464 {
31465 /* Do not process "arch=" */
31466 if (strncmp (token, "arch=", 5) == 0)
31467 {
31468 token = strtok (NULL, ",");
31469 continue;
31470 }
31471 for (i = 0; i < NUM_FEATURES; ++i)
31472 {
31473 if (strcmp (token, feature_list[i].name) == 0)
31474 {
31475 if (predicate_list)
31476 {
31477 predicate_arg = build_string_literal (
31478 strlen (feature_list[i].name) + 1,
31479 feature_list[i].name);
31480 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31481 predicate_chain);
31482 }
31483 /* Find the maximum priority feature. */
31484 if (feature_list[i].priority > priority)
31485 priority = feature_list[i].priority;
31486
31487 break;
31488 }
31489 }
31490 if (predicate_list && i == NUM_FEATURES)
31491 {
31492 error_at (DECL_SOURCE_LOCATION (decl),
31493 "No dispatcher found for %s", token);
31494 return 0;
31495 }
31496 token = strtok (NULL, ",");
31497 }
31498 free (tok_str);
31499
31500 if (predicate_list && predicate_chain == NULL_TREE)
31501 {
31502 error_at (DECL_SOURCE_LOCATION (decl),
31503 "No dispatcher found for the versioning attributes : %s",
31504 attrs_str);
31505 return 0;
31506 }
31507 else if (predicate_list)
31508 {
31509 predicate_chain = nreverse (predicate_chain);
31510 *predicate_list = predicate_chain;
31511 }
31512
31513 return priority;
31514 }
31515
31516 /* This compares the priority of target features in function DECL1
31517 and DECL2. It returns positive value if DECL1 is higher priority,
31518 negative value if DECL2 is higher priority and 0 if they are the
31519 same. */
31520
31521 static int
31522 ix86_compare_version_priority (tree decl1, tree decl2)
31523 {
31524 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31525 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31526
31527 return (int)priority1 - (int)priority2;
31528 }
31529
31530 /* V1 and V2 point to function versions with different priorities
31531 based on the target ISA. This function compares their priorities. */
31532
31533 static int
31534 feature_compare (const void *v1, const void *v2)
31535 {
31536 typedef struct _function_version_info
31537 {
31538 tree version_decl;
31539 tree predicate_chain;
31540 unsigned int dispatch_priority;
31541 } function_version_info;
31542
31543 const function_version_info c1 = *(const function_version_info *)v1;
31544 const function_version_info c2 = *(const function_version_info *)v2;
31545 return (c2.dispatch_priority - c1.dispatch_priority);
31546 }
31547
31548 /* This function generates the dispatch function for
31549 multi-versioned functions. DISPATCH_DECL is the function which will
31550 contain the dispatch logic. FNDECLS_P points to the vector of function
31551 versions to choose from for dispatch. EMPTY_BB is the basic block pointer
31552 in DISPATCH_DECL in which the dispatch code is generated. */
31553
31554 static int
31555 dispatch_function_versions (tree dispatch_decl,
31556 void *fndecls_p,
31557 basic_block *empty_bb)
31558 {
31559 tree default_decl;
31560 gimple ifunc_cpu_init_stmt;
31561 gimple_seq gseq;
31562 int ix;
31563 tree ele;
31564 vec<tree> *fndecls;
31565 unsigned int num_versions = 0;
31566 unsigned int actual_versions = 0;
31567 unsigned int i;
31568
31569 struct _function_version_info
31570 {
31571 tree version_decl;
31572 tree predicate_chain;
31573 unsigned int dispatch_priority;
31574 }*function_version_info;
31575
31576 gcc_assert (dispatch_decl != NULL
31577 && fndecls_p != NULL
31578 && empty_bb != NULL);
31579
31580 /* fndecls_p is actually a vector. */
31581 fndecls = static_cast<vec<tree> *> (fndecls_p);
31582
31583 /* At least one more version other than the default. */
31584 num_versions = fndecls->length ();
31585 gcc_assert (num_versions >= 2);
31586
31587 function_version_info = (struct _function_version_info *)
31588 XNEWVEC (struct _function_version_info, (num_versions - 1));
31589
31590 /* The first version in the vector is the default decl. */
31591 default_decl = (*fndecls)[0];
31592
31593 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31594
31595 gseq = bb_seq (*empty_bb);
31596 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31597 constructors, so explicitly call __builtin_cpu_init here. */
31598 ifunc_cpu_init_stmt = gimple_build_call_vec (
31599 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31600 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31601 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31602 set_bb_seq (*empty_bb, gseq);
31603
31604 pop_cfun ();
31605
31606
31607 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31608 {
31609 tree version_decl = ele;
31610 tree predicate_chain = NULL_TREE;
31611 unsigned int priority;
31612 /* Get attribute string, parse it and find the right predicate decl.
31613 The predicate function could be a lengthy combination of many
31614 features, like arch-type and various isa-variants. */
31615 priority = get_builtin_code_for_version (version_decl,
31616 &predicate_chain);
31617
31618 if (predicate_chain == NULL_TREE)
31619 continue;
31620
31621 function_version_info [actual_versions].version_decl = version_decl;
31622 function_version_info [actual_versions].predicate_chain
31623 = predicate_chain;
31624 function_version_info [actual_versions].dispatch_priority = priority;
31625 actual_versions++;
31626 }
31627
31628 /* Sort the versions according to descending order of dispatch priority. The
31629 priority is based on the ISA. This is not a perfect solution. There
31630 could still be ambiguity. If more than one function version is suitable
31631 to execute, which one should be dispatched? In future, allow the user
31632 to specify a dispatch priority next to the version. */
31633 qsort (function_version_info, actual_versions,
31634 sizeof (struct _function_version_info), feature_compare);
31635
31636 for (i = 0; i < actual_versions; ++i)
31637 *empty_bb = add_condition_to_bb (dispatch_decl,
31638 function_version_info[i].version_decl,
31639 function_version_info[i].predicate_chain,
31640 *empty_bb);
31641
31642 /* dispatch default version at the end. */
31643 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31644 NULL, *empty_bb);
31645
31646 free (function_version_info);
31647 return 0;
31648 }
31649
31650 /* Comparator function to be used in the qsort routine to sort the argument
31651 strings of the "target" attribute. */
31652
31653 static int
31654 attr_strcmp (const void *v1, const void *v2)
31655 {
31656 const char *c1 = *(char *const*)v1;
31657 const char *c2 = *(char *const*)v2;
31658 return strcmp (c1, c2);
31659 }
31660
31661 /* ARGLIST is the argument to the target attribute. This function tokenizes
31662 the comma-separated arguments, sorts them and returns a string which
31663 is a unique identifier for the sorted arguments. It also
31664 replaces the non-identifier characters "=,-" with "_". */
31665
31666 static char *
31667 sorted_attr_string (tree arglist)
31668 {
31669 tree arg;
31670 size_t str_len_sum = 0;
31671 char **args = NULL;
31672 char *attr_str, *ret_str;
31673 char *attr = NULL;
31674 unsigned int argnum = 1;
31675 unsigned int i;
31676
31677 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31678 {
31679 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31680 size_t len = strlen (str);
31681 str_len_sum += len + 1;
31682 if (arg != arglist)
31683 argnum++;
31684 for (i = 0; i < strlen (str); i++)
31685 if (str[i] == ',')
31686 argnum++;
31687 }
31688
31689 attr_str = XNEWVEC (char, str_len_sum);
31690 str_len_sum = 0;
31691 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31692 {
31693 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31694 size_t len = strlen (str);
31695 memcpy (attr_str + str_len_sum, str, len);
31696 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31697 str_len_sum += len + 1;
31698 }
31699
31700 /* Replace "=,-" with "_". */
31701 for (i = 0; i < strlen (attr_str); i++)
31702 if (attr_str[i] == '=' || attr_str[i]== '-')
31703 attr_str[i] = '_';
31704
31705 if (argnum == 1)
31706 return attr_str;
31707
31708 args = XNEWVEC (char *, argnum);
31709
31710 i = 0;
31711 attr = strtok (attr_str, ",");
31712 while (attr != NULL)
31713 {
31714 args[i] = attr;
31715 i++;
31716 attr = strtok (NULL, ",");
31717 }
31718
31719 qsort (args, argnum, sizeof (char *), attr_strcmp);
31720
31721 ret_str = XNEWVEC (char, str_len_sum);
31722 str_len_sum = 0;
31723 for (i = 0; i < argnum; i++)
31724 {
31725 size_t len = strlen (args[i]);
31726 memcpy (ret_str + str_len_sum, args[i], len);
31727 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31728 str_len_sum += len + 1;
31729 }
31730
31731 XDELETEVEC (args);
31732 XDELETEVEC (attr_str);
31733 return ret_str;
31734 }
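/* [Editor's illustration -- not part of i386.c.]  A self-contained sketch of
   the tokenize/sort/join scheme used by sorted_attr_string, run on a
   hypothetical attribute string: "avx,arch=core2" becomes "arch_core2_avx"
   ('=' and '-' are replaced by '_', ',' stays as the token separator).  */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int
cmp (const void *a, const void *b)
{
  return strcmp (*(char *const *) a, *(char *const *) b);
}

int
main (void)
{
  char buf[] = "avx,arch=core2";
  char *tok[8];
  size_t n = 0, i;

  for (i = 0; buf[i]; i++)          /* replace '=' and '-' with '_' */
    if (buf[i] == '=' || buf[i] == '-')
      buf[i] = '_';
  for (char *p = strtok (buf, ","); p; p = strtok (NULL, ","))
    tok[n++] = p;                   /* tokenize on ',' */
  qsort (tok, n, sizeof (char *), cmp);
  for (i = 0; i < n; i++)           /* join with '_': prints arch_core2_avx */
    printf ("%s%s", tok[i], i + 1 < n ? "_" : "\n");
  return 0;
}
#endif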
31735
31736 /* This function changes the assembler name for functions that are
31737 versions. If DECL is a function version and has a "target"
31738 attribute, it appends the attribute string to its assembler name. */
31739
31740 static tree
31741 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31742 {
31743 tree version_attr;
31744 const char *orig_name, *version_string;
31745 char *attr_str, *assembler_name;
31746
31747 if (DECL_DECLARED_INLINE_P (decl)
31748 && lookup_attribute ("gnu_inline",
31749 DECL_ATTRIBUTES (decl)))
31750 error_at (DECL_SOURCE_LOCATION (decl),
31751 "Function versions cannot be marked as gnu_inline,"
31752 " bodies have to be generated");
31753
31754 if (DECL_VIRTUAL_P (decl)
31755 || DECL_VINDEX (decl))
31756 sorry ("Virtual function multiversioning not supported");
31757
31758 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31759
31760 /* target attribute string cannot be NULL. */
31761 gcc_assert (version_attr != NULL_TREE);
31762
31763 orig_name = IDENTIFIER_POINTER (id);
31764 version_string
31765 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31766
31767 if (strcmp (version_string, "default") == 0)
31768 return id;
31769
31770 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31771 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31772
31773 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31774
31775 /* Allow assembler name to be modified if already set. */
31776 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31777 SET_DECL_RTL (decl, NULL);
31778
31779 tree ret = get_identifier (assembler_name);
31780 XDELETEVEC (attr_str);
31781 XDELETEVEC (assembler_name);
31782 return ret;
31783 }
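/* [Editor's illustration -- not part of i386.c.]  At this point function
   multiversioning with the "target" attribute is driven from the C++ front
   end; a sketch of the source-level usage whose non-default versions get the
   ".<sorted attribute string>" suffix computed above (the suffixes in the
   comments are indicative only):  */
#if 0
__attribute__ ((target ("default")))
int foo (void) { return 0; }            /* keeps its normal assembler name */

__attribute__ ((target ("sse4.2")))
int foo (void) { return 1; }            /* assembler name gets ".sse4.2" */

__attribute__ ((target ("arch=core2,avx")))
int foo (void) { return 2; }            /* suffix ".arch_core2_avx" */
#endif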
31784
31785 /* This function returns true if FN1 and FN2 are versions of the same function,
31786 that is, the target strings of the function decls are different. This assumes
31787 that FN1 and FN2 have the same signature. */
31788
31789 static bool
31790 ix86_function_versions (tree fn1, tree fn2)
31791 {
31792 tree attr1, attr2;
31793 char *target1, *target2;
31794 bool result;
31795
31796 if (TREE_CODE (fn1) != FUNCTION_DECL
31797 || TREE_CODE (fn2) != FUNCTION_DECL)
31798 return false;
31799
31800 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31801 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31802
31803 /* At least one function decl should have the target attribute specified. */
31804 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31805 return false;
31806
31807 /* Diagnose missing target attribute if one of the decls is already
31808 multi-versioned. */
31809 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31810 {
31811 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31812 {
31813 if (attr2 != NULL_TREE)
31814 {
31815 tree tem = fn1;
31816 fn1 = fn2;
31817 fn2 = tem;
31818 attr1 = attr2;
31819 }
31820 error_at (DECL_SOURCE_LOCATION (fn2),
31821 "missing %<target%> attribute for multi-versioned %D",
31822 fn2);
31823 inform (DECL_SOURCE_LOCATION (fn1),
31824 "previous declaration of %D", fn1);
31825 /* Prevent diagnosing of the same error multiple times. */
31826 DECL_ATTRIBUTES (fn2)
31827 = tree_cons (get_identifier ("target"),
31828 copy_node (TREE_VALUE (attr1)),
31829 DECL_ATTRIBUTES (fn2));
31830 }
31831 return false;
31832 }
31833
31834 target1 = sorted_attr_string (TREE_VALUE (attr1));
31835 target2 = sorted_attr_string (TREE_VALUE (attr2));
31836
31837 /* The sorted target strings must be different for fn1 and fn2
31838 to be versions. */
31839 if (strcmp (target1, target2) == 0)
31840 result = false;
31841 else
31842 result = true;
31843
31844 XDELETEVEC (target1);
31845 XDELETEVEC (target2);
31846
31847 return result;
31848 }
31849
31850 static tree
31851 ix86_mangle_decl_assembler_name (tree decl, tree id)
31852 {
31853 /* For function version, add the target suffix to the assembler name. */
31854 if (TREE_CODE (decl) == FUNCTION_DECL
31855 && DECL_FUNCTION_VERSIONED (decl))
31856 id = ix86_mangle_function_version_assembler_name (decl, id);
31857 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31858 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31859 #endif
31860
31861 return id;
31862 }
31863
31864 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31865 is true, append the full path name of the source file. */
31866
31867 static char *
31868 make_name (tree decl, const char *suffix, bool make_unique)
31869 {
31870 char *global_var_name;
31871 int name_len;
31872 const char *name;
31873 const char *unique_name = NULL;
31874
31875 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31876
31877 /* Get a unique name that can be used globally without any chances
31878 of collision at link time. */
31879 if (make_unique)
31880 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31881
31882 name_len = strlen (name) + strlen (suffix) + 2;
31883
31884 if (make_unique)
31885 name_len += strlen (unique_name) + 1;
31886 global_var_name = XNEWVEC (char, name_len);
31887
31888 /* Use '.' to concatenate names as it is demangler friendly. */
31889 if (make_unique)
31890 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31891 suffix);
31892 else
31893 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31894
31895 return global_var_name;
31896 }
31897
31898 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31899
31900 /* Make a dispatcher declaration for the multi-versioned function DECL.
31901 Calls to DECL function will be replaced with calls to the dispatcher
31902 by the front-end. Return the decl created. */
31903
31904 static tree
31905 make_dispatcher_decl (const tree decl)
31906 {
31907 tree func_decl;
31908 char *func_name;
31909 tree fn_type, func_type;
31910 bool is_uniq = false;
31911
31912 if (TREE_PUBLIC (decl) == 0)
31913 is_uniq = true;
31914
31915 func_name = make_name (decl, "ifunc", is_uniq);
31916
31917 fn_type = TREE_TYPE (decl);
31918 func_type = build_function_type (TREE_TYPE (fn_type),
31919 TYPE_ARG_TYPES (fn_type));
31920
31921 func_decl = build_fn_decl (func_name, func_type);
31922 XDELETEVEC (func_name);
31923 TREE_USED (func_decl) = 1;
31924 DECL_CONTEXT (func_decl) = NULL_TREE;
31925 DECL_INITIAL (func_decl) = error_mark_node;
31926 DECL_ARTIFICIAL (func_decl) = 1;
31927 /* Mark this function as external; the resolver will flip it again if
31928 it gets generated. */
31929 DECL_EXTERNAL (func_decl) = 1;
31930 /* IFUNCs have to be externally visible. */
31931 TREE_PUBLIC (func_decl) = 1;
31932
31933 return func_decl;
31934 }
31935
31936 #endif
31937
31938 /* Returns true if DECL is multi-versioned and is the default function,
31939 that is, it is not tagged with a target-specific optimization. */
31940
31941 static bool
31942 is_function_default_version (const tree decl)
31943 {
31944 if (TREE_CODE (decl) != FUNCTION_DECL
31945 || !DECL_FUNCTION_VERSIONED (decl))
31946 return false;
31947 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31948 gcc_assert (attr);
31949 attr = TREE_VALUE (TREE_VALUE (attr));
31950 return (TREE_CODE (attr) == STRING_CST
31951 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
31952 }
31953
31954 /* Make a dispatcher declaration for the multi-versioned function DECL.
31955 Calls to DECL function will be replaced with calls to the dispatcher
31956 by the front-end. Returns the decl of the dispatcher function. */
31957
31958 static tree
31959 ix86_get_function_versions_dispatcher (void *decl)
31960 {
31961 tree fn = (tree) decl;
31962 struct cgraph_node *node = NULL;
31963 struct cgraph_node *default_node = NULL;
31964 struct cgraph_function_version_info *node_v = NULL;
31965 struct cgraph_function_version_info *first_v = NULL;
31966
31967 tree dispatch_decl = NULL;
31968
31969 struct cgraph_function_version_info *default_version_info = NULL;
31970
31971 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31972
31973 node = cgraph_get_node (fn);
31974 gcc_assert (node != NULL);
31975
31976 node_v = get_cgraph_node_version (node);
31977 gcc_assert (node_v != NULL);
31978
31979 if (node_v->dispatcher_resolver != NULL)
31980 return node_v->dispatcher_resolver;
31981
31982 /* Find the default version and make it the first node. */
31983 first_v = node_v;
31984 /* Go to the beginning of the chain. */
31985 while (first_v->prev != NULL)
31986 first_v = first_v->prev;
31987 default_version_info = first_v;
31988 while (default_version_info != NULL)
31989 {
31990 if (is_function_default_version
31991 (default_version_info->this_node->decl))
31992 break;
31993 default_version_info = default_version_info->next;
31994 }
31995
31996 /* If there is no default node, just return NULL. */
31997 if (default_version_info == NULL)
31998 return NULL;
31999
32000 /* Make default info the first node. */
32001 if (first_v != default_version_info)
32002 {
32003 default_version_info->prev->next = default_version_info->next;
32004 if (default_version_info->next)
32005 default_version_info->next->prev = default_version_info->prev;
32006 first_v->prev = default_version_info;
32007 default_version_info->next = first_v;
32008 default_version_info->prev = NULL;
32009 }
32010
32011 default_node = default_version_info->this_node;
32012
32013 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32014 if (targetm.has_ifunc_p ())
32015 {
32016 struct cgraph_function_version_info *it_v = NULL;
32017 struct cgraph_node *dispatcher_node = NULL;
32018 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32019
32020 /* Right now, the dispatching is done via ifunc. */
32021 dispatch_decl = make_dispatcher_decl (default_node->decl);
32022
32023 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32024 gcc_assert (dispatcher_node != NULL);
32025 dispatcher_node->dispatcher_function = 1;
32026 dispatcher_version_info
32027 = insert_new_cgraph_node_version (dispatcher_node);
32028 dispatcher_version_info->next = default_version_info;
32029 dispatcher_node->definition = 1;
32030
32031 /* Set the dispatcher for all the versions. */
32032 it_v = default_version_info;
32033 while (it_v != NULL)
32034 {
32035 it_v->dispatcher_resolver = dispatch_decl;
32036 it_v = it_v->next;
32037 }
32038 }
32039 else
32040 #endif
32041 {
32042 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32043 "multiversioning needs ifunc which is not supported "
32044 "on this target");
32045 }
32046
32047 return dispatch_decl;
32048 }
32049
32050 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32051 it to CHAIN. */
32052
32053 static tree
32054 make_attribute (const char *name, const char *arg_name, tree chain)
32055 {
32056 tree attr_name;
32057 tree attr_arg_name;
32058 tree attr_args;
32059 tree attr;
32060
32061 attr_name = get_identifier (name);
32062 attr_arg_name = build_string (strlen (arg_name), arg_name);
32063 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32064 attr = tree_cons (attr_name, attr_args, chain);
32065 return attr;
32066 }
32067
32068 /* Make the resolver function decl to dispatch the versions of
32069 a multi-versioned function, DEFAULT_DECL. Create an
32070 empty basic block in the resolver and store the pointer in
32071 EMPTY_BB. Return the decl of the resolver function. */
32072
32073 static tree
32074 make_resolver_func (const tree default_decl,
32075 const tree dispatch_decl,
32076 basic_block *empty_bb)
32077 {
32078 char *resolver_name;
32079 tree decl, type, decl_name, t;
32080 bool is_uniq = false;
32081
32082 /* IFUNCs have to be globally visible. So, if the default_decl is
32083 not, then the name of the IFUNC should be made unique. */
32084 if (TREE_PUBLIC (default_decl) == 0)
32085 is_uniq = true;
32086
32087 /* Append the filename to the resolver function if the versions are
32088 not externally visible. This is because the resolver function has
32089 to be externally visible for the loader to find it. So, appending
32090 the filename will prevent conflicts with a resolver function from
32091 another module which is based on the same version name. */
32092 resolver_name = make_name (default_decl, "resolver", is_uniq);
32093
32094 /* The resolver function should return a (void *). */
32095 type = build_function_type_list (ptr_type_node, NULL_TREE);
32096
32097 decl = build_fn_decl (resolver_name, type);
32098 decl_name = get_identifier (resolver_name);
32099 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32100
32101 DECL_NAME (decl) = decl_name;
32102 TREE_USED (decl) = 1;
32103 DECL_ARTIFICIAL (decl) = 1;
32104 DECL_IGNORED_P (decl) = 0;
32105 /* IFUNC resolvers have to be externally visible. */
32106 TREE_PUBLIC (decl) = 1;
32107 DECL_UNINLINABLE (decl) = 1;
32108
32109 /* Resolver is not external, body is generated. */
32110 DECL_EXTERNAL (decl) = 0;
32111 DECL_EXTERNAL (dispatch_decl) = 0;
32112
32113 DECL_CONTEXT (decl) = NULL_TREE;
32114 DECL_INITIAL (decl) = make_node (BLOCK);
32115 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32116
32117 if (DECL_COMDAT_GROUP (default_decl)
32118 || TREE_PUBLIC (default_decl))
32119 {
32120 /* In this case, each translation unit with a call to this
32121 versioned function will put out a resolver. Ensure it
32122 is comdat to keep just one copy. */
32123 DECL_COMDAT (decl) = 1;
32124 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32125 }
32126 /* Build result decl and add to function_decl. */
32127 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32128 DECL_ARTIFICIAL (t) = 1;
32129 DECL_IGNORED_P (t) = 1;
32130 DECL_RESULT (decl) = t;
32131
32132 gimplify_function_tree (decl);
32133 push_cfun (DECL_STRUCT_FUNCTION (decl));
32134 *empty_bb = init_lowered_empty_function (decl, false);
32135
32136 cgraph_add_new_function (decl, true);
32137 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32138
32139 pop_cfun ();
32140
32141 gcc_assert (dispatch_decl != NULL);
32142 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32143 DECL_ATTRIBUTES (dispatch_decl)
32144 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32145
32146 /* Create the alias for dispatch to resolver here. */
32147 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32148 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32149 XDELETEVEC (resolver_name);
32150 return decl;
32151 }
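/* [Editor's illustration -- not part of i386.c.]  Hand-written GNU C
   equivalent of the ifunc/resolver pairing that make_resolver_func and
   dispatch_function_versions wire up (all names hypothetical).  The resolver
   calls __builtin_cpu_init itself because ifunc resolvers run before normal
   constructors.  */
#if 0
static void impl_default (void) { }
static void impl_avx2 (void) { }

static void *
my_resolver (void)
{
  __builtin_cpu_init ();
  if (__builtin_cpu_supports ("avx2"))
    return (void *) impl_avx2;
  return (void *) impl_default;
}

void dispatched (void) __attribute__ ((ifunc ("my_resolver")));
#endif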
32152
32153 /* Generate the dispatching code body to dispatch multi-versioned function
32154 DECL. The target hook is called to process the "target" attributes and
32155 provide the code to dispatch the right function at run-time. NODE points
32156 to the dispatcher decl whose body will be created. */
32157
32158 static tree
32159 ix86_generate_version_dispatcher_body (void *node_p)
32160 {
32161 tree resolver_decl;
32162 basic_block empty_bb;
32163 tree default_ver_decl;
32164 struct cgraph_node *versn;
32165 struct cgraph_node *node;
32166
32167 struct cgraph_function_version_info *node_version_info = NULL;
32168 struct cgraph_function_version_info *versn_info = NULL;
32169
32170 node = (cgraph_node *)node_p;
32171
32172 node_version_info = get_cgraph_node_version (node);
32173 gcc_assert (node->dispatcher_function
32174 && node_version_info != NULL);
32175
32176 if (node_version_info->dispatcher_resolver)
32177 return node_version_info->dispatcher_resolver;
32178
32179 /* The first version in the chain corresponds to the default version. */
32180 default_ver_decl = node_version_info->next->this_node->decl;
32181
32182 /* node is going to be an alias, so remove the finalized bit. */
32183 node->definition = false;
32184
32185 resolver_decl = make_resolver_func (default_ver_decl,
32186 node->decl, &empty_bb);
32187
32188 node_version_info->dispatcher_resolver = resolver_decl;
32189
32190 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32191
32192 auto_vec<tree, 2> fn_ver_vec;
32193
32194 for (versn_info = node_version_info->next; versn_info;
32195 versn_info = versn_info->next)
32196 {
32197 versn = versn_info->this_node;
32198 /* Check for virtual functions here again, as by this time it should
32199 have been determined if this function needs a vtable index or
32200 not. This happens for methods in derived classes that override
32201 virtual methods in base classes but are not explicitly marked as
32202 virtual. */
32203 if (DECL_VINDEX (versn->decl))
32204 sorry ("Virtual function multiversioning not supported");
32205
32206 fn_ver_vec.safe_push (versn->decl);
32207 }
32208
32209 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32210 rebuild_cgraph_edges ();
32211 pop_cfun ();
32212 return resolver_decl;
32213 }
32214 /* This builds the processor_model struct type defined in
32215 libgcc/config/i386/cpuinfo.c */
32216
32217 static tree
32218 build_processor_model_struct (void)
32219 {
32220 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32221 "__cpu_features"};
32222 tree field = NULL_TREE, field_chain = NULL_TREE;
32223 int i;
32224 tree type = make_node (RECORD_TYPE);
32225
32226 /* The first 3 fields are unsigned int. */
32227 for (i = 0; i < 3; ++i)
32228 {
32229 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32230 get_identifier (field_name[i]), unsigned_type_node);
32231 if (field_chain != NULL_TREE)
32232 DECL_CHAIN (field) = field_chain;
32233 field_chain = field;
32234 }
32235
32236 /* The last field is an array of unsigned integers of size one. */
32237 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32238 get_identifier (field_name[3]),
32239 build_array_type (unsigned_type_node,
32240 build_index_type (size_one_node)));
32241 if (field_chain != NULL_TREE)
32242 DECL_CHAIN (field) = field_chain;
32243 field_chain = field;
32244
32245 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32246 return type;
32247 }
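/* [Editor's illustration -- not part of i386.c.]  The record type built above
   is meant to match the layout of libgcc's __cpu_model object; written as a
   plain declaration it corresponds roughly to the following sketch (see
   libgcc/config/i386/cpuinfo.c for the authoritative definition):  */
#if 0
struct __processor_model
{
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
};
#endif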
32248
32249 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32250
32251 static tree
32252 make_var_decl (tree type, const char *name)
32253 {
32254 tree new_decl;
32255
32256 new_decl = build_decl (UNKNOWN_LOCATION,
32257 VAR_DECL,
32258 get_identifier(name),
32259 type);
32260
32261 DECL_EXTERNAL (new_decl) = 1;
32262 TREE_STATIC (new_decl) = 1;
32263 TREE_PUBLIC (new_decl) = 1;
32264 DECL_INITIAL (new_decl) = 0;
32265 DECL_ARTIFICIAL (new_decl) = 0;
32266 DECL_PRESERVE_P (new_decl) = 1;
32267
32268 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32269 assemble_variable (new_decl, 0, 0, 0);
32270
32271 return new_decl;
32272 }
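/* [Editor's illustration -- not part of i386.c.]  User-level view of the
   machinery below: __builtin_cpu_is and __builtin_cpu_supports are folded
   into reads of the __cpu_model variable declared via make_var_decl, after
   __builtin_cpu_init has filled it in.  A minimal, runnable sketch:  */
#if 0
#include <stdio.h>

int
main (void)
{
  __builtin_cpu_init ();
  if (__builtin_cpu_is ("corei7"))
    puts ("running on a corei7-class CPU");
  if (__builtin_cpu_supports ("avx2"))
    puts ("avx2 is available");
  return 0;
}
#endif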
32273
32274 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32275 into an integer defined in libgcc/config/i386/cpuinfo.c */
32276
32277 static tree
32278 fold_builtin_cpu (tree fndecl, tree *args)
32279 {
32280 unsigned int i;
32281 enum ix86_builtins fn_code = (enum ix86_builtins)
32282 DECL_FUNCTION_CODE (fndecl);
32283 tree param_string_cst = NULL;
32284
32285 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32286 enum processor_features
32287 {
32288 F_CMOV = 0,
32289 F_MMX,
32290 F_POPCNT,
32291 F_SSE,
32292 F_SSE2,
32293 F_SSE3,
32294 F_SSSE3,
32295 F_SSE4_1,
32296 F_SSE4_2,
32297 F_AVX,
32298 F_AVX2,
32299 F_SSE4_A,
32300 F_FMA4,
32301 F_XOP,
32302 F_FMA,
32303 F_MAX
32304 };
32305
32306 /* These are the values for vendor types and cpu types and subtypes
32307 in cpuinfo.c. The corresponding start value must be subtracted from
32308 cpu types and subtypes before they are used. */
32309 enum processor_model
32310 {
32311 M_INTEL = 1,
32312 M_AMD,
32313 M_CPU_TYPE_START,
32314 M_INTEL_BONNELL,
32315 M_INTEL_CORE2,
32316 M_INTEL_COREI7,
32317 M_AMDFAM10H,
32318 M_AMDFAM15H,
32319 M_INTEL_SILVERMONT,
32320 M_AMD_BTVER1,
32321 M_AMD_BTVER2,
32322 M_CPU_SUBTYPE_START,
32323 M_INTEL_COREI7_NEHALEM,
32324 M_INTEL_COREI7_WESTMERE,
32325 M_INTEL_COREI7_SANDYBRIDGE,
32326 M_AMDFAM10H_BARCELONA,
32327 M_AMDFAM10H_SHANGHAI,
32328 M_AMDFAM10H_ISTANBUL,
32329 M_AMDFAM15H_BDVER1,
32330 M_AMDFAM15H_BDVER2,
32331 M_AMDFAM15H_BDVER3,
32332 M_AMDFAM15H_BDVER4,
32333 M_INTEL_COREI7_IVYBRIDGE,
32334 M_INTEL_COREI7_HASWELL
32335 };
32336
32337 static struct _arch_names_table
32338 {
32339 const char *const name;
32340 const enum processor_model model;
32341 }
32342 const arch_names_table[] =
32343 {
32344 {"amd", M_AMD},
32345 {"intel", M_INTEL},
32346 {"atom", M_INTEL_BONNELL},
32347 {"slm", M_INTEL_SILVERMONT},
32348 {"core2", M_INTEL_CORE2},
32349 {"corei7", M_INTEL_COREI7},
32350 {"nehalem", M_INTEL_COREI7_NEHALEM},
32351 {"westmere", M_INTEL_COREI7_WESTMERE},
32352 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32353 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32354 {"haswell", M_INTEL_COREI7_HASWELL},
32355 {"bonnell", M_INTEL_BONNELL},
32356 {"silvermont", M_INTEL_SILVERMONT},
32357 {"amdfam10h", M_AMDFAM10H},
32358 {"barcelona", M_AMDFAM10H_BARCELONA},
32359 {"shanghai", M_AMDFAM10H_SHANGHAI},
32360 {"istanbul", M_AMDFAM10H_ISTANBUL},
32361 {"btver1", M_AMD_BTVER1},
32362 {"amdfam15h", M_AMDFAM15H},
32363 {"bdver1", M_AMDFAM15H_BDVER1},
32364 {"bdver2", M_AMDFAM15H_BDVER2},
32365 {"bdver3", M_AMDFAM15H_BDVER3},
32366 {"bdver4", M_AMDFAM15H_BDVER4},
32367 {"btver2", M_AMD_BTVER2},
32368 };
32369
32370 static struct _isa_names_table
32371 {
32372 const char *const name;
32373 const enum processor_features feature;
32374 }
32375 const isa_names_table[] =
32376 {
32377 {"cmov", F_CMOV},
32378 {"mmx", F_MMX},
32379 {"popcnt", F_POPCNT},
32380 {"sse", F_SSE},
32381 {"sse2", F_SSE2},
32382 {"sse3", F_SSE3},
32383 {"ssse3", F_SSSE3},
32384 {"sse4a", F_SSE4_A},
32385 {"sse4.1", F_SSE4_1},
32386 {"sse4.2", F_SSE4_2},
32387 {"avx", F_AVX},
32388 {"fma4", F_FMA4},
32389 {"xop", F_XOP},
32390 {"fma", F_FMA},
32391 {"avx2", F_AVX2}
32392 };
32393
32394 tree __processor_model_type = build_processor_model_struct ();
32395 tree __cpu_model_var = make_var_decl (__processor_model_type,
32396 "__cpu_model");
32397
32398
32399 varpool_add_new_variable (__cpu_model_var);
32400
32401 gcc_assert ((args != NULL) && (*args != NULL));
32402
32403 param_string_cst = *args;
32404 while (param_string_cst
32405 && TREE_CODE (param_string_cst) != STRING_CST)
32406 {
32407 /* *args must be an expr that can contain other EXPRs leading to a
32408 STRING_CST. */
32409 if (!EXPR_P (param_string_cst))
32410 {
32411 error ("Parameter to builtin must be a string constant or literal");
32412 return integer_zero_node;
32413 }
32414 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32415 }
32416
32417 gcc_assert (param_string_cst);
32418
32419 if (fn_code == IX86_BUILTIN_CPU_IS)
32420 {
32421 tree ref;
32422 tree field;
32423 tree final;
32424
32425 unsigned int field_val = 0;
32426 unsigned int NUM_ARCH_NAMES
32427 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32428
32429 for (i = 0; i < NUM_ARCH_NAMES; i++)
32430 if (strcmp (arch_names_table[i].name,
32431 TREE_STRING_POINTER (param_string_cst)) == 0)
32432 break;
32433
32434 if (i == NUM_ARCH_NAMES)
32435 {
32436 error ("Parameter to builtin not valid: %s",
32437 TREE_STRING_POINTER (param_string_cst));
32438 return integer_zero_node;
32439 }
32440
32441 field = TYPE_FIELDS (__processor_model_type);
32442 field_val = arch_names_table[i].model;
32443
32444 /* CPU types are stored in the next field. */
32445 if (field_val > M_CPU_TYPE_START
32446 && field_val < M_CPU_SUBTYPE_START)
32447 {
32448 field = DECL_CHAIN (field);
32449 field_val -= M_CPU_TYPE_START;
32450 }
32451
32452 /* CPU subtypes are stored in the next field. */
32453 if (field_val > M_CPU_SUBTYPE_START)
32454 {
32455 field = DECL_CHAIN (DECL_CHAIN (field));
32456 field_val -= M_CPU_SUBTYPE_START;
32457 }
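/* Worked example (illustrative): for "nehalem", field_val starts as
   M_INTEL_COREI7_NEHALEM, which is greater than M_CPU_SUBTYPE_START,
   so the third field (__cpu_subtype) is selected and field_val becomes
   M_INTEL_COREI7_NEHALEM - M_CPU_SUBTYPE_START.  */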
32458
32459 /* Get the appropriate field in __cpu_model. */
32460 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32461 field, NULL_TREE);
32462
32463 /* Check the value. */
32464 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32465 build_int_cstu (unsigned_type_node, field_val));
32466 return build1 (CONVERT_EXPR, integer_type_node, final);
32467 }
32468 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32469 {
32470 tree ref;
32471 tree array_elt;
32472 tree field;
32473 tree final;
32474
32475 unsigned int field_val = 0;
32476 unsigned int NUM_ISA_NAMES
32477 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32478
32479 for (i = 0; i < NUM_ISA_NAMES; i++)
32480 if (strcmp (isa_names_table[i].name,
32481 TREE_STRING_POINTER (param_string_cst)) == 0)
32482 break;
32483
32484 if (i == NUM_ISA_NAMES)
32485 {
32486 error ("Parameter to builtin not valid: %s",
32487 TREE_STRING_POINTER (param_string_cst));
32488 return integer_zero_node;
32489 }
32490
32491 field = TYPE_FIELDS (__processor_model_type);
32492 /* Get the last field, which is __cpu_features. */
32493 while (DECL_CHAIN (field))
32494 field = DECL_CHAIN (field);
32495
32496 /* Get the appropriate field: __cpu_model.__cpu_features */
32497 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32498 field, NULL_TREE);
32499
32500 /* Access the 0th element of __cpu_features array. */
32501 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32502 integer_zero_node, NULL_TREE, NULL_TREE);
32503
32504 field_val = (1 << isa_names_table[i].feature);
32505 /* Return __cpu_model.__cpu_features[0] & field_val */
32506 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32507 build_int_cstu (unsigned_type_node, field_val));
32508 return build1 (CONVERT_EXPR, integer_type_node, final);
32509 }
32510 gcc_unreachable ();
32511 }
32512
32513 static tree
32514 ix86_fold_builtin (tree fndecl, int n_args,
32515 tree *args, bool ignore ATTRIBUTE_UNUSED)
32516 {
32517 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32518 {
32519 enum ix86_builtins fn_code = (enum ix86_builtins)
32520 DECL_FUNCTION_CODE (fndecl);
32521 if (fn_code == IX86_BUILTIN_CPU_IS
32522 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32523 {
32524 gcc_assert (n_args == 1);
32525 return fold_builtin_cpu (fndecl, args);
32526 }
32527 }
32528
32529 #ifdef SUBTARGET_FOLD_BUILTIN
32530 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32531 #endif
32532
32533 return NULL_TREE;
32534 }
32535
32536 /* Make builtins to detect cpu type and features supported. NAME is
32537 the builtin name, CODE is the builtin code, and FTYPE is the function
32538 type of the builtin. */
32539
32540 static void
32541 make_cpu_type_builtin (const char* name, int code,
32542 enum ix86_builtin_func_type ftype, bool is_const)
32543 {
32544 tree decl;
32545 tree type;
32546
32547 type = ix86_get_builtin_func_type (ftype);
32548 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32549 NULL, NULL_TREE);
32550 gcc_assert (decl != NULL_TREE);
32551 ix86_builtins[(int) code] = decl;
32552 TREE_READONLY (decl) = is_const;
32553 }
32554
32555 /* Make builtins to get CPU type and features supported. The created
32556 builtins are:
32557
32558 __builtin_cpu_init (), to detect cpu type and features,
32559 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32560 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32561 */
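/* A minimal usage sketch (user code, not part of this file):

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7"))
         return 1;
       if (__builtin_cpu_supports ("avx2"))
         return 2;
       return 0;
     }

   The string arguments must be literals; fold_builtin_cpu (above)
   rewrites the calls into checks against the __cpu_model variable.  */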
32562
32563 static void
32564 ix86_init_platform_type_builtins (void)
32565 {
32566 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32567 INT_FTYPE_VOID, false);
32568 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32569 INT_FTYPE_PCCHAR, true);
32570 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32571 INT_FTYPE_PCCHAR, true);
32572 }
32573
32574 /* Internal method for ix86_init_builtins. */
32575
32576 static void
32577 ix86_init_builtins_va_builtins_abi (void)
32578 {
32579 tree ms_va_ref, sysv_va_ref;
32580 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32581 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32582 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32583 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32584
32585 if (!TARGET_64BIT)
32586 return;
32587 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32588 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32589 ms_va_ref = build_reference_type (ms_va_list_type_node);
32590 sysv_va_ref =
32591 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32592
32593 fnvoid_va_end_ms =
32594 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32595 fnvoid_va_start_ms =
32596 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32597 fnvoid_va_end_sysv =
32598 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32599 fnvoid_va_start_sysv =
32600 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32601 NULL_TREE);
32602 fnvoid_va_copy_ms =
32603 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32604 NULL_TREE);
32605 fnvoid_va_copy_sysv =
32606 build_function_type_list (void_type_node, sysv_va_ref,
32607 sysv_va_ref, NULL_TREE);
32608
32609 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32610 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32611 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32612 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32613 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32614 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32615 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32616 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32617 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32618 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32619 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32620 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32621 }
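/* Illustrative only: the ms_ and sysv_ builtins registered above back
   varargs handling for functions whose ABI differs from the target
   default. A sketch of user code, assuming the usual builtin type name
   __builtin_ms_va_list:

     void __attribute__ ((ms_abi))
     log_all (const char *fmt, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, fmt);
       ...
       __builtin_ms_va_end (ap);
     }
*/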
32622
32623 static void
32624 ix86_init_builtin_types (void)
32625 {
32626 tree float128_type_node, float80_type_node;
32627
32628 /* The __float80 type. */
32629 float80_type_node = long_double_type_node;
32630 if (TYPE_MODE (float80_type_node) != XFmode)
32631 {
32632 /* The __float80 type. */
32633 float80_type_node = make_node (REAL_TYPE);
32634
32635 TYPE_PRECISION (float80_type_node) = 80;
32636 layout_type (float80_type_node);
32637 }
32638 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32639
32640 /* The __float128 type. */
32641 float128_type_node = make_node (REAL_TYPE);
32642 TYPE_PRECISION (float128_type_node) = 128;
32643 layout_type (float128_type_node);
32644 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32645
32646 /* This macro is built by i386-builtin-types.awk. */
32647 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32648 }
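/* For illustration, the registrations above let user code on this target
   name the extended types directly (a sketch; the Q suffix for
   __float128 literals is an i386/x86_64 extension):

     __float80  ext  = 1.0L;
     __float128 quad = 1.0Q;
*/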
32649
32650 static void
32651 ix86_init_builtins (void)
32652 {
32653 tree t;
32654
32655 ix86_init_builtin_types ();
32656
32657 /* Builtins to get CPU type and features. */
32658 ix86_init_platform_type_builtins ();
32659
32660 /* TFmode support builtins. */
32661 def_builtin_const (0, "__builtin_infq",
32662 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32663 def_builtin_const (0, "__builtin_huge_valq",
32664 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32665
32666 /* We will expand them to a normal call if SSE isn't available,
32667 since they are used by libgcc. */
32668 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32669 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32670 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32671 TREE_READONLY (t) = 1;
32672 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32673
32674 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32675 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32676 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32677 TREE_READONLY (t) = 1;
32678 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32679
32680 ix86_init_tm_builtins ();
32681 ix86_init_mmx_sse_builtins ();
32682
32683 if (TARGET_LP64)
32684 ix86_init_builtins_va_builtins_abi ();
32685
32686 #ifdef SUBTARGET_INIT_BUILTINS
32687 SUBTARGET_INIT_BUILTINS;
32688 #endif
32689 }
32690
32691 /* Return the ix86 builtin for CODE. */
32692
32693 static tree
32694 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32695 {
32696 if (code >= IX86_BUILTIN_MAX)
32697 return error_mark_node;
32698
32699 return ix86_builtins[code];
32700 }
32701
32702 /* Errors in the source file can cause expand_expr to return const0_rtx
32703 where we expect a vector. To avoid crashing, use one of the vector
32704 clear instructions. */
32705 static rtx
32706 safe_vector_operand (rtx x, enum machine_mode mode)
32707 {
32708 if (x == const0_rtx)
32709 x = CONST0_RTX (mode);
32710 return x;
32711 }
32712
32713 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32714
32715 static rtx
32716 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32717 {
32718 rtx pat;
32719 tree arg0 = CALL_EXPR_ARG (exp, 0);
32720 tree arg1 = CALL_EXPR_ARG (exp, 1);
32721 rtx op0 = expand_normal (arg0);
32722 rtx op1 = expand_normal (arg1);
32723 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32724 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32725 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32726
32727 if (VECTOR_MODE_P (mode0))
32728 op0 = safe_vector_operand (op0, mode0);
32729 if (VECTOR_MODE_P (mode1))
32730 op1 = safe_vector_operand (op1, mode1);
32731
32732 if (optimize || !target
32733 || GET_MODE (target) != tmode
32734 || !insn_data[icode].operand[0].predicate (target, tmode))
32735 target = gen_reg_rtx (tmode);
32736
32737 if (GET_MODE (op1) == SImode && mode1 == TImode)
32738 {
32739 rtx x = gen_reg_rtx (V4SImode);
32740 emit_insn (gen_sse2_loadd (x, op1));
32741 op1 = gen_lowpart (TImode, x);
32742 }
32743
32744 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32745 op0 = copy_to_mode_reg (mode0, op0);
32746 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32747 op1 = copy_to_mode_reg (mode1, op1);
32748
32749 pat = GEN_FCN (icode) (target, op0, op1);
32750 if (! pat)
32751 return 0;
32752
32753 emit_insn (pat);
32754
32755 return target;
32756 }
32757
32758 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32759
32760 static rtx
32761 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32762 enum ix86_builtin_func_type m_type,
32763 enum rtx_code sub_code)
32764 {
32765 rtx pat;
32766 int i;
32767 int nargs;
32768 bool comparison_p = false;
32769 bool tf_p = false;
32770 bool last_arg_constant = false;
32771 int num_memory = 0;
32772 struct {
32773 rtx op;
32774 enum machine_mode mode;
32775 } args[4];
32776
32777 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32778
32779 switch (m_type)
32780 {
32781 case MULTI_ARG_4_DF2_DI_I:
32782 case MULTI_ARG_4_DF2_DI_I1:
32783 case MULTI_ARG_4_SF2_SI_I:
32784 case MULTI_ARG_4_SF2_SI_I1:
32785 nargs = 4;
32786 last_arg_constant = true;
32787 break;
32788
32789 case MULTI_ARG_3_SF:
32790 case MULTI_ARG_3_DF:
32791 case MULTI_ARG_3_SF2:
32792 case MULTI_ARG_3_DF2:
32793 case MULTI_ARG_3_DI:
32794 case MULTI_ARG_3_SI:
32795 case MULTI_ARG_3_SI_DI:
32796 case MULTI_ARG_3_HI:
32797 case MULTI_ARG_3_HI_SI:
32798 case MULTI_ARG_3_QI:
32799 case MULTI_ARG_3_DI2:
32800 case MULTI_ARG_3_SI2:
32801 case MULTI_ARG_3_HI2:
32802 case MULTI_ARG_3_QI2:
32803 nargs = 3;
32804 break;
32805
32806 case MULTI_ARG_2_SF:
32807 case MULTI_ARG_2_DF:
32808 case MULTI_ARG_2_DI:
32809 case MULTI_ARG_2_SI:
32810 case MULTI_ARG_2_HI:
32811 case MULTI_ARG_2_QI:
32812 nargs = 2;
32813 break;
32814
32815 case MULTI_ARG_2_DI_IMM:
32816 case MULTI_ARG_2_SI_IMM:
32817 case MULTI_ARG_2_HI_IMM:
32818 case MULTI_ARG_2_QI_IMM:
32819 nargs = 2;
32820 last_arg_constant = true;
32821 break;
32822
32823 case MULTI_ARG_1_SF:
32824 case MULTI_ARG_1_DF:
32825 case MULTI_ARG_1_SF2:
32826 case MULTI_ARG_1_DF2:
32827 case MULTI_ARG_1_DI:
32828 case MULTI_ARG_1_SI:
32829 case MULTI_ARG_1_HI:
32830 case MULTI_ARG_1_QI:
32831 case MULTI_ARG_1_SI_DI:
32832 case MULTI_ARG_1_HI_DI:
32833 case MULTI_ARG_1_HI_SI:
32834 case MULTI_ARG_1_QI_DI:
32835 case MULTI_ARG_1_QI_SI:
32836 case MULTI_ARG_1_QI_HI:
32837 nargs = 1;
32838 break;
32839
32840 case MULTI_ARG_2_DI_CMP:
32841 case MULTI_ARG_2_SI_CMP:
32842 case MULTI_ARG_2_HI_CMP:
32843 case MULTI_ARG_2_QI_CMP:
32844 nargs = 2;
32845 comparison_p = true;
32846 break;
32847
32848 case MULTI_ARG_2_SF_TF:
32849 case MULTI_ARG_2_DF_TF:
32850 case MULTI_ARG_2_DI_TF:
32851 case MULTI_ARG_2_SI_TF:
32852 case MULTI_ARG_2_HI_TF:
32853 case MULTI_ARG_2_QI_TF:
32854 nargs = 2;
32855 tf_p = true;
32856 break;
32857
32858 default:
32859 gcc_unreachable ();
32860 }
32861
32862 if (optimize || !target
32863 || GET_MODE (target) != tmode
32864 || !insn_data[icode].operand[0].predicate (target, tmode))
32865 target = gen_reg_rtx (tmode);
32866
32867 gcc_assert (nargs <= 4);
32868
32869 for (i = 0; i < nargs; i++)
32870 {
32871 tree arg = CALL_EXPR_ARG (exp, i);
32872 rtx op = expand_normal (arg);
32873 int adjust = (comparison_p) ? 1 : 0;
32874 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32875
32876 if (last_arg_constant && i == nargs - 1)
32877 {
32878 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32879 {
32880 enum insn_code new_icode = icode;
32881 switch (icode)
32882 {
32883 case CODE_FOR_xop_vpermil2v2df3:
32884 case CODE_FOR_xop_vpermil2v4sf3:
32885 case CODE_FOR_xop_vpermil2v4df3:
32886 case CODE_FOR_xop_vpermil2v8sf3:
32887 error ("the last argument must be a 2-bit immediate");
32888 return gen_reg_rtx (tmode);
32889 case CODE_FOR_xop_rotlv2di3:
32890 new_icode = CODE_FOR_rotlv2di3;
32891 goto xop_rotl;
32892 case CODE_FOR_xop_rotlv4si3:
32893 new_icode = CODE_FOR_rotlv4si3;
32894 goto xop_rotl;
32895 case CODE_FOR_xop_rotlv8hi3:
32896 new_icode = CODE_FOR_rotlv8hi3;
32897 goto xop_rotl;
32898 case CODE_FOR_xop_rotlv16qi3:
32899 new_icode = CODE_FOR_rotlv16qi3;
32900 xop_rotl:
32901 if (CONST_INT_P (op))
32902 {
32903 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32904 op = GEN_INT (INTVAL (op) & mask);
32905 gcc_checking_assert
32906 (insn_data[icode].operand[i + 1].predicate (op, mode));
32907 }
32908 else
32909 {
32910 gcc_checking_assert
32911 (nargs == 2
32912 && insn_data[new_icode].operand[0].mode == tmode
32913 && insn_data[new_icode].operand[1].mode == tmode
32914 && insn_data[new_icode].operand[2].mode == mode
32915 && insn_data[new_icode].operand[0].predicate
32916 == insn_data[icode].operand[0].predicate
32917 && insn_data[new_icode].operand[1].predicate
32918 == insn_data[icode].operand[1].predicate);
32919 icode = new_icode;
32920 goto non_constant;
32921 }
32922 break;
32923 default:
32924 gcc_unreachable ();
32925 }
32926 }
32927 }
32928 else
32929 {
32930 non_constant:
32931 if (VECTOR_MODE_P (mode))
32932 op = safe_vector_operand (op, mode);
32933
32934 /* If we aren't optimizing, only allow one memory operand to be
32935 generated. */
32936 if (memory_operand (op, mode))
32937 num_memory++;
32938
32939 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32940
32941 if (optimize
32942 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32943 || num_memory > 1)
32944 op = force_reg (mode, op);
32945 }
32946
32947 args[i].op = op;
32948 args[i].mode = mode;
32949 }
32950
32951 switch (nargs)
32952 {
32953 case 1:
32954 pat = GEN_FCN (icode) (target, args[0].op);
32955 break;
32956
32957 case 2:
32958 if (tf_p)
32959 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32960 GEN_INT ((int)sub_code));
32961 else if (! comparison_p)
32962 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32963 else
32964 {
32965 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32966 args[0].op,
32967 args[1].op);
32968
32969 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32970 }
32971 break;
32972
32973 case 3:
32974 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32975 break;
32976
32977 case 4:
32978 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
32979 break;
32980
32981 default:
32982 gcc_unreachable ();
32983 }
32984
32985 if (! pat)
32986 return 0;
32987
32988 emit_insn (pat);
32989 return target;
32990 }
32991
32992 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
32993 insns with vec_merge. */
32994
32995 static rtx
32996 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
32997 rtx target)
32998 {
32999 rtx pat;
33000 tree arg0 = CALL_EXPR_ARG (exp, 0);
33001 rtx op1, op0 = expand_normal (arg0);
33002 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33003 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33004
33005 if (optimize || !target
33006 || GET_MODE (target) != tmode
33007 || !insn_data[icode].operand[0].predicate (target, tmode))
33008 target = gen_reg_rtx (tmode);
33009
33010 if (VECTOR_MODE_P (mode0))
33011 op0 = safe_vector_operand (op0, mode0);
33012
33013 if ((optimize && !register_operand (op0, mode0))
33014 || !insn_data[icode].operand[1].predicate (op0, mode0))
33015 op0 = copy_to_mode_reg (mode0, op0);
33016
33017 op1 = op0;
33018 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33019 op1 = copy_to_mode_reg (mode0, op1);
33020
33021 pat = GEN_FCN (icode) (target, op0, op1);
33022 if (! pat)
33023 return 0;
33024 emit_insn (pat);
33025 return target;
33026 }
33027
33028 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33029
33030 static rtx
33031 ix86_expand_sse_compare (const struct builtin_description *d,
33032 tree exp, rtx target, bool swap)
33033 {
33034 rtx pat;
33035 tree arg0 = CALL_EXPR_ARG (exp, 0);
33036 tree arg1 = CALL_EXPR_ARG (exp, 1);
33037 rtx op0 = expand_normal (arg0);
33038 rtx op1 = expand_normal (arg1);
33039 rtx op2;
33040 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33041 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33042 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33043 enum rtx_code comparison = d->comparison;
33044
33045 if (VECTOR_MODE_P (mode0))
33046 op0 = safe_vector_operand (op0, mode0);
33047 if (VECTOR_MODE_P (mode1))
33048 op1 = safe_vector_operand (op1, mode1);
33049
33050 /* Swap operands if we have a comparison that isn't available in
33051 hardware. */
33052 if (swap)
33053 {
33054 rtx tmp = gen_reg_rtx (mode1);
33055 emit_move_insn (tmp, op1);
33056 op1 = op0;
33057 op0 = tmp;
33058 }
33059
33060 if (optimize || !target
33061 || GET_MODE (target) != tmode
33062 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33063 target = gen_reg_rtx (tmode);
33064
33065 if ((optimize && !register_operand (op0, mode0))
33066 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33067 op0 = copy_to_mode_reg (mode0, op0);
33068 if ((optimize && !register_operand (op1, mode1))
33069 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33070 op1 = copy_to_mode_reg (mode1, op1);
33071
33072 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33073 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33074 if (! pat)
33075 return 0;
33076 emit_insn (pat);
33077 return target;
33078 }
33079
33080 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33081
33082 static rtx
33083 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33084 rtx target)
33085 {
33086 rtx pat;
33087 tree arg0 = CALL_EXPR_ARG (exp, 0);
33088 tree arg1 = CALL_EXPR_ARG (exp, 1);
33089 rtx op0 = expand_normal (arg0);
33090 rtx op1 = expand_normal (arg1);
33091 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33092 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33093 enum rtx_code comparison = d->comparison;
33094
33095 if (VECTOR_MODE_P (mode0))
33096 op0 = safe_vector_operand (op0, mode0);
33097 if (VECTOR_MODE_P (mode1))
33098 op1 = safe_vector_operand (op1, mode1);
33099
33100 /* Swap operands if we have a comparison that isn't available in
33101 hardware. */
33102 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33103 {
33104 rtx tmp = op1;
33105 op1 = op0;
33106 op0 = tmp;
33107 }
33108
33109 target = gen_reg_rtx (SImode);
33110 emit_move_insn (target, const0_rtx);
33111 target = gen_rtx_SUBREG (QImode, target, 0);
33112
33113 if ((optimize && !register_operand (op0, mode0))
33114 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33115 op0 = copy_to_mode_reg (mode0, op0);
33116 if ((optimize && !register_operand (op1, mode1))
33117 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33118 op1 = copy_to_mode_reg (mode1, op1);
33119
33120 pat = GEN_FCN (d->icode) (op0, op1);
33121 if (! pat)
33122 return 0;
33123 emit_insn (pat);
33124 emit_insn (gen_rtx_SET (VOIDmode,
33125 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33126 gen_rtx_fmt_ee (comparison, QImode,
33127 SET_DEST (pat),
33128 const0_rtx)));
33129
33130 return SUBREG_REG (target);
33131 }
33132
33133 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33134
33135 static rtx
33136 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33137 rtx target)
33138 {
33139 rtx pat;
33140 tree arg0 = CALL_EXPR_ARG (exp, 0);
33141 rtx op1, op0 = expand_normal (arg0);
33142 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33143 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33144
33145 if (optimize || target == 0
33146 || GET_MODE (target) != tmode
33147 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33148 target = gen_reg_rtx (tmode);
33149
33150 if (VECTOR_MODE_P (mode0))
33151 op0 = safe_vector_operand (op0, mode0);
33152
33153 if ((optimize && !register_operand (op0, mode0))
33154 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33155 op0 = copy_to_mode_reg (mode0, op0);
33156
33157 op1 = GEN_INT (d->comparison);
33158
33159 pat = GEN_FCN (d->icode) (target, op0, op1);
33160 if (! pat)
33161 return 0;
33162 emit_insn (pat);
33163 return target;
33164 }
33165
33166 static rtx
33167 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33168 tree exp, rtx target)
33169 {
33170 rtx pat;
33171 tree arg0 = CALL_EXPR_ARG (exp, 0);
33172 tree arg1 = CALL_EXPR_ARG (exp, 1);
33173 rtx op0 = expand_normal (arg0);
33174 rtx op1 = expand_normal (arg1);
33175 rtx op2;
33176 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33177 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33178 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33179
33180 if (optimize || target == 0
33181 || GET_MODE (target) != tmode
33182 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33183 target = gen_reg_rtx (tmode);
33184
33185 op0 = safe_vector_operand (op0, mode0);
33186 op1 = safe_vector_operand (op1, mode1);
33187
33188 if ((optimize && !register_operand (op0, mode0))
33189 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33190 op0 = copy_to_mode_reg (mode0, op0);
33191 if ((optimize && !register_operand (op1, mode1))
33192 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33193 op1 = copy_to_mode_reg (mode1, op1);
33194
33195 op2 = GEN_INT (d->comparison);
33196
33197 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33198 if (! pat)
33199 return 0;
33200 emit_insn (pat);
33201 return target;
33202 }
33203
33204 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33205
33206 static rtx
33207 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33208 rtx target)
33209 {
33210 rtx pat;
33211 tree arg0 = CALL_EXPR_ARG (exp, 0);
33212 tree arg1 = CALL_EXPR_ARG (exp, 1);
33213 rtx op0 = expand_normal (arg0);
33214 rtx op1 = expand_normal (arg1);
33215 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33216 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33217 enum rtx_code comparison = d->comparison;
33218
33219 if (VECTOR_MODE_P (mode0))
33220 op0 = safe_vector_operand (op0, mode0);
33221 if (VECTOR_MODE_P (mode1))
33222 op1 = safe_vector_operand (op1, mode1);
33223
33224 target = gen_reg_rtx (SImode);
33225 emit_move_insn (target, const0_rtx);
33226 target = gen_rtx_SUBREG (QImode, target, 0);
33227
33228 if ((optimize && !register_operand (op0, mode0))
33229 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33230 op0 = copy_to_mode_reg (mode0, op0);
33231 if ((optimize && !register_operand (op1, mode1))
33232 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33233 op1 = copy_to_mode_reg (mode1, op1);
33234
33235 pat = GEN_FCN (d->icode) (op0, op1);
33236 if (! pat)
33237 return 0;
33238 emit_insn (pat);
33239 emit_insn (gen_rtx_SET (VOIDmode,
33240 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33241 gen_rtx_fmt_ee (comparison, QImode,
33242 SET_DEST (pat),
33243 const0_rtx)));
33244
33245 return SUBREG_REG (target);
33246 }
33247
33248 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33249
33250 static rtx
33251 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33252 tree exp, rtx target)
33253 {
33254 rtx pat;
33255 tree arg0 = CALL_EXPR_ARG (exp, 0);
33256 tree arg1 = CALL_EXPR_ARG (exp, 1);
33257 tree arg2 = CALL_EXPR_ARG (exp, 2);
33258 tree arg3 = CALL_EXPR_ARG (exp, 3);
33259 tree arg4 = CALL_EXPR_ARG (exp, 4);
33260 rtx scratch0, scratch1;
33261 rtx op0 = expand_normal (arg0);
33262 rtx op1 = expand_normal (arg1);
33263 rtx op2 = expand_normal (arg2);
33264 rtx op3 = expand_normal (arg3);
33265 rtx op4 = expand_normal (arg4);
33266 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33267
33268 tmode0 = insn_data[d->icode].operand[0].mode;
33269 tmode1 = insn_data[d->icode].operand[1].mode;
33270 modev2 = insn_data[d->icode].operand[2].mode;
33271 modei3 = insn_data[d->icode].operand[3].mode;
33272 modev4 = insn_data[d->icode].operand[4].mode;
33273 modei5 = insn_data[d->icode].operand[5].mode;
33274 modeimm = insn_data[d->icode].operand[6].mode;
33275
33276 if (VECTOR_MODE_P (modev2))
33277 op0 = safe_vector_operand (op0, modev2);
33278 if (VECTOR_MODE_P (modev4))
33279 op2 = safe_vector_operand (op2, modev4);
33280
33281 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33282 op0 = copy_to_mode_reg (modev2, op0);
33283 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33284 op1 = copy_to_mode_reg (modei3, op1);
33285 if ((optimize && !register_operand (op2, modev4))
33286 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33287 op2 = copy_to_mode_reg (modev4, op2);
33288 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33289 op3 = copy_to_mode_reg (modei5, op3);
33290
33291 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33292 {
33293 error ("the fifth argument must be an 8-bit immediate");
33294 return const0_rtx;
33295 }
33296
33297 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33298 {
33299 if (optimize || !target
33300 || GET_MODE (target) != tmode0
33301 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33302 target = gen_reg_rtx (tmode0);
33303
33304 scratch1 = gen_reg_rtx (tmode1);
33305
33306 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33307 }
33308 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33309 {
33310 if (optimize || !target
33311 || GET_MODE (target) != tmode1
33312 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33313 target = gen_reg_rtx (tmode1);
33314
33315 scratch0 = gen_reg_rtx (tmode0);
33316
33317 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33318 }
33319 else
33320 {
33321 gcc_assert (d->flag);
33322
33323 scratch0 = gen_reg_rtx (tmode0);
33324 scratch1 = gen_reg_rtx (tmode1);
33325
33326 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33327 }
33328
33329 if (! pat)
33330 return 0;
33331
33332 emit_insn (pat);
33333
33334 if (d->flag)
33335 {
33336 target = gen_reg_rtx (SImode);
33337 emit_move_insn (target, const0_rtx);
33338 target = gen_rtx_SUBREG (QImode, target, 0);
33339
33340 emit_insn
33341 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33342 gen_rtx_fmt_ee (EQ, QImode,
33343 gen_rtx_REG ((enum machine_mode) d->flag,
33344 FLAGS_REG),
33345 const0_rtx)));
33346 return SUBREG_REG (target);
33347 }
33348 else
33349 return target;
33350 }
33351
33352
33353 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33354
33355 static rtx
33356 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33357 tree exp, rtx target)
33358 {
33359 rtx pat;
33360 tree arg0 = CALL_EXPR_ARG (exp, 0);
33361 tree arg1 = CALL_EXPR_ARG (exp, 1);
33362 tree arg2 = CALL_EXPR_ARG (exp, 2);
33363 rtx scratch0, scratch1;
33364 rtx op0 = expand_normal (arg0);
33365 rtx op1 = expand_normal (arg1);
33366 rtx op2 = expand_normal (arg2);
33367 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33368
33369 tmode0 = insn_data[d->icode].operand[0].mode;
33370 tmode1 = insn_data[d->icode].operand[1].mode;
33371 modev2 = insn_data[d->icode].operand[2].mode;
33372 modev3 = insn_data[d->icode].operand[3].mode;
33373 modeimm = insn_data[d->icode].operand[4].mode;
33374
33375 if (VECTOR_MODE_P (modev2))
33376 op0 = safe_vector_operand (op0, modev2);
33377 if (VECTOR_MODE_P (modev3))
33378 op1 = safe_vector_operand (op1, modev3);
33379
33380 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33381 op0 = copy_to_mode_reg (modev2, op0);
33382 if ((optimize && !register_operand (op1, modev3))
33383 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33384 op1 = copy_to_mode_reg (modev3, op1);
33385
33386 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33387 {
33388 error ("the third argument must be an 8-bit immediate");
33389 return const0_rtx;
33390 }
33391
33392 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33393 {
33394 if (optimize || !target
33395 || GET_MODE (target) != tmode0
33396 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33397 target = gen_reg_rtx (tmode0);
33398
33399 scratch1 = gen_reg_rtx (tmode1);
33400
33401 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33402 }
33403 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33404 {
33405 if (optimize || !target
33406 || GET_MODE (target) != tmode1
33407 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33408 target = gen_reg_rtx (tmode1);
33409
33410 scratch0 = gen_reg_rtx (tmode0);
33411
33412 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33413 }
33414 else
33415 {
33416 gcc_assert (d->flag);
33417
33418 scratch0 = gen_reg_rtx (tmode0);
33419 scratch1 = gen_reg_rtx (tmode1);
33420
33421 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33422 }
33423
33424 if (! pat)
33425 return 0;
33426
33427 emit_insn (pat);
33428
33429 if (d->flag)
33430 {
33431 target = gen_reg_rtx (SImode);
33432 emit_move_insn (target, const0_rtx);
33433 target = gen_rtx_SUBREG (QImode, target, 0);
33434
33435 emit_insn
33436 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33437 gen_rtx_fmt_ee (EQ, QImode,
33438 gen_rtx_REG ((enum machine_mode) d->flag,
33439 FLAGS_REG),
33440 const0_rtx)));
33441 return SUBREG_REG (target);
33442 }
33443 else
33444 return target;
33445 }
33446
33447 /* Subroutine of ix86_expand_builtin to take care of insns with
33448 a variable number of operands. */
33449
33450 static rtx
33451 ix86_expand_args_builtin (const struct builtin_description *d,
33452 tree exp, rtx target)
33453 {
33454 rtx pat, real_target;
33455 unsigned int i, nargs;
33456 unsigned int nargs_constant = 0;
33457 unsigned int mask_pos = 0;
33458 int num_memory = 0;
33459 struct
33460 {
33461 rtx op;
33462 enum machine_mode mode;
33463 } args[6];
33464 bool last_arg_count = false;
33465 enum insn_code icode = d->icode;
33466 const struct insn_data_d *insn_p = &insn_data[icode];
33467 enum machine_mode tmode = insn_p->operand[0].mode;
33468 enum machine_mode rmode = VOIDmode;
33469 bool swap = false;
33470 enum rtx_code comparison = d->comparison;
33471
33472 switch ((enum ix86_builtin_func_type) d->flag)
33473 {
33474 case V2DF_FTYPE_V2DF_ROUND:
33475 case V4DF_FTYPE_V4DF_ROUND:
33476 case V4SF_FTYPE_V4SF_ROUND:
33477 case V8SF_FTYPE_V8SF_ROUND:
33478 case V4SI_FTYPE_V4SF_ROUND:
33479 case V8SI_FTYPE_V8SF_ROUND:
33480 return ix86_expand_sse_round (d, exp, target);
33481 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33482 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33483 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33484 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33485 case INT_FTYPE_V8SF_V8SF_PTEST:
33486 case INT_FTYPE_V4DI_V4DI_PTEST:
33487 case INT_FTYPE_V4DF_V4DF_PTEST:
33488 case INT_FTYPE_V4SF_V4SF_PTEST:
33489 case INT_FTYPE_V2DI_V2DI_PTEST:
33490 case INT_FTYPE_V2DF_V2DF_PTEST:
33491 return ix86_expand_sse_ptest (d, exp, target);
33492 case FLOAT128_FTYPE_FLOAT128:
33493 case FLOAT_FTYPE_FLOAT:
33494 case INT_FTYPE_INT:
33495 case UINT64_FTYPE_INT:
33496 case UINT16_FTYPE_UINT16:
33497 case INT64_FTYPE_INT64:
33498 case INT64_FTYPE_V4SF:
33499 case INT64_FTYPE_V2DF:
33500 case INT_FTYPE_V16QI:
33501 case INT_FTYPE_V8QI:
33502 case INT_FTYPE_V8SF:
33503 case INT_FTYPE_V4DF:
33504 case INT_FTYPE_V4SF:
33505 case INT_FTYPE_V2DF:
33506 case INT_FTYPE_V32QI:
33507 case V16QI_FTYPE_V16QI:
33508 case V8SI_FTYPE_V8SF:
33509 case V8SI_FTYPE_V4SI:
33510 case V8HI_FTYPE_V8HI:
33511 case V8HI_FTYPE_V16QI:
33512 case V8QI_FTYPE_V8QI:
33513 case V8SF_FTYPE_V8SF:
33514 case V8SF_FTYPE_V8SI:
33515 case V8SF_FTYPE_V4SF:
33516 case V8SF_FTYPE_V8HI:
33517 case V4SI_FTYPE_V4SI:
33518 case V4SI_FTYPE_V16QI:
33519 case V4SI_FTYPE_V4SF:
33520 case V4SI_FTYPE_V8SI:
33521 case V4SI_FTYPE_V8HI:
33522 case V4SI_FTYPE_V4DF:
33523 case V4SI_FTYPE_V2DF:
33524 case V4HI_FTYPE_V4HI:
33525 case V4DF_FTYPE_V4DF:
33526 case V4DF_FTYPE_V4SI:
33527 case V4DF_FTYPE_V4SF:
33528 case V4DF_FTYPE_V2DF:
33529 case V4SF_FTYPE_V4SF:
33530 case V4SF_FTYPE_V4SI:
33531 case V4SF_FTYPE_V8SF:
33532 case V4SF_FTYPE_V4DF:
33533 case V4SF_FTYPE_V8HI:
33534 case V4SF_FTYPE_V2DF:
33535 case V2DI_FTYPE_V2DI:
33536 case V2DI_FTYPE_V16QI:
33537 case V2DI_FTYPE_V8HI:
33538 case V2DI_FTYPE_V4SI:
33539 case V2DF_FTYPE_V2DF:
33540 case V2DF_FTYPE_V4SI:
33541 case V2DF_FTYPE_V4DF:
33542 case V2DF_FTYPE_V4SF:
33543 case V2DF_FTYPE_V2SI:
33544 case V2SI_FTYPE_V2SI:
33545 case V2SI_FTYPE_V4SF:
33546 case V2SI_FTYPE_V2SF:
33547 case V2SI_FTYPE_V2DF:
33548 case V2SF_FTYPE_V2SF:
33549 case V2SF_FTYPE_V2SI:
33550 case V32QI_FTYPE_V32QI:
33551 case V32QI_FTYPE_V16QI:
33552 case V16HI_FTYPE_V16HI:
33553 case V16HI_FTYPE_V8HI:
33554 case V8SI_FTYPE_V8SI:
33555 case V16HI_FTYPE_V16QI:
33556 case V8SI_FTYPE_V16QI:
33557 case V4DI_FTYPE_V16QI:
33558 case V8SI_FTYPE_V8HI:
33559 case V4DI_FTYPE_V8HI:
33560 case V4DI_FTYPE_V4SI:
33561 case V4DI_FTYPE_V2DI:
33562 case HI_FTYPE_HI:
33563 case UINT_FTYPE_V2DF:
33564 case UINT_FTYPE_V4SF:
33565 case UINT64_FTYPE_V2DF:
33566 case UINT64_FTYPE_V4SF:
33567 case V16QI_FTYPE_V8DI:
33568 case V16HI_FTYPE_V16SI:
33569 case V16SI_FTYPE_HI:
33570 case V16SI_FTYPE_V16SI:
33571 case V16SI_FTYPE_INT:
33572 case V16SF_FTYPE_FLOAT:
33573 case V16SF_FTYPE_V4SF:
33574 case V16SF_FTYPE_V16SF:
33575 case V8HI_FTYPE_V8DI:
33576 case V8UHI_FTYPE_V8UHI:
33577 case V8SI_FTYPE_V8DI:
33578 case V8USI_FTYPE_V8USI:
33579 case V8SF_FTYPE_V8DF:
33580 case V8DI_FTYPE_QI:
33581 case V8DI_FTYPE_INT64:
33582 case V8DI_FTYPE_V4DI:
33583 case V8DI_FTYPE_V8DI:
33584 case V8DF_FTYPE_DOUBLE:
33585 case V8DF_FTYPE_V4DF:
33586 case V8DF_FTYPE_V8DF:
33587 case V8DF_FTYPE_V8SI:
33588 nargs = 1;
33589 break;
33590 case V4SF_FTYPE_V4SF_VEC_MERGE:
33591 case V2DF_FTYPE_V2DF_VEC_MERGE:
33592 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33593 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33594 case V16QI_FTYPE_V16QI_V16QI:
33595 case V16QI_FTYPE_V8HI_V8HI:
33596 case V16SI_FTYPE_V16SI_V16SI:
33597 case V16SF_FTYPE_V16SF_V16SF:
33598 case V16SF_FTYPE_V16SF_V16SI:
33599 case V8QI_FTYPE_V8QI_V8QI:
33600 case V8QI_FTYPE_V4HI_V4HI:
33601 case V8HI_FTYPE_V8HI_V8HI:
33602 case V8HI_FTYPE_V16QI_V16QI:
33603 case V8HI_FTYPE_V4SI_V4SI:
33604 case V8SF_FTYPE_V8SF_V8SF:
33605 case V8SF_FTYPE_V8SF_V8SI:
33606 case V8DI_FTYPE_V8DI_V8DI:
33607 case V8DF_FTYPE_V8DF_V8DF:
33608 case V8DF_FTYPE_V8DF_V8DI:
33609 case V4SI_FTYPE_V4SI_V4SI:
33610 case V4SI_FTYPE_V8HI_V8HI:
33611 case V4SI_FTYPE_V4SF_V4SF:
33612 case V4SI_FTYPE_V2DF_V2DF:
33613 case V4HI_FTYPE_V4HI_V4HI:
33614 case V4HI_FTYPE_V8QI_V8QI:
33615 case V4HI_FTYPE_V2SI_V2SI:
33616 case V4DF_FTYPE_V4DF_V4DF:
33617 case V4DF_FTYPE_V4DF_V4DI:
33618 case V4SF_FTYPE_V4SF_V4SF:
33619 case V4SF_FTYPE_V4SF_V4SI:
33620 case V4SF_FTYPE_V4SF_V2SI:
33621 case V4SF_FTYPE_V4SF_V2DF:
33622 case V4SF_FTYPE_V4SF_UINT:
33623 case V4SF_FTYPE_V4SF_UINT64:
33624 case V4SF_FTYPE_V4SF_DI:
33625 case V4SF_FTYPE_V4SF_SI:
33626 case V2DI_FTYPE_V2DI_V2DI:
33627 case V2DI_FTYPE_V16QI_V16QI:
33628 case V2DI_FTYPE_V4SI_V4SI:
33629 case V2UDI_FTYPE_V4USI_V4USI:
33630 case V2DI_FTYPE_V2DI_V16QI:
33631 case V2DI_FTYPE_V2DF_V2DF:
33632 case V2SI_FTYPE_V2SI_V2SI:
33633 case V2SI_FTYPE_V4HI_V4HI:
33634 case V2SI_FTYPE_V2SF_V2SF:
33635 case V2DF_FTYPE_V2DF_V2DF:
33636 case V2DF_FTYPE_V2DF_V4SF:
33637 case V2DF_FTYPE_V2DF_V2DI:
33638 case V2DF_FTYPE_V2DF_DI:
33639 case V2DF_FTYPE_V2DF_SI:
33640 case V2DF_FTYPE_V2DF_UINT:
33641 case V2DF_FTYPE_V2DF_UINT64:
33642 case V2SF_FTYPE_V2SF_V2SF:
33643 case V1DI_FTYPE_V1DI_V1DI:
33644 case V1DI_FTYPE_V8QI_V8QI:
33645 case V1DI_FTYPE_V2SI_V2SI:
33646 case V32QI_FTYPE_V16HI_V16HI:
33647 case V16HI_FTYPE_V8SI_V8SI:
33648 case V32QI_FTYPE_V32QI_V32QI:
33649 case V16HI_FTYPE_V32QI_V32QI:
33650 case V16HI_FTYPE_V16HI_V16HI:
33651 case V8SI_FTYPE_V4DF_V4DF:
33652 case V8SI_FTYPE_V8SI_V8SI:
33653 case V8SI_FTYPE_V16HI_V16HI:
33654 case V4DI_FTYPE_V4DI_V4DI:
33655 case V4DI_FTYPE_V8SI_V8SI:
33656 case V4UDI_FTYPE_V8USI_V8USI:
33657 case QI_FTYPE_V8DI_V8DI:
33658 case HI_FTYPE_V16SI_V16SI:
33659 if (comparison == UNKNOWN)
33660 return ix86_expand_binop_builtin (icode, exp, target);
33661 nargs = 2;
33662 break;
33663 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33664 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33665 gcc_assert (comparison != UNKNOWN);
33666 nargs = 2;
33667 swap = true;
33668 break;
33669 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33670 case V16HI_FTYPE_V16HI_SI_COUNT:
33671 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33672 case V8SI_FTYPE_V8SI_SI_COUNT:
33673 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33674 case V4DI_FTYPE_V4DI_INT_COUNT:
33675 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33676 case V8HI_FTYPE_V8HI_SI_COUNT:
33677 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33678 case V4SI_FTYPE_V4SI_SI_COUNT:
33679 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33680 case V4HI_FTYPE_V4HI_SI_COUNT:
33681 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33682 case V2DI_FTYPE_V2DI_SI_COUNT:
33683 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33684 case V2SI_FTYPE_V2SI_SI_COUNT:
33685 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33686 case V1DI_FTYPE_V1DI_SI_COUNT:
33687 nargs = 2;
33688 last_arg_count = true;
33689 break;
33690 case UINT64_FTYPE_UINT64_UINT64:
33691 case UINT_FTYPE_UINT_UINT:
33692 case UINT_FTYPE_UINT_USHORT:
33693 case UINT_FTYPE_UINT_UCHAR:
33694 case UINT16_FTYPE_UINT16_INT:
33695 case UINT8_FTYPE_UINT8_INT:
33696 case HI_FTYPE_HI_HI:
33697 case V16SI_FTYPE_V8DF_V8DF:
33698 nargs = 2;
33699 break;
33700 case V2DI_FTYPE_V2DI_INT_CONVERT:
33701 nargs = 2;
33702 rmode = V1TImode;
33703 nargs_constant = 1;
33704 break;
33705 case V4DI_FTYPE_V4DI_INT_CONVERT:
33706 nargs = 2;
33707 rmode = V2TImode;
33708 nargs_constant = 1;
33709 break;
33710 case V8HI_FTYPE_V8HI_INT:
33711 case V8HI_FTYPE_V8SF_INT:
33712 case V16HI_FTYPE_V16SF_INT:
33713 case V8HI_FTYPE_V4SF_INT:
33714 case V8SF_FTYPE_V8SF_INT:
33715 case V4SF_FTYPE_V16SF_INT:
33716 case V16SF_FTYPE_V16SF_INT:
33717 case V4SI_FTYPE_V4SI_INT:
33718 case V4SI_FTYPE_V8SI_INT:
33719 case V4HI_FTYPE_V4HI_INT:
33720 case V4DF_FTYPE_V4DF_INT:
33721 case V4DF_FTYPE_V8DF_INT:
33722 case V4SF_FTYPE_V4SF_INT:
33723 case V4SF_FTYPE_V8SF_INT:
33724 case V2DI_FTYPE_V2DI_INT:
33725 case V2DF_FTYPE_V2DF_INT:
33726 case V2DF_FTYPE_V4DF_INT:
33727 case V16HI_FTYPE_V16HI_INT:
33728 case V8SI_FTYPE_V8SI_INT:
33729 case V16SI_FTYPE_V16SI_INT:
33730 case V4SI_FTYPE_V16SI_INT:
33731 case V4DI_FTYPE_V4DI_INT:
33732 case V2DI_FTYPE_V4DI_INT:
33733 case V4DI_FTYPE_V8DI_INT:
33734 case HI_FTYPE_HI_INT:
33735 nargs = 2;
33736 nargs_constant = 1;
33737 break;
33738 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33739 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33740 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33741 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33742 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33743 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33744 case HI_FTYPE_V16SI_V16SI_HI:
33745 case QI_FTYPE_V8DI_V8DI_QI:
33746 case V16HI_FTYPE_V16SI_V16HI_HI:
33747 case V16QI_FTYPE_V16SI_V16QI_HI:
33748 case V16QI_FTYPE_V8DI_V16QI_QI:
33749 case V16SF_FTYPE_V16SF_V16SF_HI:
33750 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33751 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33752 case V16SF_FTYPE_V16SI_V16SF_HI:
33753 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33754 case V16SF_FTYPE_V4SF_V16SF_HI:
33755 case V16SI_FTYPE_SI_V16SI_HI:
33756 case V16SI_FTYPE_V16HI_V16SI_HI:
33757 case V16SI_FTYPE_V16QI_V16SI_HI:
33758 case V16SI_FTYPE_V16SF_V16SI_HI:
33759 case V16SI_FTYPE_V16SI_V16SI_HI:
33760 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33761 case V16SI_FTYPE_V4SI_V16SI_HI:
33762 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33763 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33764 case V8DF_FTYPE_V2DF_V8DF_QI:
33765 case V8DF_FTYPE_V4DF_V8DF_QI:
33766 case V8DF_FTYPE_V8DF_V8DF_QI:
33767 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33768 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33769 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33770 case V8DF_FTYPE_V8SF_V8DF_QI:
33771 case V8DF_FTYPE_V8SI_V8DF_QI:
33772 case V8DI_FTYPE_DI_V8DI_QI:
33773 case V8DI_FTYPE_V16QI_V8DI_QI:
33774 case V8DI_FTYPE_V2DI_V8DI_QI:
33775 case V8DI_FTYPE_V4DI_V8DI_QI:
33776 case V8DI_FTYPE_V8DI_V8DI_QI:
33777 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33778 case V8DI_FTYPE_V8HI_V8DI_QI:
33779 case V8DI_FTYPE_V8SI_V8DI_QI:
33780 case V8HI_FTYPE_V8DI_V8HI_QI:
33781 case V8SF_FTYPE_V8DF_V8SF_QI:
33782 case V8SI_FTYPE_V8DF_V8SI_QI:
33783 case V8SI_FTYPE_V8DI_V8SI_QI:
33784 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33785 nargs = 3;
33786 break;
33787 case V32QI_FTYPE_V32QI_V32QI_INT:
33788 case V16HI_FTYPE_V16HI_V16HI_INT:
33789 case V16QI_FTYPE_V16QI_V16QI_INT:
33790 case V4DI_FTYPE_V4DI_V4DI_INT:
33791 case V8HI_FTYPE_V8HI_V8HI_INT:
33792 case V8SI_FTYPE_V8SI_V8SI_INT:
33793 case V8SI_FTYPE_V8SI_V4SI_INT:
33794 case V8SF_FTYPE_V8SF_V8SF_INT:
33795 case V8SF_FTYPE_V8SF_V4SF_INT:
33796 case V4SI_FTYPE_V4SI_V4SI_INT:
33797 case V4DF_FTYPE_V4DF_V4DF_INT:
33798 case V16SF_FTYPE_V16SF_V16SF_INT:
33799 case V16SF_FTYPE_V16SF_V4SF_INT:
33800 case V16SI_FTYPE_V16SI_V4SI_INT:
33801 case V4DF_FTYPE_V4DF_V2DF_INT:
33802 case V4SF_FTYPE_V4SF_V4SF_INT:
33803 case V2DI_FTYPE_V2DI_V2DI_INT:
33804 case V4DI_FTYPE_V4DI_V2DI_INT:
33805 case V2DF_FTYPE_V2DF_V2DF_INT:
33806 case QI_FTYPE_V8DI_V8DI_INT:
33807 case QI_FTYPE_V8DF_V8DF_INT:
33808 case QI_FTYPE_V2DF_V2DF_INT:
33809 case QI_FTYPE_V4SF_V4SF_INT:
33810 case HI_FTYPE_V16SI_V16SI_INT:
33811 case HI_FTYPE_V16SF_V16SF_INT:
33812 nargs = 3;
33813 nargs_constant = 1;
33814 break;
33815 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33816 nargs = 3;
33817 rmode = V4DImode;
33818 nargs_constant = 1;
33819 break;
33820 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33821 nargs = 3;
33822 rmode = V2DImode;
33823 nargs_constant = 1;
33824 break;
33825 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33826 nargs = 3;
33827 rmode = DImode;
33828 nargs_constant = 1;
33829 break;
33830 case V2DI_FTYPE_V2DI_UINT_UINT:
33831 nargs = 3;
33832 nargs_constant = 2;
33833 break;
33834 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33835 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33836 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33837 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33838 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33839 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33840 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33841 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33842 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33843 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33844 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33845 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33846 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33847 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33848 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33849 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33850 nargs = 4;
33851 break;
33852 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33853 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33854 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33855 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33856 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33857 nargs = 4;
33858 nargs_constant = 1;
33859 break;
33860 case QI_FTYPE_V2DF_V2DF_INT_QI:
33861 case QI_FTYPE_V4SF_V4SF_INT_QI:
33862 nargs = 4;
33863 mask_pos = 1;
33864 nargs_constant = 1;
33865 break;
33866 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33867 nargs = 4;
33868 nargs_constant = 2;
33869 break;
33870 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33871 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33872 nargs = 4;
33873 break;
33874 case QI_FTYPE_V8DI_V8DI_INT_QI:
33875 case HI_FTYPE_V16SI_V16SI_INT_HI:
33876 case QI_FTYPE_V8DF_V8DF_INT_QI:
33877 case HI_FTYPE_V16SF_V16SF_INT_HI:
33878 mask_pos = 1;
33879 nargs = 4;
33880 nargs_constant = 1;
33881 break;
33882 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33883 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33884 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33885 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33886 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33887 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33888 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33889 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33890 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33891 nargs = 4;
33892 mask_pos = 2;
33893 nargs_constant = 1;
33894 break;
33895 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33896 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33897 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33898 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33899 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33900 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33901 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33902 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33903 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33904 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33905 nargs = 5;
33906 mask_pos = 2;
33907 nargs_constant = 1;
33908 break;
33909 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33910 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33911 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33912 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33913 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33914 nargs = 5;
33915 mask_pos = 1;
33916 nargs_constant = 1;
33917 break;
33918
33919 default:
33920 gcc_unreachable ();
33921 }
33922
33923 gcc_assert (nargs <= ARRAY_SIZE (args));
33924
33925 if (comparison != UNKNOWN)
33926 {
33927 gcc_assert (nargs == 2);
33928 return ix86_expand_sse_compare (d, exp, target, swap);
33929 }
33930
33931 if (rmode == VOIDmode || rmode == tmode)
33932 {
33933 if (optimize
33934 || target == 0
33935 || GET_MODE (target) != tmode
33936 || !insn_p->operand[0].predicate (target, tmode))
33937 target = gen_reg_rtx (tmode);
33938 real_target = target;
33939 }
33940 else
33941 {
33942 real_target = gen_reg_rtx (tmode);
33943 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
33944 }
33945
33946 for (i = 0; i < nargs; i++)
33947 {
33948 tree arg = CALL_EXPR_ARG (exp, i);
33949 rtx op = expand_normal (arg);
33950 enum machine_mode mode = insn_p->operand[i + 1].mode;
33951 bool match = insn_p->operand[i + 1].predicate (op, mode);
33952
33953 if (last_arg_count && (i + 1) == nargs)
33954 {
33955 /* SIMD shift insns take either an 8-bit immediate or a
33956 register as the count. But the builtin functions take an int as
33957 the count. If the count doesn't match, we put it in a register. */
33958 if (!match)
33959 {
33960 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
33961 if (!insn_p->operand[i + 1].predicate (op, mode))
33962 op = copy_to_reg (op);
33963 }
33964 }
33965 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
33966 (!mask_pos && (nargs - i) <= nargs_constant))
33967 {
33968 if (!match)
33969 switch (icode)
33970 {
33971 case CODE_FOR_avx2_inserti128:
33972 case CODE_FOR_avx2_extracti128:
33973 error ("the last argument must be an 1-bit immediate");
33974 return const0_rtx;
33975
33976 case CODE_FOR_avx512f_cmpv8di3_mask:
33977 case CODE_FOR_avx512f_cmpv16si3_mask:
33978 case CODE_FOR_avx512f_ucmpv8di3_mask:
33979 case CODE_FOR_avx512f_ucmpv16si3_mask:
33980 error ("the last argument must be a 3-bit immediate");
33981 return const0_rtx;
33982
33983 case CODE_FOR_sse4_1_roundsd:
33984 case CODE_FOR_sse4_1_roundss:
33985
33986 case CODE_FOR_sse4_1_roundpd:
33987 case CODE_FOR_sse4_1_roundps:
33988 case CODE_FOR_avx_roundpd256:
33989 case CODE_FOR_avx_roundps256:
33990
33991 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
33992 case CODE_FOR_sse4_1_roundps_sfix:
33993 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
33994 case CODE_FOR_avx_roundps_sfix256:
33995
33996 case CODE_FOR_sse4_1_blendps:
33997 case CODE_FOR_avx_blendpd256:
33998 case CODE_FOR_avx_vpermilv4df:
33999 case CODE_FOR_avx512f_getmantv8df_mask:
34000 case CODE_FOR_avx512f_getmantv16sf_mask:
34001 error ("the last argument must be a 4-bit immediate");
34002 return const0_rtx;
34003
34004 case CODE_FOR_sha1rnds4:
34005 case CODE_FOR_sse4_1_blendpd:
34006 case CODE_FOR_avx_vpermilv2df:
34007 case CODE_FOR_xop_vpermil2v2df3:
34008 case CODE_FOR_xop_vpermil2v4sf3:
34009 case CODE_FOR_xop_vpermil2v4df3:
34010 case CODE_FOR_xop_vpermil2v8sf3:
34011 case CODE_FOR_avx512f_vinsertf32x4_mask:
34012 case CODE_FOR_avx512f_vinserti32x4_mask:
34013 case CODE_FOR_avx512f_vextractf32x4_mask:
34014 case CODE_FOR_avx512f_vextracti32x4_mask:
34015 error ("the last argument must be a 2-bit immediate");
34016 return const0_rtx;
34017
34018 case CODE_FOR_avx_vextractf128v4df:
34019 case CODE_FOR_avx_vextractf128v8sf:
34020 case CODE_FOR_avx_vextractf128v8si:
34021 case CODE_FOR_avx_vinsertf128v4df:
34022 case CODE_FOR_avx_vinsertf128v8sf:
34023 case CODE_FOR_avx_vinsertf128v8si:
34024 case CODE_FOR_avx512f_vinsertf64x4_mask:
34025 case CODE_FOR_avx512f_vinserti64x4_mask:
34026 case CODE_FOR_avx512f_vextractf64x4_mask:
34027 case CODE_FOR_avx512f_vextracti64x4_mask:
34028 error ("the last argument must be a 1-bit immediate");
34029 return const0_rtx;
34030
34031 case CODE_FOR_avx_vmcmpv2df3:
34032 case CODE_FOR_avx_vmcmpv4sf3:
34033 case CODE_FOR_avx_cmpv2df3:
34034 case CODE_FOR_avx_cmpv4sf3:
34035 case CODE_FOR_avx_cmpv4df3:
34036 case CODE_FOR_avx_cmpv8sf3:
34037 case CODE_FOR_avx512f_cmpv8df3_mask:
34038 case CODE_FOR_avx512f_cmpv16sf3_mask:
34039 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34040 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34041 error ("the last argument must be a 5-bit immediate");
34042 return const0_rtx;
34043
34044 default:
34045 switch (nargs_constant)
34046 {
34047 case 2:
34048 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34049 (!mask_pos && (nargs - i) == nargs_constant))
34050 {
34051 error ("the next to last argument must be an 8-bit immediate");
34052 break;
34053 }
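/* FALLTHRU */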
34054 case 1:
34055 error ("the last argument must be an 8-bit immediate");
34056 break;
34057 default:
34058 gcc_unreachable ();
34059 }
34060 return const0_rtx;
34061 }
34062 }
34063 else
34064 {
34065 if (VECTOR_MODE_P (mode))
34066 op = safe_vector_operand (op, mode);
34067
34068 /* If we aren't optimizing, only allow one memory operand to
34069 be generated. */
34070 if (memory_operand (op, mode))
34071 num_memory++;
34072
34073 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34074 {
34075 if (optimize || !match || num_memory > 1)
34076 op = copy_to_mode_reg (mode, op);
34077 }
34078 else
34079 {
34080 op = copy_to_reg (op);
34081 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34082 }
34083 }
34084
34085 args[i].op = op;
34086 args[i].mode = mode;
34087 }
34088
34089 switch (nargs)
34090 {
34091 case 1:
34092 pat = GEN_FCN (icode) (real_target, args[0].op);
34093 break;
34094 case 2:
34095 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34096 break;
34097 case 3:
34098 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34099 args[2].op);
34100 break;
34101 case 4:
34102 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34103 args[2].op, args[3].op);
34104 break;
34105 case 5:
34106 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34107 args[2].op, args[3].op, args[4].op);
break;
34108 case 6:
34109 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34110 args[2].op, args[3].op, args[4].op,
34111 args[5].op);
34112 break;
34113 default:
34114 gcc_unreachable ();
34115 }
34116
34117 if (! pat)
34118 return 0;
34119
34120 emit_insn (pat);
34121 return target;
34122 }
34123
34124 /* Transform a pattern of the following layout:
34125 (parallel [
34126 (set (A B))
34127 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34128 ])
34129 into:
34130 (set (A B))
34131
34132 Or:
34133 (parallel [ A B
34134 ...
34135 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34136 ...
34137 ])
34138 into:
34139 (parallel [ A B ... ]) */
34140
34141 static rtx
34142 ix86_erase_embedded_rounding (rtx pat)
34143 {
34144 if (GET_CODE (pat) == INSN)
34145 pat = PATTERN (pat);
34146
34147 gcc_assert (GET_CODE (pat) == PARALLEL);
34148
34149 if (XVECLEN (pat, 0) == 2)
34150 {
34151 rtx p0 = XVECEXP (pat, 0, 0);
34152 rtx p1 = XVECEXP (pat, 0, 1);
34153
34154 gcc_assert (GET_CODE (p0) == SET
34155 && GET_CODE (p1) == UNSPEC
34156 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34157
34158 return p0;
34159 }
34160 else
34161 {
34162 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34163 int i = 0;
34164 int j = 0;
34165
34166 for (; i < XVECLEN (pat, 0); ++i)
34167 {
34168 rtx elem = XVECEXP (pat, 0, i);
34169 if (GET_CODE (elem) != UNSPEC
34170 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34171 res [j++] = elem;
34172 }
34173
34174 /* No more than one occurrence was removed. */
34175 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34176
34177 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34178 }
34179 }
34180
34181 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34182 with rounding. */
34183 static rtx
34184 ix86_expand_sse_comi_round (const struct builtin_description *d,
34185 tree exp, rtx target)
34186 {
34187 rtx pat, set_dst;
34188 tree arg0 = CALL_EXPR_ARG (exp, 0);
34189 tree arg1 = CALL_EXPR_ARG (exp, 1);
34190 tree arg2 = CALL_EXPR_ARG (exp, 2);
34191 tree arg3 = CALL_EXPR_ARG (exp, 3);
34192 rtx op0 = expand_normal (arg0);
34193 rtx op1 = expand_normal (arg1);
34194 rtx op2 = expand_normal (arg2);
34195 rtx op3 = expand_normal (arg3);
34196 enum insn_code icode = d->icode;
34197 const struct insn_data_d *insn_p = &insn_data[icode];
34198 enum machine_mode mode0 = insn_p->operand[0].mode;
34199 enum machine_mode mode1 = insn_p->operand[1].mode;
34200 enum rtx_code comparison = UNEQ;
34201 bool need_ucomi = false;
34202
34203 /* See avxintrin.h for values. */
34204 enum rtx_code comi_comparisons[32] =
34205 {
34206 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34207 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34208 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34209 };
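/* Predicates that must not signal on QNaN operands use the ucomi (quiet)
form instead of comi; see need_ucomi_values below. */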
34210 bool need_ucomi_values[32] =
34211 {
34212 true, false, false, true, true, false, false, true,
34213 true, false, false, true, true, false, false, true,
34214 false, true, true, false, false, true, true, false,
34215 false, true, true, false, false, true, true, false
34216 };
34217
34218 if (!CONST_INT_P (op2))
34219 {
34220 error ("the third argument must be comparison constant");
34221 return const0_rtx;
34222 }
34223 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34224 {
34225 error ("incorect comparison mode");
34226 return const0_rtx;
34227 }
34228
34229 if (!insn_p->operand[2].predicate (op3, SImode))
34230 {
34231 error ("incorrect rounding operand");
34232 return const0_rtx;
34233 }
34234
34235 comparison = comi_comparisons[INTVAL (op2)];
34236 need_ucomi = need_ucomi_values[INTVAL (op2)];
34237
34238 if (VECTOR_MODE_P (mode0))
34239 op0 = safe_vector_operand (op0, mode0);
34240 if (VECTOR_MODE_P (mode1))
34241 op1 = safe_vector_operand (op1, mode1);
34242
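/* Compute the comparison into the low byte of a zeroed SImode pseudo,
so the SImode value returned below is either 0 or 1. */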
34243 target = gen_reg_rtx (SImode);
34244 emit_move_insn (target, const0_rtx);
34245 target = gen_rtx_SUBREG (QImode, target, 0);
34246
34247 if ((optimize && !register_operand (op0, mode0))
34248 || !insn_p->operand[0].predicate (op0, mode0))
34249 op0 = copy_to_mode_reg (mode0, op0);
34250 if ((optimize && !register_operand (op1, mode1))
34251 || !insn_p->operand[1].predicate (op1, mode1))
34252 op1 = copy_to_mode_reg (mode1, op1);
34253
34254 if (need_ucomi)
34255 icode = icode == CODE_FOR_sse_comi_round
34256 ? CODE_FOR_sse_ucomi_round
34257 : CODE_FOR_sse2_ucomi_round;
34258
34259 pat = GEN_FCN (icode) (op0, op1, op3);
34260 if (! pat)
34261 return 0;
34262
34263 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34264 if (INTVAL (op3) == NO_ROUND)
34265 {
34266 pat = ix86_erase_embedded_rounding (pat);
34267 if (! pat)
34268 return 0;
34269
34270 set_dst = SET_DEST (pat);
34271 }
34272 else
34273 {
34274 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34275 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34276 }
34277
34278 emit_insn (pat);
34279 emit_insn (gen_rtx_SET (VOIDmode,
34280 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34281 gen_rtx_fmt_ee (comparison, QImode,
34282 set_dst,
34283 const0_rtx)));
34284
34285 return SUBREG_REG (target);
34286 }
34287
34288 static rtx
34289 ix86_expand_round_builtin (const struct builtin_description *d,
34290 tree exp, rtx target)
34291 {
34292 rtx pat;
34293 unsigned int i, nargs;
34294 struct
34295 {
34296 rtx op;
34297 enum machine_mode mode;
34298 } args[6];
34299 enum insn_code icode = d->icode;
34300 const struct insn_data_d *insn_p = &insn_data[icode];
34301 enum machine_mode tmode = insn_p->operand[0].mode;
34302 unsigned int nargs_constant = 0;
34303 unsigned int redundant_embed_rnd = 0;
34304
34305 switch ((enum ix86_builtin_func_type) d->flag)
34306 {
34307 case UINT64_FTYPE_V2DF_INT:
34308 case UINT64_FTYPE_V4SF_INT:
34309 case UINT_FTYPE_V2DF_INT:
34310 case UINT_FTYPE_V4SF_INT:
34311 case INT64_FTYPE_V2DF_INT:
34312 case INT64_FTYPE_V4SF_INT:
34313 case INT_FTYPE_V2DF_INT:
34314 case INT_FTYPE_V4SF_INT:
34315 nargs = 2;
34316 break;
34317 case V4SF_FTYPE_V4SF_UINT_INT:
34318 case V4SF_FTYPE_V4SF_UINT64_INT:
34319 case V2DF_FTYPE_V2DF_UINT64_INT:
34320 case V4SF_FTYPE_V4SF_INT_INT:
34321 case V4SF_FTYPE_V4SF_INT64_INT:
34322 case V2DF_FTYPE_V2DF_INT64_INT:
34323 case V4SF_FTYPE_V4SF_V4SF_INT:
34324 case V2DF_FTYPE_V2DF_V2DF_INT:
34325 case V4SF_FTYPE_V4SF_V2DF_INT:
34326 case V2DF_FTYPE_V2DF_V4SF_INT:
34327 nargs = 3;
34328 break;
34329 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34330 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34331 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34332 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34333 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34334 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34335 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34336 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34337 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34338 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34339 nargs = 4;
34340 break;
34341 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34342 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34343 nargs_constant = 2;
34344 nargs = 4;
34345 break;
34346 case INT_FTYPE_V4SF_V4SF_INT_INT:
34347 case INT_FTYPE_V2DF_V2DF_INT_INT:
34348 return ix86_expand_sse_comi_round (d, exp, target);
34349 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34350 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34351 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34352 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34353 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34354 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34355 nargs = 5;
34356 break;
34357 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34358 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34359 nargs_constant = 4;
34360 nargs = 5;
34361 break;
34362 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34363 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34364 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34365 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34366 nargs_constant = 3;
34367 nargs = 5;
34368 break;
34369 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34370 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34371 nargs = 6;
34372 nargs_constant = 4;
34373 break;
34374 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34375 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34376 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34377 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34378 nargs = 6;
34379 nargs_constant = 3;
34380 break;
34381 default:
34382 gcc_unreachable ();
34383 }
34384 gcc_assert (nargs <= ARRAY_SIZE (args));
34385
34386 if (optimize
34387 || target == 0
34388 || GET_MODE (target) != tmode
34389 || !insn_p->operand[0].predicate (target, tmode))
34390 target = gen_reg_rtx (tmode);
34391
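/* The last operand is always the embedded-rounding immediate; when
nargs_constant is nonzero, operand nargs - nargs_constant is an
additional immediate checked against its predicate. */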
34392 for (i = 0; i < nargs; i++)
34393 {
34394 tree arg = CALL_EXPR_ARG (exp, i);
34395 rtx op = expand_normal (arg);
34396 enum machine_mode mode = insn_p->operand[i + 1].mode;
34397 bool match = insn_p->operand[i + 1].predicate (op, mode);
34398
34399 if (i == nargs - nargs_constant)
34400 {
34401 if (!match)
34402 {
34403 switch (icode)
34404 {
34405 case CODE_FOR_avx512f_getmantv8df_mask_round:
34406 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34407 case CODE_FOR_avx512f_getmantv2df_round:
34408 case CODE_FOR_avx512f_getmantv4sf_round:
34409 error ("the immediate argument must be a 4-bit immediate");
34410 return const0_rtx;
34411 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34412 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34413 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34414 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34415 error ("the immediate argument must be a 5-bit immediate");
34416 return const0_rtx;
34417 default:
34418 error ("the immediate argument must be an 8-bit immediate");
34419 return const0_rtx;
34420 }
34421 }
34422 }
34423 else if (i == nargs-1)
34424 {
34425 if (!insn_p->operand[nargs].predicate (op, SImode))
34426 {
34427 error ("incorrect rounding operand");
34428 return const0_rtx;
34429 }
34430
34431 /* If there is no rounding, use the normal version of the pattern. */
34432 if (INTVAL (op) == NO_ROUND)
34433 redundant_embed_rnd = 1;
34434 }
34435 else
34436 {
34437 if (VECTOR_MODE_P (mode))
34438 op = safe_vector_operand (op, mode);
34439
34440 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34441 {
34442 if (optimize || !match)
34443 op = copy_to_mode_reg (mode, op);
34444 }
34445 else
34446 {
34447 op = copy_to_reg (op);
34448 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34449 }
34450 }
34451
34452 args[i].op = op;
34453 args[i].mode = mode;
34454 }
34455
34456 switch (nargs)
34457 {
34458 case 1:
34459 pat = GEN_FCN (icode) (target, args[0].op);
34460 break;
34461 case 2:
34462 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34463 break;
34464 case 3:
34465 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34466 args[2].op);
34467 break;
34468 case 4:
34469 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34470 args[2].op, args[3].op);
34471 break;
34472 case 5:
34473 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34474 args[2].op, args[3].op, args[4].op);
break;
34475 case 6:
34476 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34477 args[2].op, args[3].op, args[4].op,
34478 args[5].op);
34479 break;
34480 default:
34481 gcc_unreachable ();
34482 }
34483
34484 if (!pat)
34485 return 0;
34486
34487 if (redundant_embed_rnd)
34488 pat = ix86_erase_embedded_rounding (pat);
34489
34490 emit_insn (pat);
34491 return target;
34492 }
34493
34494 /* Subroutine of ix86_expand_builtin to take care of special insns
34495 with variable number of operands. */
34496
34497 static rtx
34498 ix86_expand_special_args_builtin (const struct builtin_description *d,
34499 tree exp, rtx target)
34500 {
34501 tree arg;
34502 rtx pat, op;
34503 unsigned int i, nargs, arg_adjust, memory;
34504 bool aligned_mem = false;
34505 struct
34506 {
34507 rtx op;
34508 enum machine_mode mode;
34509 } args[3];
34510 enum insn_code icode = d->icode;
34511 bool last_arg_constant = false;
34512 const struct insn_data_d *insn_p = &insn_data[icode];
34513 enum machine_mode tmode = insn_p->operand[0].mode;
34514 enum { load, store } klass;
34515
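/* klass selects whether the builtin produces a value (load) or only has
side effects (store); store-class builtins pass their first argument
as the insn's operand 0 and return nothing. */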
34516 switch ((enum ix86_builtin_func_type) d->flag)
34517 {
34518 case VOID_FTYPE_VOID:
34519 emit_insn (GEN_FCN (icode) (target));
34520 return 0;
34521 case VOID_FTYPE_UINT64:
34522 case VOID_FTYPE_UNSIGNED:
34523 nargs = 0;
34524 klass = store;
34525 memory = 0;
34526 break;
34527
34528 case INT_FTYPE_VOID:
34529 case UINT64_FTYPE_VOID:
34530 case UNSIGNED_FTYPE_VOID:
34531 nargs = 0;
34532 klass = load;
34533 memory = 0;
34534 break;
34535 case UINT64_FTYPE_PUNSIGNED:
34536 case V2DI_FTYPE_PV2DI:
34537 case V4DI_FTYPE_PV4DI:
34538 case V32QI_FTYPE_PCCHAR:
34539 case V16QI_FTYPE_PCCHAR:
34540 case V8SF_FTYPE_PCV4SF:
34541 case V8SF_FTYPE_PCFLOAT:
34542 case V4SF_FTYPE_PCFLOAT:
34543 case V4DF_FTYPE_PCV2DF:
34544 case V4DF_FTYPE_PCDOUBLE:
34545 case V2DF_FTYPE_PCDOUBLE:
34546 case VOID_FTYPE_PVOID:
34547 case V16SI_FTYPE_PV4SI:
34548 case V16SF_FTYPE_PV4SF:
34549 case V8DI_FTYPE_PV4DI:
34550 case V8DI_FTYPE_PV8DI:
34551 case V8DF_FTYPE_PV4DF:
34552 nargs = 1;
34553 klass = load;
34554 memory = 0;
34555 switch (icode)
34556 {
34557 case CODE_FOR_sse4_1_movntdqa:
34558 case CODE_FOR_avx2_movntdqa:
34559 case CODE_FOR_avx512f_movntdqa:
34560 aligned_mem = true;
34561 break;
34562 default:
34563 break;
34564 }
34565 break;
34566 case VOID_FTYPE_PV2SF_V4SF:
34567 case VOID_FTYPE_PV8DI_V8DI:
34568 case VOID_FTYPE_PV4DI_V4DI:
34569 case VOID_FTYPE_PV2DI_V2DI:
34570 case VOID_FTYPE_PCHAR_V32QI:
34571 case VOID_FTYPE_PCHAR_V16QI:
34572 case VOID_FTYPE_PFLOAT_V16SF:
34573 case VOID_FTYPE_PFLOAT_V8SF:
34574 case VOID_FTYPE_PFLOAT_V4SF:
34575 case VOID_FTYPE_PDOUBLE_V8DF:
34576 case VOID_FTYPE_PDOUBLE_V4DF:
34577 case VOID_FTYPE_PDOUBLE_V2DF:
34578 case VOID_FTYPE_PLONGLONG_LONGLONG:
34579 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34580 case VOID_FTYPE_PINT_INT:
34581 nargs = 1;
34582 klass = store;
34583 /* Reserve memory operand for target. */
34584 memory = ARRAY_SIZE (args);
34585 switch (icode)
34586 {
34587 /* These builtins and instructions require the memory
34588 to be properly aligned. */
34589 case CODE_FOR_avx_movntv4di:
34590 case CODE_FOR_sse2_movntv2di:
34591 case CODE_FOR_avx_movntv8sf:
34592 case CODE_FOR_sse_movntv4sf:
34593 case CODE_FOR_sse4a_vmmovntv4sf:
34594 case CODE_FOR_avx_movntv4df:
34595 case CODE_FOR_sse2_movntv2df:
34596 case CODE_FOR_sse4a_vmmovntv2df:
34597 case CODE_FOR_sse2_movntidi:
34598 case CODE_FOR_sse_movntq:
34599 case CODE_FOR_sse2_movntisi:
34600 case CODE_FOR_avx512f_movntv16sf:
34601 case CODE_FOR_avx512f_movntv8df:
34602 case CODE_FOR_avx512f_movntv8di:
34603 aligned_mem = true;
34604 break;
34605 default:
34606 break;
34607 }
34608 break;
34609 case V4SF_FTYPE_V4SF_PCV2SF:
34610 case V2DF_FTYPE_V2DF_PCDOUBLE:
34611 nargs = 2;
34612 klass = load;
34613 memory = 1;
34614 break;
34615 case V8SF_FTYPE_PCV8SF_V8SI:
34616 case V4DF_FTYPE_PCV4DF_V4DI:
34617 case V4SF_FTYPE_PCV4SF_V4SI:
34618 case V2DF_FTYPE_PCV2DF_V2DI:
34619 case V8SI_FTYPE_PCV8SI_V8SI:
34620 case V4DI_FTYPE_PCV4DI_V4DI:
34621 case V4SI_FTYPE_PCV4SI_V4SI:
34622 case V2DI_FTYPE_PCV2DI_V2DI:
34623 nargs = 2;
34624 klass = load;
34625 memory = 0;
34626 break;
34627 case VOID_FTYPE_PV8DF_V8DF_QI:
34628 case VOID_FTYPE_PV16SF_V16SF_HI:
34629 case VOID_FTYPE_PV8DI_V8DI_QI:
34630 case VOID_FTYPE_PV16SI_V16SI_HI:
34631 switch (icode)
34632 {
34633 /* These builtins and instructions require the memory
34634 to be properly aligned. */
34635 case CODE_FOR_avx512f_storev16sf_mask:
34636 case CODE_FOR_avx512f_storev16si_mask:
34637 case CODE_FOR_avx512f_storev8df_mask:
34638 case CODE_FOR_avx512f_storev8di_mask:
34639 aligned_mem = true;
34640 break;
34641 default:
34642 break;
34643 }
34644 /* FALLTHRU */
34645 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34646 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34647 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34648 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34649 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34650 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34651 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34652 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34653 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34654 case VOID_FTYPE_PFLOAT_V4SF_QI:
34655 case VOID_FTYPE_PV8SI_V8DI_QI:
34656 case VOID_FTYPE_PV8HI_V8DI_QI:
34657 case VOID_FTYPE_PV16HI_V16SI_HI:
34658 case VOID_FTYPE_PV16QI_V8DI_QI:
34659 case VOID_FTYPE_PV16QI_V16SI_HI:
34660 nargs = 2;
34661 klass = store;
34662 /* Reserve memory operand for target. */
34663 memory = ARRAY_SIZE (args);
34664 break;
34665 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34666 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34667 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34668 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34669 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34670 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34671 nargs = 3;
34672 klass = load;
34673 memory = 0;
34674 switch (icode)
34675 {
34676 /* These builtins and instructions require the memory
34677 to be properly aligned. */
34678 case CODE_FOR_avx512f_loadv16sf_mask:
34679 case CODE_FOR_avx512f_loadv16si_mask:
34680 case CODE_FOR_avx512f_loadv8df_mask:
34681 case CODE_FOR_avx512f_loadv8di_mask:
34682 aligned_mem = true;
34683 break;
34684 default:
34685 break;
34686 }
34687 break;
34688 case VOID_FTYPE_UINT_UINT_UINT:
34689 case VOID_FTYPE_UINT64_UINT_UINT:
34690 case UCHAR_FTYPE_UINT_UINT_UINT:
34691 case UCHAR_FTYPE_UINT64_UINT_UINT:
34692 nargs = 3;
34693 klass = load;
34694 memory = ARRAY_SIZE (args);
34695 last_arg_constant = true;
34696 break;
34697 default:
34698 gcc_unreachable ();
34699 }
34700
34701 gcc_assert (nargs <= ARRAY_SIZE (args));
34702
34703 if (klass == store)
34704 {
34705 arg = CALL_EXPR_ARG (exp, 0);
34706 op = expand_normal (arg);
34707 gcc_assert (target == 0);
34708 if (memory)
34709 {
34710 op = ix86_zero_extend_to_Pmode (op);
34711 target = gen_rtx_MEM (tmode, op);
34712 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34713 on it. Try to improve it using get_pointer_alignment,
34714 and if the special builtin is one that requires strict
34715 mode alignment, also from its GET_MODE_ALIGNMENT.
34716 Failure to do so could lead to ix86_legitimate_combined_insn
34717 rejecting all changes to such insns. */
34718 unsigned int align = get_pointer_alignment (arg);
34719 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34720 align = GET_MODE_ALIGNMENT (tmode);
34721 if (MEM_ALIGN (target) < align)
34722 set_mem_align (target, align);
34723 }
34724 else
34725 target = force_reg (tmode, op);
34726 arg_adjust = 1;
34727 }
34728 else
34729 {
34730 arg_adjust = 0;
34731 if (optimize
34732 || target == 0
34733 || !register_operand (target, tmode)
34734 || GET_MODE (target) != tmode)
34735 target = gen_reg_rtx (tmode);
34736 }
34737
34738 for (i = 0; i < nargs; i++)
34739 {
34740 enum machine_mode mode = insn_p->operand[i + 1].mode;
34741 bool match;
34742
34743 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34744 op = expand_normal (arg);
34745 match = insn_p->operand[i + 1].predicate (op, mode);
34746
34747 if (last_arg_constant && (i + 1) == nargs)
34748 {
34749 if (!match)
34750 {
34751 if (icode == CODE_FOR_lwp_lwpvalsi3
34752 || icode == CODE_FOR_lwp_lwpinssi3
34753 || icode == CODE_FOR_lwp_lwpvaldi3
34754 || icode == CODE_FOR_lwp_lwpinsdi3)
34755 error ("the last argument must be a 32-bit immediate");
34756 else
34757 error ("the last argument must be an 8-bit immediate");
34758 return const0_rtx;
34759 }
34760 }
34761 else
34762 {
34763 if (i == memory)
34764 {
34765 /* This must be the memory operand. */
34766 op = ix86_zero_extend_to_Pmode (op);
34767 op = gen_rtx_MEM (mode, op);
34768 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34769 on it. Try to improve it using get_pointer_alignment,
34770 and if the special builtin is one that requires strict
34771 mode alignment, also from its GET_MODE_ALIGNMENT.
34772 Failure to do so could lead to ix86_legitimate_combined_insn
34773 rejecting all changes to such insns. */
34774 unsigned int align = get_pointer_alignment (arg);
34775 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34776 align = GET_MODE_ALIGNMENT (mode);
34777 if (MEM_ALIGN (op) < align)
34778 set_mem_align (op, align);
34779 }
34780 else
34781 {
34782 /* This must be register. */
34783 if (VECTOR_MODE_P (mode))
34784 op = safe_vector_operand (op, mode);
34785
34786 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34787 op = copy_to_mode_reg (mode, op);
34788 else
34789 {
34790 op = copy_to_reg (op);
34791 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34792 }
34793 }
34794 }
34795
34796 args[i].op = op;
34797 args[i].mode = mode;
34798 }
34799
34800 switch (nargs)
34801 {
34802 case 0:
34803 pat = GEN_FCN (icode) (target);
34804 break;
34805 case 1:
34806 pat = GEN_FCN (icode) (target, args[0].op);
34807 break;
34808 case 2:
34809 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34810 break;
34811 case 3:
34812 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34813 break;
34814 default:
34815 gcc_unreachable ();
34816 }
34817
34818 if (! pat)
34819 return 0;
34820 emit_insn (pat);
34821 return klass == store ? 0 : target;
34822 }
34823
34824 /* Return the integer constant in ARG. Constrain it to be in the range
34825 of the subparts of VEC_TYPE; issue an error if not. */
34826
34827 static int
34828 get_element_number (tree vec_type, tree arg)
34829 {
34830 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34831
34832 if (!tree_fits_uhwi_p (arg)
34833 || (elt = tree_to_uhwi (arg), elt > max))
34834 {
34835 error ("selector must be an integer constant in the range 0..%wi", max);
34836 return 0;
34837 }
34838
34839 return elt;
34840 }
34841
34842 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34843 ix86_expand_vector_init. We DO have language-level syntax for this, in
34844 the form of (type){ init-list }. Except that since we can't place emms
34845 instructions from inside the compiler, we can't allow the use of MMX
34846 registers unless the user explicitly asks for it. So we do *not* define
34847 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34848 we have builtins invoked by mmintrin.h that give us license to emit
34849 these sorts of instructions. */
34850
34851 static rtx
34852 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34853 {
34854 enum machine_mode tmode = TYPE_MODE (type);
34855 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34856 int i, n_elt = GET_MODE_NUNITS (tmode);
34857 rtvec v = rtvec_alloc (n_elt);
34858
34859 gcc_assert (VECTOR_MODE_P (tmode));
34860 gcc_assert (call_expr_nargs (exp) == n_elt);
34861
34862 for (i = 0; i < n_elt; ++i)
34863 {
34864 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34865 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34866 }
34867
34868 if (!target || !register_operand (target, tmode))
34869 target = gen_reg_rtx (tmode);
34870
34871 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34872 return target;
34873 }
34874
34875 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34876 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34877 had a language-level syntax for referencing vector elements. */
34878
34879 static rtx
34880 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34881 {
34882 enum machine_mode tmode, mode0;
34883 tree arg0, arg1;
34884 int elt;
34885 rtx op0;
34886
34887 arg0 = CALL_EXPR_ARG (exp, 0);
34888 arg1 = CALL_EXPR_ARG (exp, 1);
34889
34890 op0 = expand_normal (arg0);
34891 elt = get_element_number (TREE_TYPE (arg0), arg1);
34892
34893 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34894 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34895 gcc_assert (VECTOR_MODE_P (mode0));
34896
34897 op0 = force_reg (mode0, op0);
34898
34899 if (optimize || !target || !register_operand (target, tmode))
34900 target = gen_reg_rtx (tmode);
34901
34902 ix86_expand_vector_extract (true, target, op0, elt);
34903
34904 return target;
34905 }
34906
34907 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34908 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34909 a language-level syntax for referencing vector elements. */
34910
34911 static rtx
34912 ix86_expand_vec_set_builtin (tree exp)
34913 {
34914 enum machine_mode tmode, mode1;
34915 tree arg0, arg1, arg2;
34916 int elt;
34917 rtx op0, op1, target;
34918
34919 arg0 = CALL_EXPR_ARG (exp, 0);
34920 arg1 = CALL_EXPR_ARG (exp, 1);
34921 arg2 = CALL_EXPR_ARG (exp, 2);
34922
34923 tmode = TYPE_MODE (TREE_TYPE (arg0));
34924 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34925 gcc_assert (VECTOR_MODE_P (tmode));
34926
34927 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34928 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34929 elt = get_element_number (TREE_TYPE (arg0), arg2);
34930
34931 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34932 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34933
34934 op0 = force_reg (tmode, op0);
34935 op1 = force_reg (mode1, op1);
34936
34937 /* OP0 is the source of these builtin functions and shouldn't be
34938 modified. Create a copy, use it and return it as target. */
34939 target = gen_reg_rtx (tmode);
34940 emit_move_insn (target, op0);
34941 ix86_expand_vector_set (true, target, op1, elt);
34942
34943 return target;
34944 }
34945
34946 /* Expand an expression EXP that calls a built-in function,
34947 with result going to TARGET if that's convenient
34948 (and in mode MODE if that's convenient).
34949 SUBTARGET may be used as the target for computing one of EXP's operands.
34950 IGNORE is nonzero if the value is to be ignored. */
34951
34952 static rtx
34953 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
34954 enum machine_mode mode, int ignore)
34955 {
34956 const struct builtin_description *d;
34957 size_t i;
34958 enum insn_code icode;
34959 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
34960 tree arg0, arg1, arg2, arg3, arg4;
34961 rtx op0, op1, op2, op3, op4, pat, insn;
34962 enum machine_mode mode0, mode1, mode2, mode3, mode4;
34963 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
34964
34965 /* For CPU builtins that can be folded, fold first and expand the fold. */
34966 switch (fcode)
34967 {
34968 case IX86_BUILTIN_CPU_INIT:
34969 {
34970 /* Make it call __cpu_indicator_init in libgcc. */
34971 tree call_expr, fndecl, type;
34972 type = build_function_type_list (integer_type_node, NULL_TREE);
34973 fndecl = build_fn_decl ("__cpu_indicator_init", type);
34974 call_expr = build_call_expr (fndecl, 0);
34975 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
34976 }
34977 case IX86_BUILTIN_CPU_IS:
34978 case IX86_BUILTIN_CPU_SUPPORTS:
34979 {
34980 tree arg0 = CALL_EXPR_ARG (exp, 0);
34981 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
34982 gcc_assert (fold_expr != NULL_TREE);
34983 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
34984 }
34985 }
34986
34987 /* Determine whether the builtin function is available under the current ISA.
34988 Originally the builtin was not created if it wasn't applicable to the
34989 current ISA based on the command line switches. With function specific
34990 options, we need to check in the context of the function making the call
34991 whether it is supported. */
34992 if (ix86_builtins_isa[fcode].isa
34993 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
34994 {
34995 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
34996 NULL, (enum fpmath_unit) 0, false);
34997
34998 if (!opts)
34999 error ("%qE needs unknown isa option", fndecl);
35000 else
35001 {
35002 gcc_assert (opts != NULL);
35003 error ("%qE needs isa option %s", fndecl, opts);
35004 free (opts);
35005 }
35006 return const0_rtx;
35007 }
35008
35009 switch (fcode)
35010 {
35011 case IX86_BUILTIN_MASKMOVQ:
35012 case IX86_BUILTIN_MASKMOVDQU:
35013 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35014 ? CODE_FOR_mmx_maskmovq
35015 : CODE_FOR_sse2_maskmovdqu);
35016 /* Note the arg order is different from the operand order. */
35017 arg1 = CALL_EXPR_ARG (exp, 0);
35018 arg2 = CALL_EXPR_ARG (exp, 1);
35019 arg0 = CALL_EXPR_ARG (exp, 2);
35020 op0 = expand_normal (arg0);
35021 op1 = expand_normal (arg1);
35022 op2 = expand_normal (arg2);
35023 mode0 = insn_data[icode].operand[0].mode;
35024 mode1 = insn_data[icode].operand[1].mode;
35025 mode2 = insn_data[icode].operand[2].mode;
35026
35027 op0 = ix86_zero_extend_to_Pmode (op0);
35028 op0 = gen_rtx_MEM (mode1, op0);
35029
35030 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35031 op0 = copy_to_mode_reg (mode0, op0);
35032 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35033 op1 = copy_to_mode_reg (mode1, op1);
35034 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35035 op2 = copy_to_mode_reg (mode2, op2);
35036 pat = GEN_FCN (icode) (op0, op1, op2);
35037 if (! pat)
35038 return 0;
35039 emit_insn (pat);
35040 return 0;
35041
35042 case IX86_BUILTIN_LDMXCSR:
35043 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35044 target = assign_386_stack_local (SImode, SLOT_TEMP);
35045 emit_move_insn (target, op0);
35046 emit_insn (gen_sse_ldmxcsr (target));
35047 return 0;
35048
35049 case IX86_BUILTIN_STMXCSR:
35050 target = assign_386_stack_local (SImode, SLOT_TEMP);
35051 emit_insn (gen_sse_stmxcsr (target));
35052 return copy_to_mode_reg (SImode, target);
35053
35054 case IX86_BUILTIN_CLFLUSH:
35055 arg0 = CALL_EXPR_ARG (exp, 0);
35056 op0 = expand_normal (arg0);
35057 icode = CODE_FOR_sse2_clflush;
35058 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35059 op0 = ix86_zero_extend_to_Pmode (op0);
35060
35061 emit_insn (gen_sse2_clflush (op0));
35062 return 0;
35063
35064 case IX86_BUILTIN_MONITOR:
35065 arg0 = CALL_EXPR_ARG (exp, 0);
35066 arg1 = CALL_EXPR_ARG (exp, 1);
35067 arg2 = CALL_EXPR_ARG (exp, 2);
35068 op0 = expand_normal (arg0);
35069 op1 = expand_normal (arg1);
35070 op2 = expand_normal (arg2);
35071 if (!REG_P (op0))
35072 op0 = ix86_zero_extend_to_Pmode (op0);
35073 if (!REG_P (op1))
35074 op1 = copy_to_mode_reg (SImode, op1);
35075 if (!REG_P (op2))
35076 op2 = copy_to_mode_reg (SImode, op2);
35077 emit_insn (ix86_gen_monitor (op0, op1, op2));
35078 return 0;
35079
35080 case IX86_BUILTIN_MWAIT:
35081 arg0 = CALL_EXPR_ARG (exp, 0);
35082 arg1 = CALL_EXPR_ARG (exp, 1);
35083 op0 = expand_normal (arg0);
35084 op1 = expand_normal (arg1);
35085 if (!REG_P (op0))
35086 op0 = copy_to_mode_reg (SImode, op0);
35087 if (!REG_P (op1))
35088 op1 = copy_to_mode_reg (SImode, op1);
35089 emit_insn (gen_sse3_mwait (op0, op1));
35090 return 0;
35091
35092 case IX86_BUILTIN_VEC_INIT_V2SI:
35093 case IX86_BUILTIN_VEC_INIT_V4HI:
35094 case IX86_BUILTIN_VEC_INIT_V8QI:
35095 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35096
35097 case IX86_BUILTIN_VEC_EXT_V2DF:
35098 case IX86_BUILTIN_VEC_EXT_V2DI:
35099 case IX86_BUILTIN_VEC_EXT_V4SF:
35100 case IX86_BUILTIN_VEC_EXT_V4SI:
35101 case IX86_BUILTIN_VEC_EXT_V8HI:
35102 case IX86_BUILTIN_VEC_EXT_V2SI:
35103 case IX86_BUILTIN_VEC_EXT_V4HI:
35104 case IX86_BUILTIN_VEC_EXT_V16QI:
35105 return ix86_expand_vec_ext_builtin (exp, target);
35106
35107 case IX86_BUILTIN_VEC_SET_V2DI:
35108 case IX86_BUILTIN_VEC_SET_V4SF:
35109 case IX86_BUILTIN_VEC_SET_V4SI:
35110 case IX86_BUILTIN_VEC_SET_V8HI:
35111 case IX86_BUILTIN_VEC_SET_V4HI:
35112 case IX86_BUILTIN_VEC_SET_V16QI:
35113 return ix86_expand_vec_set_builtin (exp);
35114
35115 case IX86_BUILTIN_INFQ:
35116 case IX86_BUILTIN_HUGE_VALQ:
35117 {
35118 REAL_VALUE_TYPE inf;
35119 rtx tmp;
35120
35121 real_inf (&inf);
35122 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35123
35124 tmp = validize_mem (force_const_mem (mode, tmp));
35125
35126 if (target == 0)
35127 target = gen_reg_rtx (mode);
35128
35129 emit_move_insn (target, tmp);
35130 return target;
35131 }
35132
35133 case IX86_BUILTIN_RDPMC:
35134 case IX86_BUILTIN_RDTSC:
35135 case IX86_BUILTIN_RDTSCP:
35136
35137 op0 = gen_reg_rtx (DImode);
35138 op1 = gen_reg_rtx (DImode);
35139
35140 if (fcode == IX86_BUILTIN_RDPMC)
35141 {
35142 arg0 = CALL_EXPR_ARG (exp, 0);
35143 op2 = expand_normal (arg0);
35144 if (!register_operand (op2, SImode))
35145 op2 = copy_to_mode_reg (SImode, op2);
35146
35147 insn = (TARGET_64BIT
35148 ? gen_rdpmc_rex64 (op0, op1, op2)
35149 : gen_rdpmc (op0, op2));
35150 emit_insn (insn);
35151 }
35152 else if (fcode == IX86_BUILTIN_RDTSC)
35153 {
35154 insn = (TARGET_64BIT
35155 ? gen_rdtsc_rex64 (op0, op1)
35156 : gen_rdtsc (op0));
35157 emit_insn (insn);
35158 }
35159 else
35160 {
35161 op2 = gen_reg_rtx (SImode);
35162
35163 insn = (TARGET_64BIT
35164 ? gen_rdtscp_rex64 (op0, op1, op2)
35165 : gen_rdtscp (op0, op2));
35166 emit_insn (insn);
35167
35168 arg0 = CALL_EXPR_ARG (exp, 0);
35169 op4 = expand_normal (arg0);
35170 if (!address_operand (op4, VOIDmode))
35171 {
35172 op4 = convert_memory_address (Pmode, op4);
35173 op4 = copy_addr_to_reg (op4);
35174 }
35175 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35176 }
35177
35178 if (target == 0)
35179 {
35180 /* mode is VOIDmode if __builtin_rd* has been called
35181 without lhs. */
35182 if (mode == VOIDmode)
35183 return target;
35184 target = gen_reg_rtx (mode);
35185 }
35186
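/* The 64-bit rd* patterns return the counter in two DImode halves
(low in op0, high in op1); merge them into a single value. */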
35187 if (TARGET_64BIT)
35188 {
35189 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35190 op1, 1, OPTAB_DIRECT);
35191 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35192 op0, 1, OPTAB_DIRECT);
35193 }
35194
35195 emit_move_insn (target, op0);
35196 return target;
35197
35198 case IX86_BUILTIN_FXSAVE:
35199 case IX86_BUILTIN_FXRSTOR:
35200 case IX86_BUILTIN_FXSAVE64:
35201 case IX86_BUILTIN_FXRSTOR64:
35202 case IX86_BUILTIN_FNSTENV:
35203 case IX86_BUILTIN_FLDENV:
35204 case IX86_BUILTIN_FNSTSW:
35205 mode0 = BLKmode;
35206 switch (fcode)
35207 {
35208 case IX86_BUILTIN_FXSAVE:
35209 icode = CODE_FOR_fxsave;
35210 break;
35211 case IX86_BUILTIN_FXRSTOR:
35212 icode = CODE_FOR_fxrstor;
35213 break;
35214 case IX86_BUILTIN_FXSAVE64:
35215 icode = CODE_FOR_fxsave64;
35216 break;
35217 case IX86_BUILTIN_FXRSTOR64:
35218 icode = CODE_FOR_fxrstor64;
35219 break;
35220 case IX86_BUILTIN_FNSTENV:
35221 icode = CODE_FOR_fnstenv;
35222 break;
35223 case IX86_BUILTIN_FLDENV:
35224 icode = CODE_FOR_fldenv;
35225 break;
35226 case IX86_BUILTIN_FNSTSW:
35227 icode = CODE_FOR_fnstsw;
35228 mode0 = HImode;
35229 break;
35230 default:
35231 gcc_unreachable ();
35232 }
35233
35234 arg0 = CALL_EXPR_ARG (exp, 0);
35235 op0 = expand_normal (arg0);
35236
35237 if (!address_operand (op0, VOIDmode))
35238 {
35239 op0 = convert_memory_address (Pmode, op0);
35240 op0 = copy_addr_to_reg (op0);
35241 }
35242 op0 = gen_rtx_MEM (mode0, op0);
35243
35244 pat = GEN_FCN (icode) (op0);
35245 if (pat)
35246 emit_insn (pat);
35247 return 0;
35248
35249 case IX86_BUILTIN_XSAVE:
35250 case IX86_BUILTIN_XRSTOR:
35251 case IX86_BUILTIN_XSAVE64:
35252 case IX86_BUILTIN_XRSTOR64:
35253 case IX86_BUILTIN_XSAVEOPT:
35254 case IX86_BUILTIN_XSAVEOPT64:
35255 arg0 = CALL_EXPR_ARG (exp, 0);
35256 arg1 = CALL_EXPR_ARG (exp, 1);
35257 op0 = expand_normal (arg0);
35258 op1 = expand_normal (arg1);
35259
35260 if (!address_operand (op0, VOIDmode))
35261 {
35262 op0 = convert_memory_address (Pmode, op0);
35263 op0 = copy_addr_to_reg (op0);
35264 }
35265 op0 = gen_rtx_MEM (BLKmode, op0);
35266
35267 op1 = force_reg (DImode, op1);
35268
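/* The xsave/xrstor mask is taken from EDX:EAX; for the 64-bit patterns
split the DImode mask into its two SImode halves. */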
35269 if (TARGET_64BIT)
35270 {
35271 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35272 NULL, 1, OPTAB_DIRECT);
35273 switch (fcode)
35274 {
35275 case IX86_BUILTIN_XSAVE:
35276 icode = CODE_FOR_xsave_rex64;
35277 break;
35278 case IX86_BUILTIN_XRSTOR:
35279 icode = CODE_FOR_xrstor_rex64;
35280 break;
35281 case IX86_BUILTIN_XSAVE64:
35282 icode = CODE_FOR_xsave64;
35283 break;
35284 case IX86_BUILTIN_XRSTOR64:
35285 icode = CODE_FOR_xrstor64;
35286 break;
35287 case IX86_BUILTIN_XSAVEOPT:
35288 icode = CODE_FOR_xsaveopt_rex64;
35289 break;
35290 case IX86_BUILTIN_XSAVEOPT64:
35291 icode = CODE_FOR_xsaveopt64;
35292 break;
35293 default:
35294 gcc_unreachable ();
35295 }
35296
35297 op2 = gen_lowpart (SImode, op2);
35298 op1 = gen_lowpart (SImode, op1);
35299 pat = GEN_FCN (icode) (op0, op1, op2);
35300 }
35301 else
35302 {
35303 switch (fcode)
35304 {
35305 case IX86_BUILTIN_XSAVE:
35306 icode = CODE_FOR_xsave;
35307 break;
35308 case IX86_BUILTIN_XRSTOR:
35309 icode = CODE_FOR_xrstor;
35310 break;
35311 case IX86_BUILTIN_XSAVEOPT:
35312 icode = CODE_FOR_xsaveopt;
35313 break;
35314 default:
35315 gcc_unreachable ();
35316 }
35317 pat = GEN_FCN (icode) (op0, op1);
35318 }
35319
35320 if (pat)
35321 emit_insn (pat);
35322 return 0;
35323
35324 case IX86_BUILTIN_LLWPCB:
35325 arg0 = CALL_EXPR_ARG (exp, 0);
35326 op0 = expand_normal (arg0);
35327 icode = CODE_FOR_lwp_llwpcb;
35328 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35329 op0 = ix86_zero_extend_to_Pmode (op0);
35330 emit_insn (gen_lwp_llwpcb (op0));
35331 return 0;
35332
35333 case IX86_BUILTIN_SLWPCB:
35334 icode = CODE_FOR_lwp_slwpcb;
35335 if (!target
35336 || !insn_data[icode].operand[0].predicate (target, Pmode))
35337 target = gen_reg_rtx (Pmode);
35338 emit_insn (gen_lwp_slwpcb (target));
35339 return target;
35340
35341 case IX86_BUILTIN_BEXTRI32:
35342 case IX86_BUILTIN_BEXTRI64:
35343 arg0 = CALL_EXPR_ARG (exp, 0);
35344 arg1 = CALL_EXPR_ARG (exp, 1);
35345 op0 = expand_normal (arg0);
35346 op1 = expand_normal (arg1);
35347 icode = (fcode == IX86_BUILTIN_BEXTRI32
35348 ? CODE_FOR_tbm_bextri_si
35349 : CODE_FOR_tbm_bextri_di);
35350 if (!CONST_INT_P (op1))
35351 {
35352 error ("last argument must be an immediate");
35353 return const0_rtx;
35354 }
35355 else
35356 {
35357 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35358 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35359 op1 = GEN_INT (length);
35360 op2 = GEN_INT (lsb_index);
35361 pat = GEN_FCN (icode) (target, op0, op1, op2);
35362 if (pat)
35363 emit_insn (pat);
35364 return target;
35365 }
35366
35367 case IX86_BUILTIN_RDRAND16_STEP:
35368 icode = CODE_FOR_rdrandhi_1;
35369 mode0 = HImode;
35370 goto rdrand_step;
35371
35372 case IX86_BUILTIN_RDRAND32_STEP:
35373 icode = CODE_FOR_rdrandsi_1;
35374 mode0 = SImode;
35375 goto rdrand_step;
35376
35377 case IX86_BUILTIN_RDRAND64_STEP:
35378 icode = CODE_FOR_rdranddi_1;
35379 mode0 = DImode;
35380
35381 rdrand_step:
35382 op0 = gen_reg_rtx (mode0);
35383 emit_insn (GEN_FCN (icode) (op0));
35384
35385 arg0 = CALL_EXPR_ARG (exp, 0);
35386 op1 = expand_normal (arg0);
35387 if (!address_operand (op1, VOIDmode))
35388 {
35389 op1 = convert_memory_address (Pmode, op1);
35390 op1 = copy_addr_to_reg (op1);
35391 }
35392 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35393
35394 op1 = gen_reg_rtx (SImode);
35395 emit_move_insn (op1, CONST1_RTX (SImode));
35396
35397 /* Emit an SImode conditional move: return 1 when CF is set (rdrand
succeeded), otherwise the value rdrand left in the destination. */
35398 if (mode0 == HImode)
35399 {
35400 op2 = gen_reg_rtx (SImode);
35401 emit_insn (gen_zero_extendhisi2 (op2, op0));
35402 }
35403 else if (mode0 == SImode)
35404 op2 = op0;
35405 else
35406 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35407
35408 if (target == 0)
35409 target = gen_reg_rtx (SImode);
35410
35411 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35412 const0_rtx);
35413 emit_insn (gen_rtx_SET (VOIDmode, target,
35414 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35415 return target;
35416
35417 case IX86_BUILTIN_RDSEED16_STEP:
35418 icode = CODE_FOR_rdseedhi_1;
35419 mode0 = HImode;
35420 goto rdseed_step;
35421
35422 case IX86_BUILTIN_RDSEED32_STEP:
35423 icode = CODE_FOR_rdseedsi_1;
35424 mode0 = SImode;
35425 goto rdseed_step;
35426
35427 case IX86_BUILTIN_RDSEED64_STEP:
35428 icode = CODE_FOR_rdseeddi_1;
35429 mode0 = DImode;
35430
35431 rdseed_step:
35432 op0 = gen_reg_rtx (mode0);
35433 emit_insn (GEN_FCN (icode) (op0));
35434
35435 arg0 = CALL_EXPR_ARG (exp, 0);
35436 op1 = expand_normal (arg0);
35437 if (!address_operand (op1, VOIDmode))
35438 {
35439 op1 = convert_memory_address (Pmode, op1);
35440 op1 = copy_addr_to_reg (op1);
35441 }
35442 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35443
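/* rdseed signals success through CF; materialize CF into a QImode
register and zero-extend it as the return value. */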
35444 op2 = gen_reg_rtx (QImode);
35445
35446 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35447 const0_rtx);
35448 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35449
35450 if (target == 0)
35451 target = gen_reg_rtx (SImode);
35452
35453 emit_insn (gen_zero_extendqisi2 (target, op2));
35454 return target;
35455
35456 case IX86_BUILTIN_ADDCARRYX32:
35457 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35458 mode0 = SImode;
35459 goto addcarryx;
35460
35461 case IX86_BUILTIN_ADDCARRYX64:
35462 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35463 mode0 = DImode;
35464
35465 addcarryx:
35466 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35467 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35468 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35469 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35470
35471 op0 = gen_reg_rtx (QImode);
35472
35473 /* Generate CF from the input operand: adding 0xff sets CF iff c_in is nonzero. */
35474 op1 = expand_normal (arg0);
35475 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35476 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35477
35478 /* Gen ADCX instruction to compute X+Y+CF. */
35479 op2 = expand_normal (arg1);
35480 op3 = expand_normal (arg2);
35481
35482 if (!REG_P (op2))
35483 op2 = copy_to_mode_reg (mode0, op2);
35484 if (!REG_P (op3))
35485 op3 = copy_to_mode_reg (mode0, op3);
35486
35487 op0 = gen_reg_rtx (mode0);
35488
35489 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35490 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35491 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35492
35493 /* Store the result. */
35494 op4 = expand_normal (arg3);
35495 if (!address_operand (op4, VOIDmode))
35496 {
35497 op4 = convert_memory_address (Pmode, op4);
35498 op4 = copy_addr_to_reg (op4);
35499 }
35500 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35501
35502 /* Return current CF value. */
35503 if (target == 0)
35504 target = gen_reg_rtx (QImode);
35505
35506 PUT_MODE (pat, QImode);
35507 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35508 return target;
35509
35510 case IX86_BUILTIN_READ_FLAGS:
35511 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35512
35513 if (optimize
35514 || target == NULL_RTX
35515 || !nonimmediate_operand (target, word_mode)
35516 || GET_MODE (target) != word_mode)
35517 target = gen_reg_rtx (word_mode);
35518
35519 emit_insn (gen_pop (target));
35520 return target;
35521
35522 case IX86_BUILTIN_WRITE_FLAGS:
35523
35524 arg0 = CALL_EXPR_ARG (exp, 0);
35525 op0 = expand_normal (arg0);
35526 if (!general_no_elim_operand (op0, word_mode))
35527 op0 = copy_to_mode_reg (word_mode, op0);
35528
35529 emit_insn (gen_push (op0));
35530 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35531 return 0;
35532
35533 case IX86_BUILTIN_KORTESTC16:
35534 icode = CODE_FOR_kortestchi;
35535 mode0 = HImode;
35536 mode1 = CCCmode;
35537 goto kortest;
35538
35539 case IX86_BUILTIN_KORTESTZ16:
35540 icode = CODE_FOR_kortestzhi;
35541 mode0 = HImode;
35542 mode1 = CCZmode;
35543
35544 kortest:
35545 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35546 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35547 op0 = expand_normal (arg0);
35548 op1 = expand_normal (arg1);
35549
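/* The mask arguments arrive in integer modes; force them into registers
and view them as HImode via subregs before emitting kortest. */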
35550 op0 = copy_to_reg (op0);
35551 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35552 op1 = copy_to_reg (op1);
35553 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35554
35555 target = gen_reg_rtx (QImode);
35556 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35557
35558 /* Emit kortest. */
35559 emit_insn (GEN_FCN (icode) (op0, op1));
35560 /* And use setcc to return result from flags. */
35561 ix86_expand_setcc (target, EQ,
35562 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35563 return target;
35564
35565 case IX86_BUILTIN_GATHERSIV2DF:
35566 icode = CODE_FOR_avx2_gathersiv2df;
35567 goto gather_gen;
35568 case IX86_BUILTIN_GATHERSIV4DF:
35569 icode = CODE_FOR_avx2_gathersiv4df;
35570 goto gather_gen;
35571 case IX86_BUILTIN_GATHERDIV2DF:
35572 icode = CODE_FOR_avx2_gatherdiv2df;
35573 goto gather_gen;
35574 case IX86_BUILTIN_GATHERDIV4DF:
35575 icode = CODE_FOR_avx2_gatherdiv4df;
35576 goto gather_gen;
35577 case IX86_BUILTIN_GATHERSIV4SF:
35578 icode = CODE_FOR_avx2_gathersiv4sf;
35579 goto gather_gen;
35580 case IX86_BUILTIN_GATHERSIV8SF:
35581 icode = CODE_FOR_avx2_gathersiv8sf;
35582 goto gather_gen;
35583 case IX86_BUILTIN_GATHERDIV4SF:
35584 icode = CODE_FOR_avx2_gatherdiv4sf;
35585 goto gather_gen;
35586 case IX86_BUILTIN_GATHERDIV8SF:
35587 icode = CODE_FOR_avx2_gatherdiv8sf;
35588 goto gather_gen;
35589 case IX86_BUILTIN_GATHERSIV2DI:
35590 icode = CODE_FOR_avx2_gathersiv2di;
35591 goto gather_gen;
35592 case IX86_BUILTIN_GATHERSIV4DI:
35593 icode = CODE_FOR_avx2_gathersiv4di;
35594 goto gather_gen;
35595 case IX86_BUILTIN_GATHERDIV2DI:
35596 icode = CODE_FOR_avx2_gatherdiv2di;
35597 goto gather_gen;
35598 case IX86_BUILTIN_GATHERDIV4DI:
35599 icode = CODE_FOR_avx2_gatherdiv4di;
35600 goto gather_gen;
35601 case IX86_BUILTIN_GATHERSIV4SI:
35602 icode = CODE_FOR_avx2_gathersiv4si;
35603 goto gather_gen;
35604 case IX86_BUILTIN_GATHERSIV8SI:
35605 icode = CODE_FOR_avx2_gathersiv8si;
35606 goto gather_gen;
35607 case IX86_BUILTIN_GATHERDIV4SI:
35608 icode = CODE_FOR_avx2_gatherdiv4si;
35609 goto gather_gen;
35610 case IX86_BUILTIN_GATHERDIV8SI:
35611 icode = CODE_FOR_avx2_gatherdiv8si;
35612 goto gather_gen;
35613 case IX86_BUILTIN_GATHERALTSIV4DF:
35614 icode = CODE_FOR_avx2_gathersiv4df;
35615 goto gather_gen;
35616 case IX86_BUILTIN_GATHERALTDIV8SF:
35617 icode = CODE_FOR_avx2_gatherdiv8sf;
35618 goto gather_gen;
35619 case IX86_BUILTIN_GATHERALTSIV4DI:
35620 icode = CODE_FOR_avx2_gathersiv4di;
35621 goto gather_gen;
35622 case IX86_BUILTIN_GATHERALTDIV8SI:
35623 icode = CODE_FOR_avx2_gatherdiv8si;
35624 goto gather_gen;
35625 case IX86_BUILTIN_GATHER3SIV16SF:
35626 icode = CODE_FOR_avx512f_gathersiv16sf;
35627 goto gather_gen;
35628 case IX86_BUILTIN_GATHER3SIV8DF:
35629 icode = CODE_FOR_avx512f_gathersiv8df;
35630 goto gather_gen;
35631 case IX86_BUILTIN_GATHER3DIV16SF:
35632 icode = CODE_FOR_avx512f_gatherdiv16sf;
35633 goto gather_gen;
35634 case IX86_BUILTIN_GATHER3DIV8DF:
35635 icode = CODE_FOR_avx512f_gatherdiv8df;
35636 goto gather_gen;
35637 case IX86_BUILTIN_GATHER3SIV16SI:
35638 icode = CODE_FOR_avx512f_gathersiv16si;
35639 goto gather_gen;
35640 case IX86_BUILTIN_GATHER3SIV8DI:
35641 icode = CODE_FOR_avx512f_gathersiv8di;
35642 goto gather_gen;
35643 case IX86_BUILTIN_GATHER3DIV16SI:
35644 icode = CODE_FOR_avx512f_gatherdiv16si;
35645 goto gather_gen;
35646 case IX86_BUILTIN_GATHER3DIV8DI:
35647 icode = CODE_FOR_avx512f_gatherdiv8di;
35648 goto gather_gen;
35649 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35650 icode = CODE_FOR_avx512f_gathersiv8df;
35651 goto gather_gen;
35652 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35653 icode = CODE_FOR_avx512f_gatherdiv16sf;
35654 goto gather_gen;
35655 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35656 icode = CODE_FOR_avx512f_gathersiv8di;
35657 goto gather_gen;
35658 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35659 icode = CODE_FOR_avx512f_gatherdiv16si;
35660 goto gather_gen;
35661 case IX86_BUILTIN_SCATTERSIV16SF:
35662 icode = CODE_FOR_avx512f_scattersiv16sf;
35663 goto scatter_gen;
35664 case IX86_BUILTIN_SCATTERSIV8DF:
35665 icode = CODE_FOR_avx512f_scattersiv8df;
35666 goto scatter_gen;
35667 case IX86_BUILTIN_SCATTERDIV16SF:
35668 icode = CODE_FOR_avx512f_scatterdiv16sf;
35669 goto scatter_gen;
35670 case IX86_BUILTIN_SCATTERDIV8DF:
35671 icode = CODE_FOR_avx512f_scatterdiv8df;
35672 goto scatter_gen;
35673 case IX86_BUILTIN_SCATTERSIV16SI:
35674 icode = CODE_FOR_avx512f_scattersiv16si;
35675 goto scatter_gen;
35676 case IX86_BUILTIN_SCATTERSIV8DI:
35677 icode = CODE_FOR_avx512f_scattersiv8di;
35678 goto scatter_gen;
35679 case IX86_BUILTIN_SCATTERDIV16SI:
35680 icode = CODE_FOR_avx512f_scatterdiv16si;
35681 goto scatter_gen;
35682 case IX86_BUILTIN_SCATTERDIV8DI:
35683 icode = CODE_FOR_avx512f_scatterdiv8di;
35684 goto scatter_gen;
35685
35686 case IX86_BUILTIN_GATHERPFDPD:
35687 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35688 goto vec_prefetch_gen;
35689 case IX86_BUILTIN_GATHERPFDPS:
35690 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35691 goto vec_prefetch_gen;
35692 case IX86_BUILTIN_GATHERPFQPD:
35693 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35694 goto vec_prefetch_gen;
35695 case IX86_BUILTIN_GATHERPFQPS:
35696 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35697 goto vec_prefetch_gen;
35698 case IX86_BUILTIN_SCATTERPFDPD:
35699 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35700 goto vec_prefetch_gen;
35701 case IX86_BUILTIN_SCATTERPFDPS:
35702 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35703 goto vec_prefetch_gen;
35704 case IX86_BUILTIN_SCATTERPFQPD:
35705 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35706 goto vec_prefetch_gen;
35707 case IX86_BUILTIN_SCATTERPFQPS:
35708 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35709 goto vec_prefetch_gen;
35710
35711 gather_gen:
35712 rtx half;
35713 rtx (*gen) (rtx, rtx);
35714
35715 arg0 = CALL_EXPR_ARG (exp, 0);
35716 arg1 = CALL_EXPR_ARG (exp, 1);
35717 arg2 = CALL_EXPR_ARG (exp, 2);
35718 arg3 = CALL_EXPR_ARG (exp, 3);
35719 arg4 = CALL_EXPR_ARG (exp, 4);
35720 op0 = expand_normal (arg0);
35721 op1 = expand_normal (arg1);
35722 op2 = expand_normal (arg2);
35723 op3 = expand_normal (arg3);
35724 op4 = expand_normal (arg4);
35725 /* Note the arg order is different from the operand order. */
35726 mode0 = insn_data[icode].operand[1].mode;
35727 mode2 = insn_data[icode].operand[3].mode;
35728 mode3 = insn_data[icode].operand[4].mode;
35729 mode4 = insn_data[icode].operand[5].mode;
35730
35731 if (target == NULL_RTX
35732 || GET_MODE (target) != insn_data[icode].operand[0].mode
35733 || !insn_data[icode].operand[0].predicate (target,
35734 GET_MODE (target)))
35735 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35736 else
35737 subtarget = target;
35738
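/* For the *ALT* gather variants some operands are one vector width larger
than the pattern expects (the index for the SIV forms, the source and
mask for the DIV forms); extract their low halves first. */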
35739 switch (fcode)
35740 {
35741 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35742 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35743 half = gen_reg_rtx (V8SImode);
35744 if (!nonimmediate_operand (op2, V16SImode))
35745 op2 = copy_to_mode_reg (V16SImode, op2);
35746 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35747 op2 = half;
35748 break;
35749 case IX86_BUILTIN_GATHERALTSIV4DF:
35750 case IX86_BUILTIN_GATHERALTSIV4DI:
35751 half = gen_reg_rtx (V4SImode);
35752 if (!nonimmediate_operand (op2, V8SImode))
35753 op2 = copy_to_mode_reg (V8SImode, op2);
35754 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35755 op2 = half;
35756 break;
35757 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35758 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35759 half = gen_reg_rtx (mode0);
35760 if (mode0 == V8SFmode)
35761 gen = gen_vec_extract_lo_v16sf;
35762 else
35763 gen = gen_vec_extract_lo_v16si;
35764 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35765 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35766 emit_insn (gen (half, op0));
35767 op0 = half;
35768 if (GET_MODE (op3) != VOIDmode)
35769 {
35770 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35771 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35772 emit_insn (gen (half, op3));
35773 op3 = half;
35774 }
35775 break;
35776 case IX86_BUILTIN_GATHERALTDIV8SF:
35777 case IX86_BUILTIN_GATHERALTDIV8SI:
35778 half = gen_reg_rtx (mode0);
35779 if (mode0 == V4SFmode)
35780 gen = gen_vec_extract_lo_v8sf;
35781 else
35782 gen = gen_vec_extract_lo_v8si;
35783 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35784 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35785 emit_insn (gen (half, op0));
35786 op0 = half;
35787 if (GET_MODE (op3) != VOIDmode)
35788 {
35789 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35790 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35791 emit_insn (gen (half, op3));
35792 op3 = half;
35793 }
35794 break;
35795 default:
35796 break;
35797 }
35798
35799 /* Force memory operand only with base register here. But we
35800 don't want to do it on memory operand for other builtin
35801 functions. */
35802 op1 = ix86_zero_extend_to_Pmode (op1);
35803
35804 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35805 op0 = copy_to_mode_reg (mode0, op0);
35806 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35807 op1 = copy_to_mode_reg (Pmode, op1);
35808 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35809 op2 = copy_to_mode_reg (mode2, op2);
35810 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35811 {
35812 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35813 op3 = copy_to_mode_reg (mode3, op3);
35814 }
35815 else
35816 {
35817 op3 = copy_to_reg (op3);
35818 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35819 }
35820 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35821 {
35822 error ("the last argument must be scale 1, 2, 4, 8");
35823 return const0_rtx;
35824 }
35825
35826 /* Optimize. If mask is known to have all high bits set,
35827 replace op0 with pc_rtx to signal that the instruction
35828 overwrites the whole destination and doesn't use its
35829 previous contents. */
35830 if (optimize)
35831 {
35832 if (TREE_CODE (arg3) == INTEGER_CST)
35833 {
35834 if (integer_all_onesp (arg3))
35835 op0 = pc_rtx;
35836 }
35837 else if (TREE_CODE (arg3) == VECTOR_CST)
35838 {
35839 unsigned int negative = 0;
35840 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35841 {
35842 tree cst = VECTOR_CST_ELT (arg3, i);
35843 if (TREE_CODE (cst) == INTEGER_CST
35844 && tree_int_cst_sign_bit (cst))
35845 negative++;
35846 else if (TREE_CODE (cst) == REAL_CST
35847 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35848 negative++;
35849 }
35850 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35851 op0 = pc_rtx;
35852 }
35853 else if (TREE_CODE (arg3) == SSA_NAME
35854 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35855 {
35856 /* Recognize also when mask is like:
35857 __v2df src = _mm_setzero_pd ();
35858 __v2df mask = _mm_cmpeq_pd (src, src);
35859 or
35860 __v8sf src = _mm256_setzero_ps ();
35861 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35862 as that is a cheaper way to load all ones into
35863 a register than having to load a constant from
35864 memory. */
35865 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35866 if (is_gimple_call (def_stmt))
35867 {
35868 tree fndecl = gimple_call_fndecl (def_stmt);
35869 if (fndecl
35870 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35871 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35872 {
35873 case IX86_BUILTIN_CMPPD:
35874 case IX86_BUILTIN_CMPPS:
35875 case IX86_BUILTIN_CMPPD256:
35876 case IX86_BUILTIN_CMPPS256:
35877 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35878 break;
35879 /* FALLTHRU */
35880 case IX86_BUILTIN_CMPEQPD:
35881 case IX86_BUILTIN_CMPEQPS:
35882 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35883 && initializer_zerop (gimple_call_arg (def_stmt,
35884 1)))
35885 op0 = pc_rtx;
35886 break;
35887 default:
35888 break;
35889 }
35890 }
35891 }
35892 }
35893
35894 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
35895 if (! pat)
35896 return const0_rtx;
35897 emit_insn (pat);
35898
35899 switch (fcode)
35900 {
35901 case IX86_BUILTIN_GATHER3DIV16SF:
35902 if (target == NULL_RTX)
35903 target = gen_reg_rtx (V8SFmode);
35904 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
35905 break;
35906 case IX86_BUILTIN_GATHER3DIV16SI:
35907 if (target == NULL_RTX)
35908 target = gen_reg_rtx (V8SImode);
35909 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
35910 break;
35911 case IX86_BUILTIN_GATHERDIV8SF:
35912 if (target == NULL_RTX)
35913 target = gen_reg_rtx (V4SFmode);
35914 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
35915 break;
35916 case IX86_BUILTIN_GATHERDIV8SI:
35917 if (target == NULL_RTX)
35918 target = gen_reg_rtx (V4SImode);
35919 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
35920 break;
35921 default:
35922 target = subtarget;
35923 break;
35924 }
35925 return target;
35926
35927 scatter_gen:
35928 arg0 = CALL_EXPR_ARG (exp, 0);
35929 arg1 = CALL_EXPR_ARG (exp, 1);
35930 arg2 = CALL_EXPR_ARG (exp, 2);
35931 arg3 = CALL_EXPR_ARG (exp, 3);
35932 arg4 = CALL_EXPR_ARG (exp, 4);
35933 op0 = expand_normal (arg0);
35934 op1 = expand_normal (arg1);
35935 op2 = expand_normal (arg2);
35936 op3 = expand_normal (arg3);
35937 op4 = expand_normal (arg4);
35938 mode1 = insn_data[icode].operand[1].mode;
35939 mode2 = insn_data[icode].operand[2].mode;
35940 mode3 = insn_data[icode].operand[3].mode;
35941 mode4 = insn_data[icode].operand[4].mode;
35942
35943 /* Force memory operand only with base register here. But we
35944 don't want to do it on memory operand for other builtin
35945 functions. */
35946 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
35947
35948 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35949 op0 = copy_to_mode_reg (Pmode, op0);
35950
35951 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
35952 {
35953 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35954 op1 = copy_to_mode_reg (mode1, op1);
35955 }
35956 else
35957 {
35958 op1 = copy_to_reg (op1);
35959 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
35960 }
35961
35962 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35963 op2 = copy_to_mode_reg (mode2, op2);
35964
35965 if (!insn_data[icode].operand[3].predicate (op3, mode3))
35966 op3 = copy_to_mode_reg (mode3, op3);
35967
35968 if (!insn_data[icode].operand[4].predicate (op4, mode4))
35969 {
35970 error ("the last argument must be scale 1, 2, 4, 8");
35971 return const0_rtx;
35972 }
35973
35974 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
35975 if (! pat)
35976 return const0_rtx;
35977
35978 emit_insn (pat);
35979 return 0;
35980
35981 vec_prefetch_gen:
35982 arg0 = CALL_EXPR_ARG (exp, 0);
35983 arg1 = CALL_EXPR_ARG (exp, 1);
35984 arg2 = CALL_EXPR_ARG (exp, 2);
35985 arg3 = CALL_EXPR_ARG (exp, 3);
35986 arg4 = CALL_EXPR_ARG (exp, 4);
35987 op0 = expand_normal (arg0);
35988 op1 = expand_normal (arg1);
35989 op2 = expand_normal (arg2);
35990 op3 = expand_normal (arg3);
35991 op4 = expand_normal (arg4);
35992 mode0 = insn_data[icode].operand[0].mode;
35993 mode1 = insn_data[icode].operand[1].mode;
35994 mode3 = insn_data[icode].operand[3].mode;
35995 mode4 = insn_data[icode].operand[4].mode;
35996
35997 if (GET_MODE (op0) == mode0
35998 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
35999 {
36000 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36001 op0 = copy_to_mode_reg (mode0, op0);
36002 }
36003 else if (op0 != constm1_rtx)
36004 {
36005 op0 = copy_to_reg (op0);
36006 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36007 }
36008
36009 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36010 op1 = copy_to_mode_reg (mode1, op1);
36011
36012 /* Force the memory operand to use only a base register here; we
36013 don't want to do this for the memory operands of other builtin
36014 functions. */
36015 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36016
36017 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36018 op2 = copy_to_mode_reg (Pmode, op2);
36019
36020 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36021 {
36022 error ("the fourth argument must be scale 1, 2, 4, 8");
36023 return const0_rtx;
36024 }
36025
36026 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36027 {
36028 error ("the last argument must be hint 0 or 1");
36029 return const0_rtx;
36030 }
36031
36032 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36033 if (! pat)
36034 return const0_rtx;
36035
36036 emit_insn (pat);
36037
36038 return 0;
36039
36040 case IX86_BUILTIN_XABORT:
36041 icode = CODE_FOR_xabort;
36042 arg0 = CALL_EXPR_ARG (exp, 0);
36043 op0 = expand_normal (arg0);
36044 mode0 = insn_data[icode].operand[0].mode;
36045 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36046 {
36047 error ("the xabort's argument must be an 8-bit immediate");
36048 return const0_rtx;
36049 }
36050 emit_insn (gen_xabort (op0));
36051 return 0;
36052
36053 default:
36054 break;
36055 }
36056
36057 for (i = 0, d = bdesc_special_args;
36058 i < ARRAY_SIZE (bdesc_special_args);
36059 i++, d++)
36060 if (d->code == fcode)
36061 return ix86_expand_special_args_builtin (d, exp, target);
36062
36063 for (i = 0, d = bdesc_args;
36064 i < ARRAY_SIZE (bdesc_args);
36065 i++, d++)
36066 if (d->code == fcode)
36067 switch (fcode)
36068 {
36069 case IX86_BUILTIN_FABSQ:
36070 case IX86_BUILTIN_COPYSIGNQ:
36071 if (!TARGET_SSE)
36072 /* Emit a normal call if SSE isn't available. */
36073 return expand_call (exp, target, ignore);
36074 default:
36075 return ix86_expand_args_builtin (d, exp, target);
36076 }
36077
36078 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36079 if (d->code == fcode)
36080 return ix86_expand_sse_comi (d, exp, target);
36081
36082 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36083 if (d->code == fcode)
36084 return ix86_expand_round_builtin (d, exp, target);
36085
36086 for (i = 0, d = bdesc_pcmpestr;
36087 i < ARRAY_SIZE (bdesc_pcmpestr);
36088 i++, d++)
36089 if (d->code == fcode)
36090 return ix86_expand_sse_pcmpestr (d, exp, target);
36091
36092 for (i = 0, d = bdesc_pcmpistr;
36093 i < ARRAY_SIZE (bdesc_pcmpistr);
36094 i++, d++)
36095 if (d->code == fcode)
36096 return ix86_expand_sse_pcmpistr (d, exp, target);
36097
36098 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36099 if (d->code == fcode)
36100 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36101 (enum ix86_builtin_func_type)
36102 d->flag, d->comparison);
36103
36104 gcc_unreachable ();
36105 }
36106
36107 /* This returns the target-specific builtin with code CODE if
36108 current_function_decl has visibility on this builtin, which is checked
36109 using isa flags. Returns NULL_TREE otherwise. */
36110
36111 static tree ix86_get_builtin (enum ix86_builtins code)
36112 {
36113 struct cl_target_option *opts;
36114 tree target_tree = NULL_TREE;
36115
36116 /* Determine the isa flags of current_function_decl. */
36117
36118 if (current_function_decl)
36119 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36120
36121 if (target_tree == NULL)
36122 target_tree = target_option_default_node;
36123
36124 opts = TREE_TARGET_OPTION (target_tree);
36125
36126 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36127 return ix86_builtin_decl (code, true);
36128 else
36129 return NULL_TREE;
36130 }
36131
36132 /* Returns a function decl for a vectorized version of the builtin
36133 function FNDECL with result vector type TYPE_OUT and argument vector
36134 type TYPE_IN, or NULL_TREE if it is not available. */
36135
36136 static tree
36137 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36138 tree type_in)
36139 {
36140 enum machine_mode in_mode, out_mode;
36141 int in_n, out_n;
36142 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36143
36144 if (TREE_CODE (type_out) != VECTOR_TYPE
36145 || TREE_CODE (type_in) != VECTOR_TYPE
36146 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36147 return NULL_TREE;
36148
36149 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36150 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36151 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36152 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36153
36154 switch (fn)
36155 {
36156 case BUILT_IN_SQRT:
36157 if (out_mode == DFmode && in_mode == DFmode)
36158 {
36159 if (out_n == 2 && in_n == 2)
36160 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36161 else if (out_n == 4 && in_n == 4)
36162 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36163 else if (out_n == 8 && in_n == 8)
36164 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36165 }
36166 break;
36167
36168 case BUILT_IN_EXP2F:
36169 if (out_mode == SFmode && in_mode == SFmode)
36170 {
36171 if (out_n == 16 && in_n == 16)
36172 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36173 }
36174 break;
36175
36176 case BUILT_IN_SQRTF:
36177 if (out_mode == SFmode && in_mode == SFmode)
36178 {
36179 if (out_n == 4 && in_n == 4)
36180 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36181 else if (out_n == 8 && in_n == 8)
36182 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36183 else if (out_n == 16 && in_n == 16)
36184 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36185 }
36186 break;
36187
36188 case BUILT_IN_IFLOOR:
36189 case BUILT_IN_LFLOOR:
36190 case BUILT_IN_LLFLOOR:
36191 /* The round insn does not trap on denormals. */
36192 if (flag_trapping_math || !TARGET_ROUND)
36193 break;
36194
36195 if (out_mode == SImode && in_mode == DFmode)
36196 {
36197 if (out_n == 4 && in_n == 2)
36198 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36199 else if (out_n == 8 && in_n == 4)
36200 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36201 else if (out_n == 16 && in_n == 8)
36202 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36203 }
36204 break;
36205
36206 case BUILT_IN_IFLOORF:
36207 case BUILT_IN_LFLOORF:
36208 case BUILT_IN_LLFLOORF:
36209 /* The round insn does not trap on denormals. */
36210 if (flag_trapping_math || !TARGET_ROUND)
36211 break;
36212
36213 if (out_mode == SImode && in_mode == SFmode)
36214 {
36215 if (out_n == 4 && in_n == 4)
36216 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36217 else if (out_n == 8 && in_n == 8)
36218 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36219 }
36220 break;
36221
36222 case BUILT_IN_ICEIL:
36223 case BUILT_IN_LCEIL:
36224 case BUILT_IN_LLCEIL:
36225 /* The round insn does not trap on denormals. */
36226 if (flag_trapping_math || !TARGET_ROUND)
36227 break;
36228
36229 if (out_mode == SImode && in_mode == DFmode)
36230 {
36231 if (out_n == 4 && in_n == 2)
36232 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36233 else if (out_n == 8 && in_n == 4)
36234 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36235 else if (out_n == 16 && in_n == 8)
36236 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36237 }
36238 break;
36239
36240 case BUILT_IN_ICEILF:
36241 case BUILT_IN_LCEILF:
36242 case BUILT_IN_LLCEILF:
36243 /* The round insn does not trap on denormals. */
36244 if (flag_trapping_math || !TARGET_ROUND)
36245 break;
36246
36247 if (out_mode == SImode && in_mode == SFmode)
36248 {
36249 if (out_n == 4 && in_n == 4)
36250 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36251 else if (out_n == 8 && in_n == 8)
36252 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36253 }
36254 break;
36255
36256 case BUILT_IN_IRINT:
36257 case BUILT_IN_LRINT:
36258 case BUILT_IN_LLRINT:
36259 if (out_mode == SImode && in_mode == DFmode)
36260 {
36261 if (out_n == 4 && in_n == 2)
36262 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36263 else if (out_n == 8 && in_n == 4)
36264 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36265 }
36266 break;
36267
36268 case BUILT_IN_IRINTF:
36269 case BUILT_IN_LRINTF:
36270 case BUILT_IN_LLRINTF:
36271 if (out_mode == SImode && in_mode == SFmode)
36272 {
36273 if (out_n == 4 && in_n == 4)
36274 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36275 else if (out_n == 8 && in_n == 8)
36276 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36277 }
36278 break;
36279
36280 case BUILT_IN_IROUND:
36281 case BUILT_IN_LROUND:
36282 case BUILT_IN_LLROUND:
36283 /* The round insn does not trap on denormals. */
36284 if (flag_trapping_math || !TARGET_ROUND)
36285 break;
36286
36287 if (out_mode == SImode && in_mode == DFmode)
36288 {
36289 if (out_n == 4 && in_n == 2)
36290 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36291 else if (out_n == 8 && in_n == 4)
36292 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36293 else if (out_n == 16 && in_n == 8)
36294 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36295 }
36296 break;
36297
36298 case BUILT_IN_IROUNDF:
36299 case BUILT_IN_LROUNDF:
36300 case BUILT_IN_LLROUNDF:
36301 /* The round insn does not trap on denormals. */
36302 if (flag_trapping_math || !TARGET_ROUND)
36303 break;
36304
36305 if (out_mode == SImode && in_mode == SFmode)
36306 {
36307 if (out_n == 4 && in_n == 4)
36308 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36309 else if (out_n == 8 && in_n == 8)
36310 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36311 }
36312 break;
36313
36314 case BUILT_IN_COPYSIGN:
36315 if (out_mode == DFmode && in_mode == DFmode)
36316 {
36317 if (out_n == 2 && in_n == 2)
36318 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36319 else if (out_n == 4 && in_n == 4)
36320 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36321 else if (out_n == 8 && in_n == 8)
36322 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36323 }
36324 break;
36325
36326 case BUILT_IN_COPYSIGNF:
36327 if (out_mode == SFmode && in_mode == SFmode)
36328 {
36329 if (out_n == 4 && in_n == 4)
36330 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36331 else if (out_n == 8 && in_n == 8)
36332 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36333 else if (out_n == 16 && in_n == 16)
36334 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36335 }
36336 break;
36337
36338 case BUILT_IN_FLOOR:
36339 /* The round insn does not trap on denormals. */
36340 if (flag_trapping_math || !TARGET_ROUND)
36341 break;
36342
36343 if (out_mode == DFmode && in_mode == DFmode)
36344 {
36345 if (out_n == 2 && in_n == 2)
36346 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36347 else if (out_n == 4 && in_n == 4)
36348 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36349 }
36350 break;
36351
36352 case BUILT_IN_FLOORF:
36353 /* The round insn does not trap on denormals. */
36354 if (flag_trapping_math || !TARGET_ROUND)
36355 break;
36356
36357 if (out_mode == SFmode && in_mode == SFmode)
36358 {
36359 if (out_n == 4 && in_n == 4)
36360 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36361 else if (out_n == 8 && in_n == 8)
36362 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36363 }
36364 break;
36365
36366 case BUILT_IN_CEIL:
36367 /* The round insn does not trap on denormals. */
36368 if (flag_trapping_math || !TARGET_ROUND)
36369 break;
36370
36371 if (out_mode == DFmode && in_mode == DFmode)
36372 {
36373 if (out_n == 2 && in_n == 2)
36374 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36375 else if (out_n == 4 && in_n == 4)
36376 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36377 }
36378 break;
36379
36380 case BUILT_IN_CEILF:
36381 /* The round insn does not trap on denormals. */
36382 if (flag_trapping_math || !TARGET_ROUND)
36383 break;
36384
36385 if (out_mode == SFmode && in_mode == SFmode)
36386 {
36387 if (out_n == 4 && in_n == 4)
36388 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36389 else if (out_n == 8 && in_n == 8)
36390 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36391 }
36392 break;
36393
36394 case BUILT_IN_TRUNC:
36395 /* The round insn does not trap on denormals. */
36396 if (flag_trapping_math || !TARGET_ROUND)
36397 break;
36398
36399 if (out_mode == DFmode && in_mode == DFmode)
36400 {
36401 if (out_n == 2 && in_n == 2)
36402 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36403 else if (out_n == 4 && in_n == 4)
36404 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36405 }
36406 break;
36407
36408 case BUILT_IN_TRUNCF:
36409 /* The round insn does not trap on denormals. */
36410 if (flag_trapping_math || !TARGET_ROUND)
36411 break;
36412
36413 if (out_mode == SFmode && in_mode == SFmode)
36414 {
36415 if (out_n == 4 && in_n == 4)
36416 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36417 else if (out_n == 8 && in_n == 8)
36418 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36419 }
36420 break;
36421
36422 case BUILT_IN_RINT:
36423 /* The round insn does not trap on denormals. */
36424 if (flag_trapping_math || !TARGET_ROUND)
36425 break;
36426
36427 if (out_mode == DFmode && in_mode == DFmode)
36428 {
36429 if (out_n == 2 && in_n == 2)
36430 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36431 else if (out_n == 4 && in_n == 4)
36432 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36433 }
36434 break;
36435
36436 case BUILT_IN_RINTF:
36437 /* The round insn does not trap on denormals. */
36438 if (flag_trapping_math || !TARGET_ROUND)
36439 break;
36440
36441 if (out_mode == SFmode && in_mode == SFmode)
36442 {
36443 if (out_n == 4 && in_n == 4)
36444 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36445 else if (out_n == 8 && in_n == 8)
36446 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36447 }
36448 break;
36449
36450 case BUILT_IN_ROUND:
36451 /* The round insn does not trap on denormals. */
36452 if (flag_trapping_math || !TARGET_ROUND)
36453 break;
36454
36455 if (out_mode == DFmode && in_mode == DFmode)
36456 {
36457 if (out_n == 2 && in_n == 2)
36458 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36459 else if (out_n == 4 && in_n == 4)
36460 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36461 }
36462 break;
36463
36464 case BUILT_IN_ROUNDF:
36465 /* The round insn does not trap on denormals. */
36466 if (flag_trapping_math || !TARGET_ROUND)
36467 break;
36468
36469 if (out_mode == SFmode && in_mode == SFmode)
36470 {
36471 if (out_n == 4 && in_n == 4)
36472 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36473 else if (out_n == 8 && in_n == 8)
36474 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36475 }
36476 break;
36477
36478 case BUILT_IN_FMA:
36479 if (out_mode == DFmode && in_mode == DFmode)
36480 {
36481 if (out_n == 2 && in_n == 2)
36482 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36483 if (out_n == 4 && in_n == 4)
36484 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36485 }
36486 break;
36487
36488 case BUILT_IN_FMAF:
36489 if (out_mode == SFmode && in_mode == SFmode)
36490 {
36491 if (out_n == 4 && in_n == 4)
36492 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36493 if (out_n == 8 && in_n == 8)
36494 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36495 }
36496 break;
36497
36498 default:
36499 break;
36500 }
36501
36502 /* Dispatch to a handler for a vectorization library. */
36503 if (ix86_veclib_handler)
36504 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36505 type_in);
36506
36507 return NULL_TREE;
36508 }
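
/* Illustrative example (not part of the compiler; the function name is
   made up): user code of the kind the hook above serves.  With
   auto-vectorization and AVX enabled, the vectorizer may ask
   ix86_builtin_vectorized_function to map the scalar sqrt call below to a
   packed builtin such as IX86_BUILTIN_SQRTPD256 (V4DF result, V4DF
   argument).  */
#include <math.h>

void
example_vec_sqrt (double *restrict out, const double *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = sqrt (in[i]);
}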
36509
36510 /* Handler for an SVML-style interface to
36511 a library with vectorized intrinsics. */
36512
36513 static tree
36514 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36515 {
36516 char name[20];
36517 tree fntype, new_fndecl, args;
36518 unsigned arity;
36519 const char *bname;
36520 enum machine_mode el_mode, in_mode;
36521 int n, in_n;
36522
36523 /* The SVML is suitable for unsafe math only. */
36524 if (!flag_unsafe_math_optimizations)
36525 return NULL_TREE;
36526
36527 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36528 n = TYPE_VECTOR_SUBPARTS (type_out);
36529 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36530 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36531 if (el_mode != in_mode
36532 || n != in_n)
36533 return NULL_TREE;
36534
36535 switch (fn)
36536 {
36537 case BUILT_IN_EXP:
36538 case BUILT_IN_LOG:
36539 case BUILT_IN_LOG10:
36540 case BUILT_IN_POW:
36541 case BUILT_IN_TANH:
36542 case BUILT_IN_TAN:
36543 case BUILT_IN_ATAN:
36544 case BUILT_IN_ATAN2:
36545 case BUILT_IN_ATANH:
36546 case BUILT_IN_CBRT:
36547 case BUILT_IN_SINH:
36548 case BUILT_IN_SIN:
36549 case BUILT_IN_ASINH:
36550 case BUILT_IN_ASIN:
36551 case BUILT_IN_COSH:
36552 case BUILT_IN_COS:
36553 case BUILT_IN_ACOSH:
36554 case BUILT_IN_ACOS:
36555 if (el_mode != DFmode || n != 2)
36556 return NULL_TREE;
36557 break;
36558
36559 case BUILT_IN_EXPF:
36560 case BUILT_IN_LOGF:
36561 case BUILT_IN_LOG10F:
36562 case BUILT_IN_POWF:
36563 case BUILT_IN_TANHF:
36564 case BUILT_IN_TANF:
36565 case BUILT_IN_ATANF:
36566 case BUILT_IN_ATAN2F:
36567 case BUILT_IN_ATANHF:
36568 case BUILT_IN_CBRTF:
36569 case BUILT_IN_SINHF:
36570 case BUILT_IN_SINF:
36571 case BUILT_IN_ASINHF:
36572 case BUILT_IN_ASINF:
36573 case BUILT_IN_COSHF:
36574 case BUILT_IN_COSF:
36575 case BUILT_IN_ACOSHF:
36576 case BUILT_IN_ACOSF:
36577 if (el_mode != SFmode || n != 4)
36578 return NULL_TREE;
36579 break;
36580
36581 default:
36582 return NULL_TREE;
36583 }
36584
36585 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36586
36587 if (fn == BUILT_IN_LOGF)
36588 strcpy (name, "vmlsLn4");
36589 else if (fn == BUILT_IN_LOG)
36590 strcpy (name, "vmldLn2");
36591 else if (n == 4)
36592 {
36593 sprintf (name, "vmls%s", bname+10);
36594 name[strlen (name)-1] = '4';
36595 }
36596 else
36597 sprintf (name, "vmld%s2", bname+10);
36598
36599 /* Convert to uppercase. */
36600 name[4] &= ~0x20;
36601
36602 arity = 0;
36603 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36604 args;
36605 args = TREE_CHAIN (args))
36606 arity++;
36607
36608 if (arity == 1)
36609 fntype = build_function_type_list (type_out, type_in, NULL);
36610 else
36611 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36612
36613 /* Build a function declaration for the vectorized function. */
36614 new_fndecl = build_decl (BUILTINS_LOCATION,
36615 FUNCTION_DECL, get_identifier (name), fntype);
36616 TREE_PUBLIC (new_fndecl) = 1;
36617 DECL_EXTERNAL (new_fndecl) = 1;
36618 DECL_IS_NOVOPS (new_fndecl) = 1;
36619 TREE_READONLY (new_fndecl) = 1;
36620
36621 return new_fndecl;
36622 }
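
/* Standalone sketch (illustrative only, not part of the compiler) of the
   SVML name mangling performed above, omitting the special case for log:
   strip the "__builtin_" prefix, wrap the rest in "vmls...4" for 4 x float
   or "vmld...2" for 2 x double, and uppercase the first letter of the math
   function name.  */
#include <stdio.h>
#include <string.h>

static void
example_svml_name (char *name, const char *builtin_name, int four_floats)
{
  const char *base = builtin_name + strlen ("__builtin_");

  if (four_floats)
    {
      sprintf (name, "vmls%s", base);
      name[strlen (name) - 1] = '4';	/* Replace the trailing 'f'.  */
    }
  else
    sprintf (name, "vmld%s2", base);
  name[4] &= ~0x20;			/* Uppercase the first letter.  */
}

int
main (void)
{
  char buf[32];

  example_svml_name (buf, "__builtin_sinf", 1);
  printf ("%s\n", buf);			/* Prints vmlsSin4.  */
  example_svml_name (buf, "__builtin_cos", 0);
  printf ("%s\n", buf);			/* Prints vmldCos2.  */
  return 0;
}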
36623
36624 /* Handler for an ACML-style interface to
36625 a library with vectorized intrinsics. */
36626
36627 static tree
36628 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36629 {
36630 char name[20] = "__vr.._";
36631 tree fntype, new_fndecl, args;
36632 unsigned arity;
36633 const char *bname;
36634 enum machine_mode el_mode, in_mode;
36635 int n, in_n;
36636
36637 /* The ACML is 64-bit only and suitable for unsafe math only, as
36638 it does not correctly support parts of IEEE, such as denormals,
36639 to the required precision. */
36640 if (!TARGET_64BIT
36641 || !flag_unsafe_math_optimizations)
36642 return NULL_TREE;
36643
36644 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36645 n = TYPE_VECTOR_SUBPARTS (type_out);
36646 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36647 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36648 if (el_mode != in_mode
36649 || n != in_n)
36650 return NULL_TREE;
36651
36652 switch (fn)
36653 {
36654 case BUILT_IN_SIN:
36655 case BUILT_IN_COS:
36656 case BUILT_IN_EXP:
36657 case BUILT_IN_LOG:
36658 case BUILT_IN_LOG2:
36659 case BUILT_IN_LOG10:
36660 name[4] = 'd';
36661 name[5] = '2';
36662 if (el_mode != DFmode
36663 || n != 2)
36664 return NULL_TREE;
36665 break;
36666
36667 case BUILT_IN_SINF:
36668 case BUILT_IN_COSF:
36669 case BUILT_IN_EXPF:
36670 case BUILT_IN_POWF:
36671 case BUILT_IN_LOGF:
36672 case BUILT_IN_LOG2F:
36673 case BUILT_IN_LOG10F:
36674 name[4] = 's';
36675 name[5] = '4';
36676 if (el_mode != SFmode
36677 || n != 4)
36678 return NULL_TREE;
36679 break;
36680
36681 default:
36682 return NULL_TREE;
36683 }
36684
36685 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36686 sprintf (name + 7, "%s", bname+10);
36687
36688 arity = 0;
36689 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36690 args;
36691 args = TREE_CHAIN (args))
36692 arity++;
36693
36694 if (arity == 1)
36695 fntype = build_function_type_list (type_out, type_in, NULL);
36696 else
36697 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36698
36699 /* Build a function declaration for the vectorized function. */
36700 new_fndecl = build_decl (BUILTINS_LOCATION,
36701 FUNCTION_DECL, get_identifier (name), fntype);
36702 TREE_PUBLIC (new_fndecl) = 1;
36703 DECL_EXTERNAL (new_fndecl) = 1;
36704 DECL_IS_NOVOPS (new_fndecl) = 1;
36705 TREE_READONLY (new_fndecl) = 1;
36706
36707 return new_fndecl;
36708 }
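
/* Companion sketch (illustrative only, not part of the compiler) of the
   ACML mangling above: the fixed "__vr.._" template has 'd'/'2' or
   's'/'4' patched into positions 4 and 5 and the builtin name, minus its
   "__builtin_" prefix, appended after the underscore.  */
#include <stdio.h>
#include <string.h>

static void
example_acml_name (char *name, const char *builtin_name, int four_floats)
{
  strcpy (name, "__vr.._");
  name[4] = four_floats ? 's' : 'd';
  name[5] = four_floats ? '4' : '2';
  sprintf (name + 7, "%s", builtin_name + strlen ("__builtin_"));
}

int
main (void)
{
  char buf[32];

  example_acml_name (buf, "__builtin_sin", 0);
  printf ("%s\n", buf);			/* Prints __vrd2_sin.  */
  example_acml_name (buf, "__builtin_sinf", 1);
  printf ("%s\n", buf);			/* Prints __vrs4_sinf.  */
  return 0;
}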
36709
36710 /* Returns a decl of a function that implements a gather load with
36711 memory vector type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
36712 Return NULL_TREE if it is not available. */
36713
36714 static tree
36715 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36716 const_tree index_type, int scale)
36717 {
36718 bool si;
36719 enum ix86_builtins code;
36720
36721 if (! TARGET_AVX2)
36722 return NULL_TREE;
36723
36724 if ((TREE_CODE (index_type) != INTEGER_TYPE
36725 && !POINTER_TYPE_P (index_type))
36726 || (TYPE_MODE (index_type) != SImode
36727 && TYPE_MODE (index_type) != DImode))
36728 return NULL_TREE;
36729
36730 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36731 return NULL_TREE;
36732
36733 /* v*gather* insn sign extends index to pointer mode. */
36734 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36735 && TYPE_UNSIGNED (index_type))
36736 return NULL_TREE;
36737
36738 if (scale <= 0
36739 || scale > 8
36740 || (scale & (scale - 1)) != 0)
36741 return NULL_TREE;
36742
36743 si = TYPE_MODE (index_type) == SImode;
36744 switch (TYPE_MODE (mem_vectype))
36745 {
36746 case V2DFmode:
36747 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36748 break;
36749 case V4DFmode:
36750 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36751 break;
36752 case V2DImode:
36753 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36754 break;
36755 case V4DImode:
36756 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36757 break;
36758 case V4SFmode:
36759 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36760 break;
36761 case V8SFmode:
36762 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36763 break;
36764 case V4SImode:
36765 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36766 break;
36767 case V8SImode:
36768 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36769 break;
36770 case V8DFmode:
36771 if (TARGET_AVX512F)
36772 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36773 else
36774 return NULL_TREE;
36775 break;
36776 case V8DImode:
36777 if (TARGET_AVX512F)
36778 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36779 else
36780 return NULL_TREE;
36781 break;
36782 case V16SFmode:
36783 if (TARGET_AVX512F)
36784 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36785 else
36786 return NULL_TREE;
36787 break;
36788 case V16SImode:
36789 if (TARGET_AVX512F)
36790 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36791 else
36792 return NULL_TREE;
36793 break;
36794 default:
36795 return NULL_TREE;
36796 }
36797
36798 return ix86_get_builtin (code);
36799 }
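
/* Illustrative example (not part of the compiler; the function name is
   made up): a loop whose indexed loads the vectorizer may implement with a
   gather when AVX2 is enabled, obtaining the builtin decl (for instance
   IX86_BUILTIN_GATHERSIV4SF for a V4SF result, SImode index and scale 4)
   from the hook above.  */
void
example_gather_use (float *restrict out, const float *restrict tab,
		    const int *restrict idx, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = tab[idx[i]];
}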
36800
36801 /* Returns the decl of a target-specific builtin that implements the
36802 reciprocal of the function FN, or NULL_TREE if not available. */
36803
36804 static tree
36805 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36806 bool sqrt ATTRIBUTE_UNUSED)
36807 {
36808 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36809 && flag_finite_math_only && !flag_trapping_math
36810 && flag_unsafe_math_optimizations))
36811 return NULL_TREE;
36812
36813 if (md_fn)
36814 /* Machine dependent builtins. */
36815 switch (fn)
36816 {
36817 /* Vectorized version of sqrt to rsqrt conversion. */
36818 case IX86_BUILTIN_SQRTPS_NR:
36819 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36820
36821 case IX86_BUILTIN_SQRTPS_NR256:
36822 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36823
36824 default:
36825 return NULL_TREE;
36826 }
36827 else
36828 /* Normal builtins. */
36829 switch (fn)
36830 {
36831 /* Sqrt to rsqrt conversion. */
36832 case BUILT_IN_SQRTF:
36833 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36834
36835 default:
36836 return NULL_TREE;
36837 }
36838 }
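
/* Illustrative example (not part of the compiler; the function name is
   made up): with unsafe, finite-only, non-trapping math the division below
   can be rewritten using a reciprocal square root approximation; for the
   vectorized form the hook above supplies IX86_BUILTIN_RSQRTPS_NR, which
   is refined by a Newton-Raphson step.  */
#include <math.h>

void
example_rsqrt_use (float *restrict out, const float *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = 1.0f / sqrtf (in[i]);
}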
36839 \f
36840 /* Helper for avx_vpermilps256_operand et al. This is also used by
36841 the expansion functions to turn the parallel back into a mask.
36842 The return value is 0 for no match and the imm8+1 for a match. */
36843
36844 int
36845 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36846 {
36847 unsigned i, nelt = GET_MODE_NUNITS (mode);
36848 unsigned mask = 0;
36849 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36850
36851 if (XVECLEN (par, 0) != (int) nelt)
36852 return 0;
36853
36854 /* Validate that all of the elements are constants, and not totally
36855 out of range. Copy the data into an integral array to make the
36856 subsequent checks easier. */
36857 for (i = 0; i < nelt; ++i)
36858 {
36859 rtx er = XVECEXP (par, 0, i);
36860 unsigned HOST_WIDE_INT ei;
36861
36862 if (!CONST_INT_P (er))
36863 return 0;
36864 ei = INTVAL (er);
36865 if (ei >= nelt)
36866 return 0;
36867 ipar[i] = ei;
36868 }
36869
36870 switch (mode)
36871 {
36872 case V8DFmode:
36873 /* In the 512-bit DFmode case, we can only move elements within
36874 a 128-bit lane. First fill the second part of the mask,
36875 then fallthru. */
36876 for (i = 4; i < 6; ++i)
36877 {
36878 if (ipar[i] < 4 || ipar[i] >= 6)
36879 return 0;
36880 mask |= (ipar[i] - 4) << i;
36881 }
36882 for (i = 6; i < 8; ++i)
36883 {
36884 if (ipar[i] < 6)
36885 return 0;
36886 mask |= (ipar[i] - 6) << i;
36887 }
36888 /* FALLTHRU */
36889
36890 case V4DFmode:
36891 /* In the 256-bit DFmode case, we can only move elements within
36892 a 128-bit lane. */
36893 for (i = 0; i < 2; ++i)
36894 {
36895 if (ipar[i] >= 2)
36896 return 0;
36897 mask |= ipar[i] << i;
36898 }
36899 for (i = 2; i < 4; ++i)
36900 {
36901 if (ipar[i] < 2)
36902 return 0;
36903 mask |= (ipar[i] - 2) << i;
36904 }
36905 break;
36906
36907 case V16SFmode:
36908 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
36909 must mirror the permutation in the lower 256 bits. */
36910 for (i = 0; i < 8; ++i)
36911 if (ipar[i] + 8 != ipar[i + 8])
36912 return 0;
36913 /* FALLTHRU */
36914
36915 case V8SFmode:
36916 /* In the 256-bit SFmode case, we have full freedom of
36917 movement within the low 128-bit lane, but the high 128-bit
36918 lane must mirror the exact same pattern. */
36919 for (i = 0; i < 4; ++i)
36920 if (ipar[i] + 4 != ipar[i + 4])
36921 return 0;
36922 nelt = 4;
36923 /* FALLTHRU */
36924
36925 case V2DFmode:
36926 case V4SFmode:
36927 /* In the 128-bit case, we have full freedom in the placement of
36928 the elements from the source operand. */
36929 for (i = 0; i < nelt; ++i)
36930 mask |= ipar[i] << (i * (nelt / 2));
36931 break;
36932
36933 default:
36934 gcc_unreachable ();
36935 }
36936
36937 /* Make sure success has a non-zero value by adding one. */
36938 return mask + 1;
36939 }
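
/* Standalone sketch (illustrative only, not part of the compiler) of the
   V4DFmode branch above: reconstruct the vpermilpd imm8 from a lane-local
   element selection.  */
#include <stdio.h>

static int
example_v4df_vpermilp_mask (const unsigned char ipar[4])
{
  unsigned i, mask = 0;

  for (i = 0; i < 2; ++i)
    {
      if (ipar[i] >= 2)
	return 0;
      mask |= ipar[i] << i;
    }
  for (i = 2; i < 4; ++i)
    {
      if (ipar[i] < 2)
	return 0;
      mask |= (ipar[i] - 2) << i;
    }
  return mask + 1;			/* 0 means no match.  */
}

int
main (void)
{
  static const unsigned char sel[4] = { 1, 0, 3, 2 };

  /* Swap the two elements within each 128-bit lane: imm8 is 5, so this
     prints 6.  */
  printf ("%d\n", example_v4df_vpermilp_mask (sel));
  return 0;
}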
36940
36941 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
36942 the expansion functions to turn the parallel back into a mask.
36943 The return value is 0 for no match and the imm8+1 for a match. */
36944
36945 int
36946 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
36947 {
36948 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
36949 unsigned mask = 0;
36950 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
36951
36952 if (XVECLEN (par, 0) != (int) nelt)
36953 return 0;
36954
36955 /* Validate that all of the elements are constants, and not totally
36956 out of range. Copy the data into an integral array to make the
36957 subsequent checks easier. */
36958 for (i = 0; i < nelt; ++i)
36959 {
36960 rtx er = XVECEXP (par, 0, i);
36961 unsigned HOST_WIDE_INT ei;
36962
36963 if (!CONST_INT_P (er))
36964 return 0;
36965 ei = INTVAL (er);
36966 if (ei >= 2 * nelt)
36967 return 0;
36968 ipar[i] = ei;
36969 }
36970
36971 /* Validate that each half of the permute selects consecutive elements. */
36972 for (i = 0; i < nelt2 - 1; ++i)
36973 if (ipar[i] + 1 != ipar[i + 1])
36974 return 0;
36975 for (i = nelt2; i < nelt - 1; ++i)
36976 if (ipar[i] + 1 != ipar[i + 1])
36977 return 0;
36978
36979 /* Reconstruct the mask. */
36980 for (i = 0; i < 2; ++i)
36981 {
36982 unsigned e = ipar[i * nelt2];
36983 if (e % nelt2)
36984 return 0;
36985 e /= nelt2;
36986 mask |= e << (i * 4);
36987 }
36988
36989 /* Make sure success has a non-zero value by adding one. */
36990 return mask + 1;
36991 }
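
/* Worked example (illustrative only) for the reconstruction above: for
   V4DFmode a parallel selecting elements { 2, 3, 4, 5 } takes the high
   half of the first operand and the low half of the second, so the imm8
   is (1 << 0) | (2 << 4) = 0x21 and the function returns 0x22.  */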
36992 \f
36993 /* Store OPERAND to memory after reload is completed. This means
36994 that we can't easily use assign_stack_local. */
36995 rtx
36996 ix86_force_to_memory (enum machine_mode mode, rtx operand)
36997 {
36998 rtx result;
36999
37000 gcc_assert (reload_completed);
37001 if (ix86_using_red_zone ())
37002 {
37003 result = gen_rtx_MEM (mode,
37004 gen_rtx_PLUS (Pmode,
37005 stack_pointer_rtx,
37006 GEN_INT (-RED_ZONE_SIZE)));
37007 emit_move_insn (result, operand);
37008 }
37009 else if (TARGET_64BIT)
37010 {
37011 switch (mode)
37012 {
37013 case HImode:
37014 case SImode:
37015 operand = gen_lowpart (DImode, operand);
37016 /* FALLTHRU */
37017 case DImode:
37018 emit_insn (
37019 gen_rtx_SET (VOIDmode,
37020 gen_rtx_MEM (DImode,
37021 gen_rtx_PRE_DEC (DImode,
37022 stack_pointer_rtx)),
37023 operand));
37024 break;
37025 default:
37026 gcc_unreachable ();
37027 }
37028 result = gen_rtx_MEM (mode, stack_pointer_rtx);
37029 }
37030 else
37031 {
37032 switch (mode)
37033 {
37034 case DImode:
37035 {
37036 rtx operands[2];
37037 split_double_mode (mode, &operand, 1, operands, operands + 1);
37038 emit_insn (
37039 gen_rtx_SET (VOIDmode,
37040 gen_rtx_MEM (SImode,
37041 gen_rtx_PRE_DEC (Pmode,
37042 stack_pointer_rtx)),
37043 operands[1]));
37044 emit_insn (
37045 gen_rtx_SET (VOIDmode,
37046 gen_rtx_MEM (SImode,
37047 gen_rtx_PRE_DEC (Pmode,
37048 stack_pointer_rtx)),
37049 operands[0]));
37050 }
37051 break;
37052 case HImode:
37053 /* Store HImodes as SImodes. */
37054 operand = gen_lowpart (SImode, operand);
37055 /* FALLTHRU */
37056 case SImode:
37057 emit_insn (
37058 gen_rtx_SET (VOIDmode,
37059 gen_rtx_MEM (GET_MODE (operand),
37060 gen_rtx_PRE_DEC (SImode,
37061 stack_pointer_rtx)),
37062 operand));
37063 break;
37064 default:
37065 gcc_unreachable ();
37066 }
37067 result = gen_rtx_MEM (mode, stack_pointer_rtx);
37068 }
37069 return result;
37070 }
37071
37072 /* Free the operand from memory. */
37073 void
37074 ix86_free_from_memory (enum machine_mode mode)
37075 {
37076 if (!ix86_using_red_zone ())
37077 {
37078 int size;
37079
37080 if (mode == DImode || TARGET_64BIT)
37081 size = 8;
37082 else
37083 size = 4;
37084 /* Use LEA to deallocate stack space. In peephole2 it will be converted
37085 to a pop or add instruction if registers are available. */
37086 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
37087 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
37088 GEN_INT (size))));
37089 }
37090 }
37091
37092 /* Return a register priority for hard reg REGNO. */
37093 static int
37094 ix86_register_priority (int hard_regno)
37095 {
37096 /* ebp and r13 as a base register always want a displacement, and r12
37097 as a base register always wants an index. So discourage their use
37098 in an address. */
37099 if (hard_regno == R12_REG || hard_regno == R13_REG)
37100 return 0;
37101 if (hard_regno == BP_REG)
37102 return 1;
37103 /* New x86-64 int registers result in bigger code size. Discourage
37104 them. */
37105 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37106 return 2;
37107 /* New x86-64 SSE registers result in bigger code size. Discourage
37108 them. */
37109 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37110 return 2;
37111 /* Use of the AX register results in smaller code. Prefer it. */
37112 if (hard_regno == 0)
37113 return 4;
37114 return 3;
37115 }
37116
37117 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37118
37119 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37120 QImode must go into class Q_REGS.
37121 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37122 movdf to do mem-to-mem moves through integer regs. */
37123
37124 static reg_class_t
37125 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37126 {
37127 enum machine_mode mode = GET_MODE (x);
37128
37129 /* We're only allowed to return a subclass of CLASS. Many of the
37130 following checks fail for NO_REGS, so eliminate that early. */
37131 if (regclass == NO_REGS)
37132 return NO_REGS;
37133
37134 /* All classes can load zeros. */
37135 if (x == CONST0_RTX (mode))
37136 return regclass;
37137
37138 /* Force constants into memory if we are loading a (nonzero) constant into
37139 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37140 instructions to load from a constant. */
37141 if (CONSTANT_P (x)
37142 && (MAYBE_MMX_CLASS_P (regclass)
37143 || MAYBE_SSE_CLASS_P (regclass)
37144 || MAYBE_MASK_CLASS_P (regclass)))
37145 return NO_REGS;
37146
37147 /* Prefer SSE regs only, if we can use them for math. */
37148 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37149 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37150
37151 /* Floating-point constants need more complex checks. */
37152 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37153 {
37154 /* General regs can load everything. */
37155 if (reg_class_subset_p (regclass, GENERAL_REGS))
37156 return regclass;
37157
37158 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37159 zero above. We only want to wind up preferring 80387 registers if
37160 we plan on doing computation with them. */
37161 if (TARGET_80387
37162 && standard_80387_constant_p (x) > 0)
37163 {
37164 /* Limit class to non-sse. */
37165 if (regclass == FLOAT_SSE_REGS)
37166 return FLOAT_REGS;
37167 if (regclass == FP_TOP_SSE_REGS)
37168 return FP_TOP_REG;
37169 if (regclass == FP_SECOND_SSE_REGS)
37170 return FP_SECOND_REG;
37171 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37172 return regclass;
37173 }
37174
37175 return NO_REGS;
37176 }
37177
37178 /* Generally when we see PLUS here, it's the function invariant
37179 (plus soft-fp const_int). Which can only be computed into general
37180 regs. */
37181 if (GET_CODE (x) == PLUS)
37182 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37183
37184 /* QImode constants are easy to load, but non-constant QImode data
37185 must go into Q_REGS. */
37186 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37187 {
37188 if (reg_class_subset_p (regclass, Q_REGS))
37189 return regclass;
37190 if (reg_class_subset_p (Q_REGS, regclass))
37191 return Q_REGS;
37192 return NO_REGS;
37193 }
37194
37195 return regclass;
37196 }
37197
37198 /* Discourage putting floating-point values in SSE registers unless
37199 SSE math is being used, and likewise for the 387 registers. */
37200 static reg_class_t
37201 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37202 {
37203 enum machine_mode mode = GET_MODE (x);
37204
37205 /* Restrict the output reload class to the register bank that we are doing
37206 math on. If we do not want to return a subset of CLASS, reject this
37207 alternative: if reload cannot do this, it will still use its choice. */
37208 mode = GET_MODE (x);
37209 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37210 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37211
37212 if (X87_FLOAT_MODE_P (mode))
37213 {
37214 if (regclass == FP_TOP_SSE_REGS)
37215 return FP_TOP_REG;
37216 else if (regclass == FP_SECOND_SSE_REGS)
37217 return FP_SECOND_REG;
37218 else
37219 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37220 }
37221
37222 return regclass;
37223 }
37224
37225 static reg_class_t
37226 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37227 enum machine_mode mode, secondary_reload_info *sri)
37228 {
37229 /* Double-word spills from general registers to non-offsettable memory
37230 references (zero-extended addresses) require special handling. */
37231 if (TARGET_64BIT
37232 && MEM_P (x)
37233 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37234 && INTEGER_CLASS_P (rclass)
37235 && !offsettable_memref_p (x))
37236 {
37237 sri->icode = (in_p
37238 ? CODE_FOR_reload_noff_load
37239 : CODE_FOR_reload_noff_store);
37240 /* Add the cost of moving address to a temporary. */
37241 sri->extra_cost = 1;
37242
37243 return NO_REGS;
37244 }
37245
37246 /* QImode spills from non-QI registers require an
37247 intermediate register on 32-bit targets. */
37248 if (mode == QImode
37249 && (MAYBE_MASK_CLASS_P (rclass)
37250 || (!TARGET_64BIT && !in_p
37251 && INTEGER_CLASS_P (rclass)
37252 && MAYBE_NON_Q_CLASS_P (rclass))))
37253 {
37254 int regno;
37255
37256 if (REG_P (x))
37257 regno = REGNO (x);
37258 else
37259 regno = -1;
37260
37261 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37262 regno = true_regnum (x);
37263
37264 /* Return Q_REGS if the operand is in memory. */
37265 if (regno == -1)
37266 return Q_REGS;
37267 }
37268
37269 /* This condition handles the corner case where an expression involving
37270 pointers gets vectorized. We're trying to use the address of a
37271 stack slot as a vector initializer.
37272
37273 (set (reg:V2DI 74 [ vect_cst_.2 ])
37274 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37275
37276 Eventually frame gets turned into sp+offset like this:
37277
37278 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37279 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37280 (const_int 392 [0x188]))))
37281
37282 That later gets turned into:
37283
37284 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37285 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37286 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37287
37288 We'll have the following reload recorded:
37289
37290 Reload 0: reload_in (DI) =
37291 (plus:DI (reg/f:DI 7 sp)
37292 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37293 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37294 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37295 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37296 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37297 reload_reg_rtx: (reg:V2DI 22 xmm1)
37298
37299 This isn't going to work since SSE instructions can't handle scalar
37300 additions. Returning GENERAL_REGS forces the addition into an integer
37301 register, and reload can handle subsequent reloads without problems. */
37302
37303 if (in_p && GET_CODE (x) == PLUS
37304 && SSE_CLASS_P (rclass)
37305 && SCALAR_INT_MODE_P (mode))
37306 return GENERAL_REGS;
37307
37308 return NO_REGS;
37309 }
37310
37311 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37312
37313 static bool
37314 ix86_class_likely_spilled_p (reg_class_t rclass)
37315 {
37316 switch (rclass)
37317 {
37318 case AREG:
37319 case DREG:
37320 case CREG:
37321 case BREG:
37322 case AD_REGS:
37323 case SIREG:
37324 case DIREG:
37325 case SSE_FIRST_REG:
37326 case FP_TOP_REG:
37327 case FP_SECOND_REG:
37328 return true;
37329
37330 default:
37331 break;
37332 }
37333
37334 return false;
37335 }
37336
37337 /* If we are copying between general and FP registers, we need a memory
37338 location. The same is true for SSE and MMX registers.
37339
37340 To optimize register_move_cost performance, allow inline variant.
37341
37342 The macro can't work reliably when one of the CLASSES is a class containing
37343 registers from multiple units (SSE, MMX, integer). We avoid this by never
37344 combining those units in a single alternative in the machine description.
37345 Ensure that this constraint holds to avoid unexpected surprises.
37346
37347 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37348 enforce these sanity checks. */
37349
37350 static inline bool
37351 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37352 enum machine_mode mode, int strict)
37353 {
37354 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37355 return false;
37356 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37357 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37358 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37359 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37360 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37361 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37362 {
37363 gcc_assert (!strict || lra_in_progress);
37364 return true;
37365 }
37366
37367 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37368 return true;
37369
37370 /* ??? This is a lie. We do have moves between mmx/general and between
37371 mmx/sse2. But by saying we need secondary memory we discourage the
37372 register allocator from using the mmx registers unless needed. */
37373 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37374 return true;
37375
37376 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37377 {
37378 /* SSE1 doesn't have any direct moves from other classes. */
37379 if (!TARGET_SSE2)
37380 return true;
37381
37382 /* If the target says that inter-unit moves are more expensive
37383 than moving through memory, then don't generate them. */
37384 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37385 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37386 return true;
37387
37388 /* Between SSE and general, we have moves no larger than word size. */
37389 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37390 return true;
37391 }
37392
37393 return false;
37394 }
37395
37396 bool
37397 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37398 enum machine_mode mode, int strict)
37399 {
37400 return inline_secondary_memory_needed (class1, class2, mode, strict);
37401 }
37402
37403 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37404
37405 On the 80386, this is the size of MODE in words,
37406 except in the FP regs, where a single reg is always enough. */
37407
37408 static unsigned char
37409 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37410 {
37411 if (MAYBE_INTEGER_CLASS_P (rclass))
37412 {
37413 if (mode == XFmode)
37414 return (TARGET_64BIT ? 2 : 3);
37415 else if (mode == XCmode)
37416 return (TARGET_64BIT ? 4 : 6);
37417 else
37418 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37419 }
37420 else
37421 {
37422 if (COMPLEX_MODE_P (mode))
37423 return 2;
37424 else
37425 return 1;
37426 }
37427 }
37428
37429 /* Return true if the registers in CLASS cannot represent the change from
37430 modes FROM to TO. */
37431
37432 bool
37433 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37434 enum reg_class regclass)
37435 {
37436 if (from == to)
37437 return false;
37438
37439 /* x87 registers can't do subreg at all, as all values are reformatted
37440 to extended precision. */
37441 if (MAYBE_FLOAT_CLASS_P (regclass))
37442 return true;
37443
37444 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37445 {
37446 /* Vector registers do not support QI or HImode loads. If we don't
37447 disallow a change to these modes, reload will assume it's ok to
37448 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37449 the vec_dupv4hi pattern. */
37450 if (GET_MODE_SIZE (from) < 4)
37451 return true;
37452
37453 /* Vector registers do not support subreg with nonzero offsets, which
37454 are otherwise valid for integer registers. Since we can't see
37455 whether we have a nonzero offset from here, prohibit all
37456 nonparadoxical subregs changing size. */
37457 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37458 return true;
37459 }
37460
37461 return false;
37462 }
37463
37464 /* Return the cost of moving data of mode M between a
37465 register and memory. A value of 2 is the default; this cost is
37466 relative to those in `REGISTER_MOVE_COST'.
37467
37468 This function is used extensively by register_move_cost that is used to
37469 build tables at startup. Make it inline in this case.
37470 When IN is 2, return maximum of in and out move cost.
37471
37472 If moving between registers and memory is more expensive than
37473 between two registers, you should define this macro to express the
37474 relative cost.
37475
37476 Also model the increased cost of moving QImode values in
37477 non-Q_REGS classes.
37478 */
37479 static inline int
37480 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37481 int in)
37482 {
37483 int cost;
37484 if (FLOAT_CLASS_P (regclass))
37485 {
37486 int index;
37487 switch (mode)
37488 {
37489 case SFmode:
37490 index = 0;
37491 break;
37492 case DFmode:
37493 index = 1;
37494 break;
37495 case XFmode:
37496 index = 2;
37497 break;
37498 default:
37499 return 100;
37500 }
37501 if (in == 2)
37502 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37503 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37504 }
37505 if (SSE_CLASS_P (regclass))
37506 {
37507 int index;
37508 switch (GET_MODE_SIZE (mode))
37509 {
37510 case 4:
37511 index = 0;
37512 break;
37513 case 8:
37514 index = 1;
37515 break;
37516 case 16:
37517 index = 2;
37518 break;
37519 default:
37520 return 100;
37521 }
37522 if (in == 2)
37523 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37524 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37525 }
37526 if (MMX_CLASS_P (regclass))
37527 {
37528 int index;
37529 switch (GET_MODE_SIZE (mode))
37530 {
37531 case 4:
37532 index = 0;
37533 break;
37534 case 8:
37535 index = 1;
37536 break;
37537 default:
37538 return 100;
37539 }
37540 if (in == 2)
37541 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37542 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37543 }
37544 switch (GET_MODE_SIZE (mode))
37545 {
37546 case 1:
37547 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37548 {
37549 if (!in)
37550 return ix86_cost->int_store[0];
37551 if (TARGET_PARTIAL_REG_DEPENDENCY
37552 && optimize_function_for_speed_p (cfun))
37553 cost = ix86_cost->movzbl_load;
37554 else
37555 cost = ix86_cost->int_load[0];
37556 if (in == 2)
37557 return MAX (cost, ix86_cost->int_store[0]);
37558 return cost;
37559 }
37560 else
37561 {
37562 if (in == 2)
37563 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37564 if (in)
37565 return ix86_cost->movzbl_load;
37566 else
37567 return ix86_cost->int_store[0] + 4;
37568 }
37569 break;
37570 case 2:
37571 if (in == 2)
37572 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37573 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37574 default:
37575 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37576 if (mode == TFmode)
37577 mode = XFmode;
37578 if (in == 2)
37579 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37580 else if (in)
37581 cost = ix86_cost->int_load[2];
37582 else
37583 cost = ix86_cost->int_store[2];
37584 return (cost * (((int) GET_MODE_SIZE (mode)
37585 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37586 }
37587 }
37588
37589 static int
37590 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37591 bool in)
37592 {
37593 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37594 }
37595
37596
37597 /* Return the cost of moving data from a register in class CLASS1 to
37598 one in class CLASS2.
37599
37600 It is not required that the cost always equal 2 when FROM is the same as TO;
37601 on some machines it is expensive to move between registers if they are not
37602 general registers. */
37603
37604 static int
37605 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37606 reg_class_t class2_i)
37607 {
37608 enum reg_class class1 = (enum reg_class) class1_i;
37609 enum reg_class class2 = (enum reg_class) class2_i;
37610
37611 /* In case we require secondary memory, compute the cost of the store
37612 followed by a load. To avoid bad register allocation choices, this needs
37613 to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37614
37615 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37616 {
37617 int cost = 1;
37618
37619 cost += inline_memory_move_cost (mode, class1, 2);
37620 cost += inline_memory_move_cost (mode, class2, 2);
37621
37622 /* When copying from a general purpose register we may emit multiple
37623 stores followed by a single load, causing a memory size mismatch stall.
37624 Count this as an arbitrarily high cost of 20. */
37625 if (targetm.class_max_nregs (class1, mode)
37626 > targetm.class_max_nregs (class2, mode))
37627 cost += 20;
37628
37629 /* In the case of FP/MMX moves, the registers actually overlap, and we
37630 have to switch modes in order to treat them differently. */
37631 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37632 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37633 cost += 20;
37634
37635 return cost;
37636 }
37637
37638 /* Moves between SSE/MMX and integer unit are expensive. */
37639 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37640 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37641
37642 /* ??? By keeping the returned value relatively high, we limit the number
37643 of moves between integer and MMX/SSE registers for all targets.
37644 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
37645 where integer modes in MMX/SSE registers are not tieable
37646 because of missing QImode and HImode moves to, from or between
37647 MMX/SSE registers. */
37648 return MAX (8, ix86_cost->mmxsse_to_integer);
37649
37650 if (MAYBE_FLOAT_CLASS_P (class1))
37651 return ix86_cost->fp_move;
37652 if (MAYBE_SSE_CLASS_P (class1))
37653 return ix86_cost->sse_move;
37654 if (MAYBE_MMX_CLASS_P (class1))
37655 return ix86_cost->mmx_move;
37656 return 2;
37657 }
37658
37659 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37660 MODE. */
37661
37662 bool
37663 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37664 {
37665 /* Flags and only flags can only hold CCmode values. */
37666 if (CC_REGNO_P (regno))
37667 return GET_MODE_CLASS (mode) == MODE_CC;
37668 if (GET_MODE_CLASS (mode) == MODE_CC
37669 || GET_MODE_CLASS (mode) == MODE_RANDOM
37670 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37671 return false;
37672 if (STACK_REGNO_P (regno))
37673 return VALID_FP_MODE_P (mode);
37674 if (MASK_REGNO_P (regno))
37675 return VALID_MASK_REG_MODE (mode);
37676 if (SSE_REGNO_P (regno))
37677 {
37678 /* We implement the move patterns for all vector modes into and
37679 out of SSE registers, even when no operation instructions
37680 are available. */
37681
37682 /* For AVX-512 we allow, regardless of regno:
37683 - XI mode
37684 - any 512-bit wide vector mode
37685 - any scalar mode. */
37686 if (TARGET_AVX512F
37687 && (mode == XImode
37688 || VALID_AVX512F_REG_MODE (mode)
37689 || VALID_AVX512F_SCALAR_MODE (mode)))
37690 return true;
37691
37692 /* xmm16-xmm31 are only available for AVX-512. */
37693 if (EXT_REX_SSE_REGNO_P (regno))
37694 return false;
37695
37696 /* OImode and AVX modes are available only when AVX is enabled. */
37697 return ((TARGET_AVX
37698 && VALID_AVX256_REG_OR_OI_MODE (mode))
37699 || VALID_SSE_REG_MODE (mode)
37700 || VALID_SSE2_REG_MODE (mode)
37701 || VALID_MMX_REG_MODE (mode)
37702 || VALID_MMX_REG_MODE_3DNOW (mode));
37703 }
37704 if (MMX_REGNO_P (regno))
37705 {
37706 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37707 so if the register is available at all, then we can move data of
37708 the given mode into or out of it. */
37709 return (VALID_MMX_REG_MODE (mode)
37710 || VALID_MMX_REG_MODE_3DNOW (mode));
37711 }
37712
37713 if (mode == QImode)
37714 {
37715 /* Take care for QImode values - they can be in non-QI regs,
37716 but then they do cause partial register stalls. */
37717 if (ANY_QI_REGNO_P (regno))
37718 return true;
37719 if (!TARGET_PARTIAL_REG_STALL)
37720 return true;
37721 /* LRA checks if the hard register is OK for the given mode.
37722 QImode values can live in non-QI regs, so we allow all
37723 registers here. */
37724 if (lra_in_progress)
37725 return true;
37726 return !can_create_pseudo_p ();
37727 }
37728 /* We handle both integer and floats in the general purpose registers. */
37729 else if (VALID_INT_MODE_P (mode))
37730 return true;
37731 else if (VALID_FP_MODE_P (mode))
37732 return true;
37733 else if (VALID_DFP_MODE_P (mode))
37734 return true;
37735 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37736 on to use that value in smaller contexts, this can easily force a
37737 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37738 supporting DImode, allow it. */
37739 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37740 return true;
37741
37742 return false;
37743 }
37744
37745 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37746 tieable integer mode. */
37747
37748 static bool
37749 ix86_tieable_integer_mode_p (enum machine_mode mode)
37750 {
37751 switch (mode)
37752 {
37753 case HImode:
37754 case SImode:
37755 return true;
37756
37757 case QImode:
37758 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37759
37760 case DImode:
37761 return TARGET_64BIT;
37762
37763 default:
37764 return false;
37765 }
37766 }
37767
37768 /* Return true if MODE1 is accessible in a register that can hold MODE2
37769 without copying. That is, all register classes that can hold MODE2
37770 can also hold MODE1. */
37771
37772 bool
37773 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37774 {
37775 if (mode1 == mode2)
37776 return true;
37777
37778 if (ix86_tieable_integer_mode_p (mode1)
37779 && ix86_tieable_integer_mode_p (mode2))
37780 return true;
37781
37782 /* MODE2 being XFmode implies fp stack or general regs, which means we
37783 can tie any smaller floating point modes to it. Note that we do not
37784 tie this with TFmode. */
37785 if (mode2 == XFmode)
37786 return mode1 == SFmode || mode1 == DFmode;
37787
37788 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37789 that we can tie it with SFmode. */
37790 if (mode2 == DFmode)
37791 return mode1 == SFmode;
37792
37793 /* If MODE2 is only appropriate for an SSE register, then tie with
37794 any other mode acceptable to SSE registers. */
37795 if (GET_MODE_SIZE (mode2) == 32
37796 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37797 return (GET_MODE_SIZE (mode1) == 32
37798 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37799 if (GET_MODE_SIZE (mode2) == 16
37800 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37801 return (GET_MODE_SIZE (mode1) == 16
37802 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37803
37804 /* If MODE2 is appropriate for an MMX register, then tie
37805 with any other mode acceptable to MMX registers. */
37806 if (GET_MODE_SIZE (mode2) == 8
37807 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37808 return (GET_MODE_SIZE (mode1) == 8
37809 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37810
37811 return false;
37812 }
37813
37814 /* Return the cost of moving between two registers of mode MODE. */
37815
37816 static int
37817 ix86_set_reg_reg_cost (enum machine_mode mode)
37818 {
37819 unsigned int units = UNITS_PER_WORD;
37820
37821 switch (GET_MODE_CLASS (mode))
37822 {
37823 default:
37824 break;
37825
37826 case MODE_CC:
37827 units = GET_MODE_SIZE (CCmode);
37828 break;
37829
37830 case MODE_FLOAT:
37831 if ((TARGET_SSE && mode == TFmode)
37832 || (TARGET_80387 && mode == XFmode)
37833 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37834 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37835 units = GET_MODE_SIZE (mode);
37836 break;
37837
37838 case MODE_COMPLEX_FLOAT:
37839 if ((TARGET_SSE && mode == TCmode)
37840 || (TARGET_80387 && mode == XCmode)
37841 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37842 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37843 units = GET_MODE_SIZE (mode);
37844 break;
37845
37846 case MODE_VECTOR_INT:
37847 case MODE_VECTOR_FLOAT:
37848 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37849 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37850 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37851 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37852 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37853 units = GET_MODE_SIZE (mode);
37854 }
37855
37856 /* Return the cost of moving between two registers of mode MODE,
37857 assuming that the move will be in pieces of at most UNITS bytes. */
37858 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37859 }
37860
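/* Illustrative sketch, not compiled: the return value above is a
   ceiling division, i.e. the number of UNITS-sized pieces needed to
   move a MODE-sized value.  The helper name below is hypothetical.  */
#if 0
static int
example_move_pieces (int mode_size, int units)
{
  /* Same rounding-up division as in ix86_set_reg_reg_cost, e.g.
     DImode (8 bytes) with UNITS_PER_WORD == 4 gives (8 + 3) / 4 == 2,
     while a 16 byte SSE mode moved as one unit gives 1.  */
  return (mode_size + units - 1) / units;
}
#endif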
37861 /* Compute a (partial) cost for rtx X. Return true if the complete
37862 cost has been computed, and false if subexpressions should be
37863 scanned. In either case, *TOTAL contains the cost result. */
37864
37865 static bool
37866 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37867 bool speed)
37868 {
37869 rtx mask;
37870 enum rtx_code code = (enum rtx_code) code_i;
37871 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37872 enum machine_mode mode = GET_MODE (x);
37873 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37874
37875 switch (code)
37876 {
37877 case SET:
37878 if (register_operand (SET_DEST (x), VOIDmode)
37879 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37880 {
37881 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37882 return true;
37883 }
37884 return false;
37885
37886 case CONST_INT:
37887 case CONST:
37888 case LABEL_REF:
37889 case SYMBOL_REF:
37890 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37891 *total = 3;
37892 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37893 *total = 2;
37894 else if (flag_pic && SYMBOLIC_CONST (x)
37895 && (!TARGET_64BIT
37896 || (GET_CODE (x) != LABEL_REF
37897 && (GET_CODE (x) != SYMBOL_REF
37898 || !SYMBOL_REF_LOCAL_P (x)))))
37899 *total = 1;
37900 else
37901 *total = 0;
37902 return true;
37903
37904 case CONST_DOUBLE:
37905 if (mode == VOIDmode)
37906 {
37907 *total = 0;
37908 return true;
37909 }
37910 switch (standard_80387_constant_p (x))
37911 {
37912 case 1: /* 0.0 */
37913 *total = 1;
37914 return true;
37915 default: /* Other constants */
37916 *total = 2;
37917 return true;
37918 case 0:
37919 case -1:
37920 break;
37921 }
37922 if (SSE_FLOAT_MODE_P (mode))
37923 {
37924 case CONST_VECTOR:
37925 switch (standard_sse_constant_p (x))
37926 {
37927 case 0:
37928 break;
37929 case 1: /* 0: xor eliminates false dependency */
37930 *total = 0;
37931 return true;
37932 default: /* -1: cmp contains false dependency */
37933 *total = 1;
37934 return true;
37935 }
37936 }
37937 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37938 it'll probably end up. Add a penalty for size. */
37939 *total = (COSTS_N_INSNS (1)
37940 + (flag_pic != 0 && !TARGET_64BIT)
37941 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37942 return true;
37943
37944 case ZERO_EXTEND:
37945 /* The zero extension is often completely free on x86_64, so make
37946 it as cheap as possible. */
37947 if (TARGET_64BIT && mode == DImode
37948 && GET_MODE (XEXP (x, 0)) == SImode)
37949 *total = 1;
37950 else if (TARGET_ZERO_EXTEND_WITH_AND)
37951 *total = cost->add;
37952 else
37953 *total = cost->movzx;
37954 return false;
37955
37956 case SIGN_EXTEND:
37957 *total = cost->movsx;
37958 return false;
37959
37960 case ASHIFT:
37961 if (SCALAR_INT_MODE_P (mode)
37962 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37963 && CONST_INT_P (XEXP (x, 1)))
37964 {
37965 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37966 if (value == 1)
37967 {
37968 *total = cost->add;
37969 return false;
37970 }
37971 if ((value == 2 || value == 3)
37972 && cost->lea <= cost->shift_const)
37973 {
37974 *total = cost->lea;
37975 return false;
37976 }
37977 }
37978 /* FALLTHRU */
37979
37980 case ROTATE:
37981 case ASHIFTRT:
37982 case LSHIFTRT:
37983 case ROTATERT:
37984 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37985 {
37986 /* ??? Should be SSE vector operation cost. */
37987 /* At least for published AMD latencies, this really is the same
37988 as the latency for a simple fpu operation like fabs. */
37989 /* V*QImode is emulated with 1-11 insns. */
37990 if (mode == V16QImode || mode == V32QImode)
37991 {
37992 int count = 11;
37993 if (TARGET_XOP && mode == V16QImode)
37994 {
37995 /* For XOP we use vpshab, which requires a broadcast of the
37996 value to the variable shift insn. For constants this
37997 means a V16Q const in mem; even when we can perform the
37998 shift with one insn, set the cost to prefer paddb. */
37999 if (CONSTANT_P (XEXP (x, 1)))
38000 {
38001 *total = (cost->fabs
38002 + rtx_cost (XEXP (x, 0), code, 0, speed)
38003 + (speed ? 2 : COSTS_N_BYTES (16)));
38004 return true;
38005 }
38006 count = 3;
38007 }
38008 else if (TARGET_SSSE3)
38009 count = 7;
38010 *total = cost->fabs * count;
38011 }
38012 else
38013 *total = cost->fabs;
38014 }
38015 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38016 {
38017 if (CONST_INT_P (XEXP (x, 1)))
38018 {
38019 if (INTVAL (XEXP (x, 1)) > 32)
38020 *total = cost->shift_const + COSTS_N_INSNS (2);
38021 else
38022 *total = cost->shift_const * 2;
38023 }
38024 else
38025 {
38026 if (GET_CODE (XEXP (x, 1)) == AND)
38027 *total = cost->shift_var * 2;
38028 else
38029 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38030 }
38031 }
38032 else
38033 {
38034 if (CONST_INT_P (XEXP (x, 1)))
38035 *total = cost->shift_const;
38036 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38037 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38038 {
38039 /* Return the cost after shift-and truncation. */
38040 *total = cost->shift_var;
38041 return true;
38042 }
38043 else
38044 *total = cost->shift_var;
38045 }
38046 return false;
38047
38048 case FMA:
38049 {
38050 rtx sub;
38051
38052 gcc_assert (FLOAT_MODE_P (mode));
38053 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38054
38055 /* ??? SSE scalar/vector cost should be used here. */
38056 /* ??? Bald assumption that fma has the same cost as fmul. */
38057 *total = cost->fmul;
38058 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38059
38060 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38061 sub = XEXP (x, 0);
38062 if (GET_CODE (sub) == NEG)
38063 sub = XEXP (sub, 0);
38064 *total += rtx_cost (sub, FMA, 0, speed);
38065
38066 sub = XEXP (x, 2);
38067 if (GET_CODE (sub) == NEG)
38068 sub = XEXP (sub, 0);
38069 *total += rtx_cost (sub, FMA, 2, speed);
38070 return true;
38071 }
38072
38073 case MULT:
38074 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38075 {
38076 /* ??? SSE scalar cost should be used here. */
38077 *total = cost->fmul;
38078 return false;
38079 }
38080 else if (X87_FLOAT_MODE_P (mode))
38081 {
38082 *total = cost->fmul;
38083 return false;
38084 }
38085 else if (FLOAT_MODE_P (mode))
38086 {
38087 /* ??? SSE vector cost should be used here. */
38088 *total = cost->fmul;
38089 return false;
38090 }
38091 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38092 {
38093 /* V*QImode is emulated with 7-13 insns. */
38094 if (mode == V16QImode || mode == V32QImode)
38095 {
38096 int extra = 11;
38097 if (TARGET_XOP && mode == V16QImode)
38098 extra = 5;
38099 else if (TARGET_SSSE3)
38100 extra = 6;
38101 *total = cost->fmul * 2 + cost->fabs * extra;
38102 }
38103 /* V*DImode is emulated with 5-8 insns. */
38104 else if (mode == V2DImode || mode == V4DImode)
38105 {
38106 if (TARGET_XOP && mode == V2DImode)
38107 *total = cost->fmul * 2 + cost->fabs * 3;
38108 else
38109 *total = cost->fmul * 3 + cost->fabs * 5;
38110 }
38111 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38112 insns, including two PMULUDQ. */
38113 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38114 *total = cost->fmul * 2 + cost->fabs * 5;
38115 else
38116 *total = cost->fmul;
38117 return false;
38118 }
38119 else
38120 {
38121 rtx op0 = XEXP (x, 0);
38122 rtx op1 = XEXP (x, 1);
38123 int nbits;
38124 if (CONST_INT_P (XEXP (x, 1)))
38125 {
38126 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38127 for (nbits = 0; value != 0; value &= value - 1)
38128 nbits++;
38129 }
38130 else
38131 /* This is arbitrary. */
38132 nbits = 7;
38133
38134 /* Compute costs correctly for widening multiplication. */
38135 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38136 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38137 == GET_MODE_SIZE (mode))
38138 {
38139 int is_mulwiden = 0;
38140 enum machine_mode inner_mode = GET_MODE (op0);
38141
38142 if (GET_CODE (op0) == GET_CODE (op1))
38143 is_mulwiden = 1, op1 = XEXP (op1, 0);
38144 else if (CONST_INT_P (op1))
38145 {
38146 if (GET_CODE (op0) == SIGN_EXTEND)
38147 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38148 == INTVAL (op1);
38149 else
38150 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38151 }
38152
38153 if (is_mulwiden)
38154 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38155 }
38156
38157 *total = (cost->mult_init[MODE_INDEX (mode)]
38158 + nbits * cost->mult_bit
38159 + rtx_cost (op0, outer_code, opno, speed)
38160 + rtx_cost (op1, outer_code, opno, speed));
38161
38162 return true;
38163 }
38164
38165 case DIV:
38166 case UDIV:
38167 case MOD:
38168 case UMOD:
38169 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38170 /* ??? SSE cost should be used here. */
38171 *total = cost->fdiv;
38172 else if (X87_FLOAT_MODE_P (mode))
38173 *total = cost->fdiv;
38174 else if (FLOAT_MODE_P (mode))
38175 /* ??? SSE vector cost should be used here. */
38176 *total = cost->fdiv;
38177 else
38178 *total = cost->divide[MODE_INDEX (mode)];
38179 return false;
38180
38181 case PLUS:
38182 if (GET_MODE_CLASS (mode) == MODE_INT
38183 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38184 {
38185 if (GET_CODE (XEXP (x, 0)) == PLUS
38186 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38187 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38188 && CONSTANT_P (XEXP (x, 1)))
38189 {
38190 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38191 if (val == 2 || val == 4 || val == 8)
38192 {
38193 *total = cost->lea;
38194 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38195 outer_code, opno, speed);
38196 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38197 outer_code, opno, speed);
38198 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38199 return true;
38200 }
38201 }
38202 else if (GET_CODE (XEXP (x, 0)) == MULT
38203 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38204 {
38205 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38206 if (val == 2 || val == 4 || val == 8)
38207 {
38208 *total = cost->lea;
38209 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38210 outer_code, opno, speed);
38211 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38212 return true;
38213 }
38214 }
38215 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38216 {
38217 *total = cost->lea;
38218 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38219 outer_code, opno, speed);
38220 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38221 outer_code, opno, speed);
38222 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38223 return true;
38224 }
38225 }
38226 /* FALLTHRU */
38227
38228 case MINUS:
38229 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38230 {
38231 /* ??? SSE cost should be used here. */
38232 *total = cost->fadd;
38233 return false;
38234 }
38235 else if (X87_FLOAT_MODE_P (mode))
38236 {
38237 *total = cost->fadd;
38238 return false;
38239 }
38240 else if (FLOAT_MODE_P (mode))
38241 {
38242 /* ??? SSE vector cost should be used here. */
38243 *total = cost->fadd;
38244 return false;
38245 }
38246 /* FALLTHRU */
38247
38248 case AND:
38249 case IOR:
38250 case XOR:
38251 if (GET_MODE_CLASS (mode) == MODE_INT
38252 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38253 {
38254 *total = (cost->add * 2
38255 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38256 << (GET_MODE (XEXP (x, 0)) != DImode))
38257 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38258 << (GET_MODE (XEXP (x, 1)) != DImode)));
38259 return true;
38260 }
38261 /* FALLTHRU */
38262
38263 case NEG:
38264 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38265 {
38266 /* ??? SSE cost should be used here. */
38267 *total = cost->fchs;
38268 return false;
38269 }
38270 else if (X87_FLOAT_MODE_P (mode))
38271 {
38272 *total = cost->fchs;
38273 return false;
38274 }
38275 else if (FLOAT_MODE_P (mode))
38276 {
38277 /* ??? SSE vector cost should be used here. */
38278 *total = cost->fchs;
38279 return false;
38280 }
38281 /* FALLTHRU */
38282
38283 case NOT:
38284 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38285 {
38286 /* ??? Should be SSE vector operation cost. */
38287 /* At least for published AMD latencies, this really is the same
38288 as the latency for a simple fpu operation like fabs. */
38289 *total = cost->fabs;
38290 }
38291 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38292 *total = cost->add * 2;
38293 else
38294 *total = cost->add;
38295 return false;
38296
38297 case COMPARE:
38298 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38299 && XEXP (XEXP (x, 0), 1) == const1_rtx
38300 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38301 && XEXP (x, 1) == const0_rtx)
38302 {
38303 /* This kind of construct is implemented using test[bwl].
38304 Treat it as if we had an AND. */
38305 *total = (cost->add
38306 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38307 + rtx_cost (const1_rtx, outer_code, opno, speed));
38308 return true;
38309 }
38310 return false;
38311
38312 case FLOAT_EXTEND:
38313 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38314 *total = 0;
38315 return false;
38316
38317 case ABS:
38318 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38319 /* ??? SSE cost should be used here. */
38320 *total = cost->fabs;
38321 else if (X87_FLOAT_MODE_P (mode))
38322 *total = cost->fabs;
38323 else if (FLOAT_MODE_P (mode))
38324 /* ??? SSE vector cost should be used here. */
38325 *total = cost->fabs;
38326 return false;
38327
38328 case SQRT:
38329 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38330 /* ??? SSE cost should be used here. */
38331 *total = cost->fsqrt;
38332 else if (X87_FLOAT_MODE_P (mode))
38333 *total = cost->fsqrt;
38334 else if (FLOAT_MODE_P (mode))
38335 /* ??? SSE vector cost should be used here. */
38336 *total = cost->fsqrt;
38337 return false;
38338
38339 case UNSPEC:
38340 if (XINT (x, 1) == UNSPEC_TP)
38341 *total = 0;
38342 return false;
38343
38344 case VEC_SELECT:
38345 case VEC_CONCAT:
38346 case VEC_DUPLICATE:
38347 /* ??? Assume all of these vector manipulation patterns are
38348 recognizable, in which case they all pretty much have the
38349 same cost. */
38350 *total = cost->fabs;
38351 return true;
38352 case VEC_MERGE:
38353 mask = XEXP (x, 2);
38354 /* This is a masked instruction; assume the same cost
38355 as the non-masked variant. */
38356 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38357 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38358 else
38359 *total = cost->fabs;
38360 return true;
38361
38362 default:
38363 return false;
38364 }
38365 }
38366
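/* Illustrative sketch, not compiled: the MULT case above prices a
   multiplication by a constant using the number of set bits in the
   multiplier, counted with the classic "clear the lowest set bit"
   loop.  The helper name below is hypothetical.  */
#if 0
static int
example_count_set_bits (unsigned HOST_WIDE_INT value)
{
  int nbits = 0;
  for (; value != 0; value &= value - 1)  /* 0b1010 -> 0b1000 -> 0.  */
    nbits++;
  return nbits;  /* e.g. a multiplier of 10 (0b1010) contributes 2 * mult_bit.  */
}
#endif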
38367 #if TARGET_MACHO
38368
38369 static int current_machopic_label_num;
38370
38371 /* Given a symbol name and its associated stub, write out the
38372 definition of the stub. */
38373
38374 void
38375 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38376 {
38377 unsigned int length;
38378 char *binder_name, *symbol_name, lazy_ptr_name[32];
38379 int label = ++current_machopic_label_num;
38380
38381 /* For 64-bit we shouldn't get here. */
38382 gcc_assert (!TARGET_64BIT);
38383
38384 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38385 symb = targetm.strip_name_encoding (symb);
38386
38387 length = strlen (stub);
38388 binder_name = XALLOCAVEC (char, length + 32);
38389 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38390
38391 length = strlen (symb);
38392 symbol_name = XALLOCAVEC (char, length + 32);
38393 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38394
38395 sprintf (lazy_ptr_name, "L%d$lz", label);
38396
38397 if (MACHOPIC_ATT_STUB)
38398 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38399 else if (MACHOPIC_PURE)
38400 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38401 else
38402 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38403
38404 fprintf (file, "%s:\n", stub);
38405 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38406
38407 if (MACHOPIC_ATT_STUB)
38408 {
38409 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38410 }
38411 else if (MACHOPIC_PURE)
38412 {
38413 /* PIC stub. */
38414 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38415 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38416 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38417 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38418 label, lazy_ptr_name, label);
38419 fprintf (file, "\tjmp\t*%%ecx\n");
38420 }
38421 else
38422 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38423
38424 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38425 it needs no stub-binding-helper. */
38426 if (MACHOPIC_ATT_STUB)
38427 return;
38428
38429 fprintf (file, "%s:\n", binder_name);
38430
38431 if (MACHOPIC_PURE)
38432 {
38433 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38434 fprintf (file, "\tpushl\t%%ecx\n");
38435 }
38436 else
38437 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38438
38439 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38440
38441 /* N.B. Keep the correspondence of these
38442 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38443 old-pic/new-pic/non-pic stubs; altering this will break
38444 compatibility with existing dylibs. */
38445 if (MACHOPIC_PURE)
38446 {
38447 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38448 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38449 }
38450 else
38451 /* 16-byte -mdynamic-no-pic stub. */
38452 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
38453
38454 fprintf (file, "%s:\n", lazy_ptr_name);
38455 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38456 fprintf (file, ASM_LONG "%s\n", binder_name);
38457 }
38458 #endif /* TARGET_MACHO */
38459
38460 /* Order the registers for the register allocator. */
38461
38462 void
38463 x86_order_regs_for_local_alloc (void)
38464 {
38465 int pos = 0;
38466 int i;
38467
38468 /* First allocate the local general purpose registers. */
38469 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38470 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38471 reg_alloc_order [pos++] = i;
38472
38473 /* Global general purpose registers. */
38474 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38475 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38476 reg_alloc_order [pos++] = i;
38477
38478 /* x87 registers come first in case we are doing FP math
38479 using them. */
38480 if (!TARGET_SSE_MATH)
38481 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38482 reg_alloc_order [pos++] = i;
38483
38484 /* SSE registers. */
38485 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38486 reg_alloc_order [pos++] = i;
38487 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38488 reg_alloc_order [pos++] = i;
38489
38490 /* Extended REX SSE registers. */
38491 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38492 reg_alloc_order [pos++] = i;
38493
38494 /* Mask registers. */
38495 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38496 reg_alloc_order [pos++] = i;
38497
38498 /* x87 registers. */
38499 if (TARGET_SSE_MATH)
38500 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38501 reg_alloc_order [pos++] = i;
38502
38503 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38504 reg_alloc_order [pos++] = i;
38505
38506 /* Initialize the rest of the array as we do not allocate some registers
38507 at all. */
38508 while (pos < FIRST_PSEUDO_REGISTER)
38509 reg_alloc_order [pos++] = 0;
38510 }
38511
38512 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38513 in struct attribute_spec.handler. */
38514 static tree
38515 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38516 tree args,
38517 int flags ATTRIBUTE_UNUSED,
38518 bool *no_add_attrs)
38519 {
38520 if (TREE_CODE (*node) != FUNCTION_TYPE
38521 && TREE_CODE (*node) != METHOD_TYPE
38522 && TREE_CODE (*node) != FIELD_DECL
38523 && TREE_CODE (*node) != TYPE_DECL)
38524 {
38525 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38526 name);
38527 *no_add_attrs = true;
38528 return NULL_TREE;
38529 }
38530 if (TARGET_64BIT)
38531 {
38532 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38533 name);
38534 *no_add_attrs = true;
38535 return NULL_TREE;
38536 }
38537 if (is_attribute_p ("callee_pop_aggregate_return", name))
38538 {
38539 tree cst;
38540
38541 cst = TREE_VALUE (args);
38542 if (TREE_CODE (cst) != INTEGER_CST)
38543 {
38544 warning (OPT_Wattributes,
38545 "%qE attribute requires an integer constant argument",
38546 name);
38547 *no_add_attrs = true;
38548 }
38549 else if (compare_tree_int (cst, 0) != 0
38550 && compare_tree_int (cst, 1) != 0)
38551 {
38552 warning (OPT_Wattributes,
38553 "argument to %qE attribute is neither zero, nor one",
38554 name);
38555 *no_add_attrs = true;
38556 }
38557
38558 return NULL_TREE;
38559 }
38560
38561 return NULL_TREE;
38562 }
38563
38564 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38565 struct attribute_spec.handler. */
38566 static tree
38567 ix86_handle_abi_attribute (tree *node, tree name,
38568 tree args ATTRIBUTE_UNUSED,
38569 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38570 {
38571 if (TREE_CODE (*node) != FUNCTION_TYPE
38572 && TREE_CODE (*node) != METHOD_TYPE
38573 && TREE_CODE (*node) != FIELD_DECL
38574 && TREE_CODE (*node) != TYPE_DECL)
38575 {
38576 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38577 name);
38578 *no_add_attrs = true;
38579 return NULL_TREE;
38580 }
38581
38582 /* Can combine regparm with all attributes but fastcall. */
38583 if (is_attribute_p ("ms_abi", name))
38584 {
38585 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38586 {
38587 error ("ms_abi and sysv_abi attributes are not compatible");
38588 }
38589
38590 return NULL_TREE;
38591 }
38592 else if (is_attribute_p ("sysv_abi", name))
38593 {
38594 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38595 {
38596 error ("ms_abi and sysv_abi attributes are not compatible");
38597 }
38598
38599 return NULL_TREE;
38600 }
38601
38602 return NULL_TREE;
38603 }
38604
38605 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38606 struct attribute_spec.handler. */
38607 static tree
38608 ix86_handle_struct_attribute (tree *node, tree name,
38609 tree args ATTRIBUTE_UNUSED,
38610 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38611 {
38612 tree *type = NULL;
38613 if (DECL_P (*node))
38614 {
38615 if (TREE_CODE (*node) == TYPE_DECL)
38616 type = &TREE_TYPE (*node);
38617 }
38618 else
38619 type = node;
38620
38621 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38622 {
38623 warning (OPT_Wattributes, "%qE attribute ignored",
38624 name);
38625 *no_add_attrs = true;
38626 }
38627
38628 else if ((is_attribute_p ("ms_struct", name)
38629 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38630 || ((is_attribute_p ("gcc_struct", name)
38631 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38632 {
38633 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38634 name);
38635 *no_add_attrs = true;
38636 }
38637
38638 return NULL_TREE;
38639 }
38640
38641 static tree
38642 ix86_handle_fndecl_attribute (tree *node, tree name,
38643 tree args ATTRIBUTE_UNUSED,
38644 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38645 {
38646 if (TREE_CODE (*node) != FUNCTION_DECL)
38647 {
38648 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38649 name);
38650 *no_add_attrs = true;
38651 }
38652 return NULL_TREE;
38653 }
38654
38655 static bool
38656 ix86_ms_bitfield_layout_p (const_tree record_type)
38657 {
38658 return ((TARGET_MS_BITFIELD_LAYOUT
38659 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38660 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38661 }
38662
38663 /* Returns an expression indicating where the this parameter is
38664 located on entry to the FUNCTION. */
38665
38666 static rtx
38667 x86_this_parameter (tree function)
38668 {
38669 tree type = TREE_TYPE (function);
38670 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38671 int nregs;
38672
38673 if (TARGET_64BIT)
38674 {
38675 const int *parm_regs;
38676
38677 if (ix86_function_type_abi (type) == MS_ABI)
38678 parm_regs = x86_64_ms_abi_int_parameter_registers;
38679 else
38680 parm_regs = x86_64_int_parameter_registers;
38681 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38682 }
38683
38684 nregs = ix86_function_regparm (type, function);
38685
38686 if (nregs > 0 && !stdarg_p (type))
38687 {
38688 int regno;
38689 unsigned int ccvt = ix86_get_callcvt (type);
38690
38691 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38692 regno = aggr ? DX_REG : CX_REG;
38693 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38694 {
38695 regno = CX_REG;
38696 if (aggr)
38697 return gen_rtx_MEM (SImode,
38698 plus_constant (Pmode, stack_pointer_rtx, 4));
38699 }
38700 else
38701 {
38702 regno = AX_REG;
38703 if (aggr)
38704 {
38705 regno = DX_REG;
38706 if (nregs == 1)
38707 return gen_rtx_MEM (SImode,
38708 plus_constant (Pmode,
38709 stack_pointer_rtx, 4));
38710 }
38711 }
38712 return gen_rtx_REG (SImode, regno);
38713 }
38714
38715 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38716 aggr ? 8 : 4));
38717 }
38718
38719 /* Determine whether x86_output_mi_thunk can succeed. */
38720
38721 static bool
38722 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38723 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38724 HOST_WIDE_INT vcall_offset, const_tree function)
38725 {
38726 /* 64-bit can handle anything. */
38727 if (TARGET_64BIT)
38728 return true;
38729
38730 /* For 32-bit, everything's fine if we have one free register. */
38731 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38732 return true;
38733
38734 /* Need a free register for vcall_offset. */
38735 if (vcall_offset)
38736 return false;
38737
38738 /* Need a free register for GOT references. */
38739 if (flag_pic && !targetm.binds_local_p (function))
38740 return false;
38741
38742 /* Otherwise ok. */
38743 return true;
38744 }
38745
38746 /* Output the assembler code for a thunk function. THUNK_DECL is the
38747 declaration for the thunk function itself, FUNCTION is the decl for
38748 the target function. DELTA is an immediate constant offset to be
38749 added to THIS. If VCALL_OFFSET is nonzero, the word at
38750 *(*this + vcall_offset) should be added to THIS. */
38751
38752 static void
38753 x86_output_mi_thunk (FILE *file,
38754 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38755 HOST_WIDE_INT vcall_offset, tree function)
38756 {
38757 rtx this_param = x86_this_parameter (function);
38758 rtx this_reg, tmp, fnaddr;
38759 unsigned int tmp_regno;
38760
38761 if (TARGET_64BIT)
38762 tmp_regno = R10_REG;
38763 else
38764 {
38765 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38766 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38767 tmp_regno = AX_REG;
38768 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38769 tmp_regno = DX_REG;
38770 else
38771 tmp_regno = CX_REG;
38772 }
38773
38774 emit_note (NOTE_INSN_PROLOGUE_END);
38775
38776 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38777 pull it in now and let DELTA benefit. */
38778 if (REG_P (this_param))
38779 this_reg = this_param;
38780 else if (vcall_offset)
38781 {
38782 /* Put the this parameter into %eax. */
38783 this_reg = gen_rtx_REG (Pmode, AX_REG);
38784 emit_move_insn (this_reg, this_param);
38785 }
38786 else
38787 this_reg = NULL_RTX;
38788
38789 /* Adjust the this parameter by a fixed constant. */
38790 if (delta)
38791 {
38792 rtx delta_rtx = GEN_INT (delta);
38793 rtx delta_dst = this_reg ? this_reg : this_param;
38794
38795 if (TARGET_64BIT)
38796 {
38797 if (!x86_64_general_operand (delta_rtx, Pmode))
38798 {
38799 tmp = gen_rtx_REG (Pmode, tmp_regno);
38800 emit_move_insn (tmp, delta_rtx);
38801 delta_rtx = tmp;
38802 }
38803 }
38804
38805 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38806 }
38807
38808 /* Adjust the this parameter by a value stored in the vtable. */
38809 if (vcall_offset)
38810 {
38811 rtx vcall_addr, vcall_mem, this_mem;
38812
38813 tmp = gen_rtx_REG (Pmode, tmp_regno);
38814
38815 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38816 if (Pmode != ptr_mode)
38817 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38818 emit_move_insn (tmp, this_mem);
38819
38820 /* Adjust the this parameter. */
38821 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38822 if (TARGET_64BIT
38823 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38824 {
38825 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38826 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38827 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38828 }
38829
38830 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38831 if (Pmode != ptr_mode)
38832 emit_insn (gen_addsi_1_zext (this_reg,
38833 gen_rtx_REG (ptr_mode,
38834 REGNO (this_reg)),
38835 vcall_mem));
38836 else
38837 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38838 }
38839
38840 /* If necessary, drop THIS back to its stack slot. */
38841 if (this_reg && this_reg != this_param)
38842 emit_move_insn (this_param, this_reg);
38843
38844 fnaddr = XEXP (DECL_RTL (function), 0);
38845 if (TARGET_64BIT)
38846 {
38847 if (!flag_pic || targetm.binds_local_p (function)
38848 || TARGET_PECOFF)
38849 ;
38850 else
38851 {
38852 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38853 tmp = gen_rtx_CONST (Pmode, tmp);
38854 fnaddr = gen_rtx_MEM (Pmode, tmp);
38855 }
38856 }
38857 else
38858 {
38859 if (!flag_pic || targetm.binds_local_p (function))
38860 ;
38861 #if TARGET_MACHO
38862 else if (TARGET_MACHO)
38863 {
38864 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38865 fnaddr = XEXP (fnaddr, 0);
38866 }
38867 #endif /* TARGET_MACHO */
38868 else
38869 {
38870 tmp = gen_rtx_REG (Pmode, CX_REG);
38871 output_set_got (tmp, NULL_RTX);
38872
38873 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38874 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
38875 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
38876 }
38877 }
38878
38879 /* Our sibling call patterns do not allow memories, because we have no
38880 predicate that can distinguish between frame and non-frame memory.
38881 For our purposes here, we can get away with (ab)using a jump pattern,
38882 because we're going to do no optimization. */
38883 if (MEM_P (fnaddr))
38884 emit_jump_insn (gen_indirect_jump (fnaddr));
38885 else
38886 {
38887 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38888 fnaddr = legitimize_pic_address (fnaddr,
38889 gen_rtx_REG (Pmode, tmp_regno));
38890
38891 if (!sibcall_insn_operand (fnaddr, word_mode))
38892 {
38893 tmp = gen_rtx_REG (word_mode, tmp_regno);
38894 if (GET_MODE (fnaddr) != word_mode)
38895 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38896 emit_move_insn (tmp, fnaddr);
38897 fnaddr = tmp;
38898 }
38899
38900 tmp = gen_rtx_MEM (QImode, fnaddr);
38901 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38902 tmp = emit_call_insn (tmp);
38903 SIBLING_CALL_P (tmp) = 1;
38904 }
38905 emit_barrier ();
38906
38907 /* Emit just enough of rest_of_compilation to get the insns emitted.
38908 Note that use_thunk calls assemble_start_function et al. */
38909 tmp = get_insns ();
38910 shorten_branches (tmp);
38911 final_start_function (tmp, file, 1);
38912 final (tmp, file, 1);
38913 final_end_function ();
38914 }
38915
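/* Illustrative sketch, not compiled: in C-like pseudo code the thunk
   emitted above performs the adjustment described before
   x86_output_mi_thunk and then tail-calls FUNCTION:

	this += DELTA;
	if (VCALL_OFFSET)
	  this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
	return FUNCTION (this, ...);  */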
38916 static void
38917 x86_file_start (void)
38918 {
38919 default_file_start ();
38920 if (TARGET_16BIT)
38921 fputs ("\t.code16gcc\n", asm_out_file);
38922 #if TARGET_MACHO
38923 darwin_file_start ();
38924 #endif
38925 if (X86_FILE_START_VERSION_DIRECTIVE)
38926 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38927 if (X86_FILE_START_FLTUSED)
38928 fputs ("\t.global\t__fltused\n", asm_out_file);
38929 if (ix86_asm_dialect == ASM_INTEL)
38930 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38931 }
38932
38933 int
38934 x86_field_alignment (tree field, int computed)
38935 {
38936 enum machine_mode mode;
38937 tree type = TREE_TYPE (field);
38938
38939 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38940 return computed;
38941 mode = TYPE_MODE (strip_array_types (type));
38942 if (mode == DFmode || mode == DCmode
38943 || GET_MODE_CLASS (mode) == MODE_INT
38944 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38945 return MIN (32, computed);
38946 return computed;
38947 }
38948
38949 /* Output assembler code to FILE to increment profiler label # LABELNO
38950 for profiling a function entry. */
38951 void
38952 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38953 {
38954 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38955 : MCOUNT_NAME);
38956
38957 if (TARGET_64BIT)
38958 {
38959 #ifndef NO_PROFILE_COUNTERS
38960 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38961 #endif
38962
38963 if (!TARGET_PECOFF && flag_pic)
38964 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38965 else
38966 fprintf (file, "\tcall\t%s\n", mcount_name);
38967 }
38968 else if (flag_pic)
38969 {
38970 #ifndef NO_PROFILE_COUNTERS
38971 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38972 LPREFIX, labelno);
38973 #endif
38974 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38975 }
38976 else
38977 {
38978 #ifndef NO_PROFILE_COUNTERS
38979 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38980 LPREFIX, labelno);
38981 #endif
38982 fprintf (file, "\tcall\t%s\n", mcount_name);
38983 }
38984 }
38985
38986 /* We don't have exact information about the insn sizes, but we may assume
38987 quite safely that we are informed about all 1 byte insns and memory
38988 address sizes. This is enough to eliminate unnecessary padding in
38989 99% of cases. */
38990
38991 static int
38992 min_insn_size (rtx insn)
38993 {
38994 int l = 0, len;
38995
38996 if (!INSN_P (insn) || !active_insn_p (insn))
38997 return 0;
38998
38999 /* Discard alignments we've emitted and jump instructions. */
39000 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39001 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39002 return 0;
39003
39004 /* Important case - calls are always 5 bytes.
39005 It is common to have many calls in a row. */
39006 if (CALL_P (insn)
39007 && symbolic_reference_mentioned_p (PATTERN (insn))
39008 && !SIBLING_CALL_P (insn))
39009 return 5;
39010 len = get_attr_length (insn);
39011 if (len <= 1)
39012 return 1;
39013
39014 /* For normal instructions we rely on get_attr_length being exact,
39015 with a few exceptions. */
39016 if (!JUMP_P (insn))
39017 {
39018 enum attr_type type = get_attr_type (insn);
39019
39020 switch (type)
39021 {
39022 case TYPE_MULTI:
39023 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39024 || asm_noperands (PATTERN (insn)) >= 0)
39025 return 0;
39026 break;
39027 case TYPE_OTHER:
39028 case TYPE_FCMP:
39029 break;
39030 default:
39031 /* Otherwise trust get_attr_length. */
39032 return len;
39033 }
39034
39035 l = get_attr_length_address (insn);
39036 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39037 l = 4;
39038 }
39039 if (l)
39040 return 1+l;
39041 else
39042 return 2;
39043 }
39044
39045 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39046
39047 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39048 window. */
39049
39050 static void
39051 ix86_avoid_jump_mispredicts (void)
39052 {
39053 rtx insn, start = get_insns ();
39054 int nbytes = 0, njumps = 0;
39055 int isjump = 0;
39056
39057 /* Look for all minimal intervals of instructions containing 4 jumps.
39058 The intervals are bounded by START and INSN. NBYTES is the total
39059 size of instructions in the interval including INSN and not including
39060 START. When NBYTES is smaller than 16 bytes, it is possible
39061 that the end of START and INSN end up in the same 16 byte page.
39062
39063 The smallest offset in the page at which INSN can start is the case
39064 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39065 We add a p2align to the 16 byte window with maxskip 15 - NBYTES + sizeof (INSN).
39066
39067 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
39068 have to, since control transfer to its label(s) can be performed through other
39069 means; also, we estimate the minimum length of all asm stmts as 0. */
39070 for (insn = start; insn; insn = NEXT_INSN (insn))
39071 {
39072 int min_size;
39073
39074 if (LABEL_P (insn))
39075 {
39076 int align = label_to_alignment (insn);
39077 int max_skip = label_to_max_skip (insn);
39078
39079 if (max_skip > 15)
39080 max_skip = 15;
39081 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39082 already in the current 16 byte page, because otherwise
39083 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39084 bytes to reach 16 byte boundary. */
39085 if (align <= 0
39086 || (align <= 3 && max_skip != (1 << align) - 1))
39087 max_skip = 0;
39088 if (dump_file)
39089 fprintf (dump_file, "Label %i with max_skip %i\n",
39090 INSN_UID (insn), max_skip);
39091 if (max_skip)
39092 {
39093 while (nbytes + max_skip >= 16)
39094 {
39095 start = NEXT_INSN (start);
39096 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39097 || CALL_P (start))
39098 njumps--, isjump = 1;
39099 else
39100 isjump = 0;
39101 nbytes -= min_insn_size (start);
39102 }
39103 }
39104 continue;
39105 }
39106
39107 min_size = min_insn_size (insn);
39108 nbytes += min_size;
39109 if (dump_file)
39110 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39111 INSN_UID (insn), min_size);
39112 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39113 || CALL_P (insn))
39114 njumps++;
39115 else
39116 continue;
39117
39118 while (njumps > 3)
39119 {
39120 start = NEXT_INSN (start);
39121 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39122 || CALL_P (start))
39123 njumps--, isjump = 1;
39124 else
39125 isjump = 0;
39126 nbytes -= min_insn_size (start);
39127 }
39128 gcc_assert (njumps >= 0);
39129 if (dump_file)
39130 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39131 INSN_UID (start), INSN_UID (insn), nbytes);
39132
39133 if (njumps == 3 && isjump && nbytes < 16)
39134 {
39135 int padsize = 15 - nbytes + min_insn_size (insn);
39136
39137 if (dump_file)
39138 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39139 INSN_UID (insn), padsize);
39140 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39141 }
39142 }
39143 }
39144 #endif
39145
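/* Illustrative sketch, not compiled: the pad amount emitted above
   mirrors the derivation in the comment at the top of
   ix86_avoid_jump_mispredicts: with INSN starting no earlier than
   offset NBYTES - sizeof (INSN) in the 16 byte window, the maxskip
   used is 15 - NBYTES + sizeof (INSN).  The helper name below is
   hypothetical.  */
#if 0
static int
example_padsize (int nbytes, int insn_size)
{
  return 15 - nbytes + insn_size;  /* e.g. nbytes == 12, insn_size == 2 -> 5.  */
}
#endif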
39146 /* AMD Athlon works faster
39147 when RET is not the destination of a conditional jump or directly preceded
39148 by another jump instruction. We avoid the penalty by inserting a NOP just
39149 before RET instructions in such cases. */
39150 static void
39151 ix86_pad_returns (void)
39152 {
39153 edge e;
39154 edge_iterator ei;
39155
39156 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39157 {
39158 basic_block bb = e->src;
39159 rtx ret = BB_END (bb);
39160 rtx prev;
39161 bool replace = false;
39162
39163 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39164 || optimize_bb_for_size_p (bb))
39165 continue;
39166 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39167 if (active_insn_p (prev) || LABEL_P (prev))
39168 break;
39169 if (prev && LABEL_P (prev))
39170 {
39171 edge e;
39172 edge_iterator ei;
39173
39174 FOR_EACH_EDGE (e, ei, bb->preds)
39175 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39176 && !(e->flags & EDGE_FALLTHRU))
39177 {
39178 replace = true;
39179 break;
39180 }
39181 }
39182 if (!replace)
39183 {
39184 prev = prev_active_insn (ret);
39185 if (prev
39186 && ((JUMP_P (prev) && any_condjump_p (prev))
39187 || CALL_P (prev)))
39188 replace = true;
39189 /* Empty functions get a branch mispredict even when
39190 the jump destination is not visible to us. */
39191 if (!prev && !optimize_function_for_size_p (cfun))
39192 replace = true;
39193 }
39194 if (replace)
39195 {
39196 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39197 delete_insn (ret);
39198 }
39199 }
39200 }
39201
39202 /* Count the minimum number of instructions in BB. Return 4 if the
39203 number of instructions >= 4. */
39204
39205 static int
39206 ix86_count_insn_bb (basic_block bb)
39207 {
39208 rtx insn;
39209 int insn_count = 0;
39210
39211 /* Count number of instructions in this block. Return 4 if the number
39212 of instructions >= 4. */
39213 FOR_BB_INSNS (bb, insn)
39214 {
39215 /* This only happens in exit blocks. */
39216 if (JUMP_P (insn)
39217 && ANY_RETURN_P (PATTERN (insn)))
39218 break;
39219
39220 if (NONDEBUG_INSN_P (insn)
39221 && GET_CODE (PATTERN (insn)) != USE
39222 && GET_CODE (PATTERN (insn)) != CLOBBER)
39223 {
39224 insn_count++;
39225 if (insn_count >= 4)
39226 return insn_count;
39227 }
39228 }
39229
39230 return insn_count;
39231 }
39232
39233
39234 /* Count the minimum number of instructions in code path in BB.
39235 Return 4 if the number of instructions >= 4. */
39236
39237 static int
39238 ix86_count_insn (basic_block bb)
39239 {
39240 edge e;
39241 edge_iterator ei;
39242 int min_prev_count;
39243
39244 /* Only bother counting instructions along paths with no
39245 more than 2 basic blocks between entry and exit. Given
39246 that BB has an edge to exit, determine if a predecessor
39247 of BB has an edge from entry. If so, compute the number
39248 of instructions in the predecessor block. If there
39249 happen to be multiple such blocks, compute the minimum. */
39250 min_prev_count = 4;
39251 FOR_EACH_EDGE (e, ei, bb->preds)
39252 {
39253 edge prev_e;
39254 edge_iterator prev_ei;
39255
39256 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39257 {
39258 min_prev_count = 0;
39259 break;
39260 }
39261 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39262 {
39263 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39264 {
39265 int count = ix86_count_insn_bb (e->src);
39266 if (count < min_prev_count)
39267 min_prev_count = count;
39268 break;
39269 }
39270 }
39271 }
39272
39273 if (min_prev_count < 4)
39274 min_prev_count += ix86_count_insn_bb (bb);
39275
39276 return min_prev_count;
39277 }
39278
39279 /* Pad short function to 4 instructions. */
39280
39281 static void
39282 ix86_pad_short_function (void)
39283 {
39284 edge e;
39285 edge_iterator ei;
39286
39287 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39288 {
39289 rtx ret = BB_END (e->src);
39290 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39291 {
39292 int insn_count = ix86_count_insn (e->src);
39293
39294 /* Pad short function. */
39295 if (insn_count < 4)
39296 {
39297 rtx insn = ret;
39298
39299 /* Find epilogue. */
39300 while (insn
39301 && (!NOTE_P (insn)
39302 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39303 insn = PREV_INSN (insn);
39304
39305 if (!insn)
39306 insn = ret;
39307
39308 /* Two NOPs count as one instruction. */
39309 insn_count = 2 * (4 - insn_count);
39310 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39311 }
39312 }
39313 }
39314 }
39315
39316 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39317 the epilogue, the Windows system unwinder will apply epilogue logic and
39318 produce incorrect offsets. This can be avoided by adding a nop between
39319 the last insn that can throw and the first insn of the epilogue. */
39320
39321 static void
39322 ix86_seh_fixup_eh_fallthru (void)
39323 {
39324 edge e;
39325 edge_iterator ei;
39326
39327 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39328 {
39329 rtx insn, next;
39330
39331 /* Find the beginning of the epilogue. */
39332 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39333 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39334 break;
39335 if (insn == NULL)
39336 continue;
39337
39338 /* We only care about preceding insns that can throw. */
39339 insn = prev_active_insn (insn);
39340 if (insn == NULL || !can_throw_internal (insn))
39341 continue;
39342
39343 /* Do not separate calls from their debug information. */
39344 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39345 if (NOTE_P (next)
39346 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39347 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39348 insn = next;
39349 else
39350 break;
39351
39352 emit_insn_after (gen_nops (const1_rtx), insn);
39353 }
39354 }
39355
39356 /* Implement machine specific optimizations. We implement padding of returns
39357 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39358 static void
39359 ix86_reorg (void)
39360 {
39361 /* We are freeing block_for_insn in the toplev to keep compatibility
39362 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39363 compute_bb_for_insn ();
39364
39365 if (TARGET_SEH && current_function_has_exception_handlers ())
39366 ix86_seh_fixup_eh_fallthru ();
39367
39368 if (optimize && optimize_function_for_speed_p (cfun))
39369 {
39370 if (TARGET_PAD_SHORT_FUNCTION)
39371 ix86_pad_short_function ();
39372 else if (TARGET_PAD_RETURNS)
39373 ix86_pad_returns ();
39374 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39375 if (TARGET_FOUR_JUMP_LIMIT)
39376 ix86_avoid_jump_mispredicts ();
39377 #endif
39378 }
39379 }
39380
39381 /* Return nonzero when a QImode register that must be represented via a REX
39382 prefix is used. */
39383 bool
39384 x86_extended_QIreg_mentioned_p (rtx insn)
39385 {
39386 int i;
39387 extract_insn_cached (insn);
39388 for (i = 0; i < recog_data.n_operands; i++)
39389 if (GENERAL_REG_P (recog_data.operand[i])
39390 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39391 return true;
39392 return false;
39393 }
39394
39395 /* Return nonzero when P points to a register encoded via a REX prefix.
39396 Called via for_each_rtx. */
39397 static int
39398 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39399 {
39400 unsigned int regno;
39401 if (!REG_P (*p))
39402 return 0;
39403 regno = REGNO (*p);
39404 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39405 }
39406
39407 /* Return true when INSN mentions a register that must be encoded using a REX
39408 prefix. */
39409 bool
39410 x86_extended_reg_mentioned_p (rtx insn)
39411 {
39412 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39413 extended_reg_mentioned_1, NULL);
39414 }
39415
39416 /* If profitable, negate (without causing overflow) integer constant
39417 of mode MODE at location LOC. Return true in this case. */
39418 bool
39419 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39420 {
39421 HOST_WIDE_INT val;
39422
39423 if (!CONST_INT_P (*loc))
39424 return false;
39425
39426 switch (mode)
39427 {
39428 case DImode:
39429 /* DImode x86_64 constants must fit in 32 bits. */
39430 gcc_assert (x86_64_immediate_operand (*loc, mode));
39431
39432 mode = SImode;
39433 break;
39434
39435 case SImode:
39436 case HImode:
39437 case QImode:
39438 break;
39439
39440 default:
39441 gcc_unreachable ();
39442 }
39443
39444 /* Avoid overflows. */
39445 if (mode_signbit_p (mode, *loc))
39446 return false;
39447
39448 val = INTVAL (*loc);
39449
39450 /* Make things pretty and use `subl $4,%eax' rather than `addl $-4,%eax'.
39451 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39452 if ((val < 0 && val != -128)
39453 || val == 128)
39454 {
39455 *loc = GEN_INT (-val);
39456 return true;
39457 }
39458
39459 return false;
39460 }
39461
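/* Illustrative sketch, not compiled: the profitability rule above in
   plain C.  Negation is preferred for negative values (so `subl $4'
   rather than `addl $-4'), except that -128 is kept because it fits a
   sign-extended imm8 while 128 does not, and 128 is rewritten as the
   negation of -128 for the same reason.  The helper name below is
   hypothetical.  */
#if 0
static int
example_should_negate (HOST_WIDE_INT val)
{
  return (val < 0 && val != -128) || val == 128;
}
#endif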
39462 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39463 optabs would emit if we didn't have TFmode patterns. */
39464
39465 void
39466 x86_emit_floatuns (rtx operands[2])
39467 {
39468 rtx neglab, donelab, i0, i1, f0, in, out;
39469 enum machine_mode mode, inmode;
39470
39471 inmode = GET_MODE (operands[1]);
39472 gcc_assert (inmode == SImode || inmode == DImode);
39473
39474 out = operands[0];
39475 in = force_reg (inmode, operands[1]);
39476 mode = GET_MODE (out);
39477 neglab = gen_label_rtx ();
39478 donelab = gen_label_rtx ();
39479 f0 = gen_reg_rtx (mode);
39480
39481 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39482
39483 expand_float (out, in, 0);
39484
39485 emit_jump_insn (gen_jump (donelab));
39486 emit_barrier ();
39487
39488 emit_label (neglab);
39489
39490 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39491 1, OPTAB_DIRECT);
39492 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39493 1, OPTAB_DIRECT);
39494 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39495
39496 expand_float (f0, i0, 0);
39497
39498 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39499
39500 emit_label (donelab);
39501 }
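/* Illustrative sketch, not compiled: the sequence emitted above,
   written as plain C for the DImode case.  When the sign bit is set,
   the value is halved with the lost low bit OR-ed back in (so the
   final rounding is unaffected), converted as a signed number, and
   doubled.  The helper name below is hypothetical.  */
#if 0
static double
example_floatuns (unsigned long long u)
{
  if ((long long) u >= 0)
    return (double) (long long) u;
  double f = (double) (long long) ((u >> 1) | (u & 1));
  return f + f;
}
#endif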
39502 \f
39503 /* AVX512F supports 64-byte integer vector operations;
39504 thus the longest vector we are faced with is V64QImode. */
39505 #define MAX_VECT_LEN 64
39506
39507 struct expand_vec_perm_d
39508 {
39509 rtx target, op0, op1;
39510 unsigned char perm[MAX_VECT_LEN];
39511 enum machine_mode vmode;
39512 unsigned char nelt;
39513 bool one_operand_p;
39514 bool testing_p;
39515 };
39516
39517 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39518 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39519 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39520
39521 /* Get a vector mode of the same size as the original but with elements
39522 twice as wide. This is only guaranteed to apply to integral vectors. */
39523
39524 static inline enum machine_mode
39525 get_mode_wider_vector (enum machine_mode o)
39526 {
39527 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39528 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39529 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39530 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39531 return n;
39532 }
39533
39534 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39535 fill TARGET with VAL via a VEC_DUPLICATE. */
39536
39537 static bool
39538 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39539 {
39540 bool ok;
39541 rtx insn, dup;
39542
39543 /* First attempt to recognize VAL as-is. */
39544 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39545 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39546 if (recog_memoized (insn) < 0)
39547 {
39548 rtx seq;
39549 /* If that fails, force VAL into a register. */
39550
39551 start_sequence ();
39552 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39553 seq = get_insns ();
39554 end_sequence ();
39555 if (seq)
39556 emit_insn_before (seq, insn);
39557
39558 ok = recog_memoized (insn) >= 0;
39559 gcc_assert (ok);
39560 }
39561 return true;
39562 }
39563
39564 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39565 with all elements equal to VAR. Return true if successful. */
39566
39567 static bool
39568 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39569 rtx target, rtx val)
39570 {
39571 bool ok;
39572
39573 switch (mode)
39574 {
39575 case V2SImode:
39576 case V2SFmode:
39577 if (!mmx_ok)
39578 return false;
39579 /* FALLTHRU */
39580
39581 case V4DFmode:
39582 case V4DImode:
39583 case V8SFmode:
39584 case V8SImode:
39585 case V2DFmode:
39586 case V2DImode:
39587 case V4SFmode:
39588 case V4SImode:
39589 case V16SImode:
39590 case V8DImode:
39591 case V16SFmode:
39592 case V8DFmode:
39593 return ix86_vector_duplicate_value (mode, target, val);
39594
39595 case V4HImode:
39596 if (!mmx_ok)
39597 return false;
39598 if (TARGET_SSE || TARGET_3DNOW_A)
39599 {
39600 rtx x;
39601
39602 val = gen_lowpart (SImode, val);
39603 x = gen_rtx_TRUNCATE (HImode, val);
39604 x = gen_rtx_VEC_DUPLICATE (mode, x);
39605 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39606 return true;
39607 }
39608 goto widen;
39609
39610 case V8QImode:
39611 if (!mmx_ok)
39612 return false;
39613 goto widen;
39614
39615 case V8HImode:
39616 if (TARGET_SSE2)
39617 {
39618 struct expand_vec_perm_d dperm;
39619 rtx tmp1, tmp2;
39620
39621 permute:
39622 memset (&dperm, 0, sizeof (dperm));
39623 dperm.target = target;
39624 dperm.vmode = mode;
39625 dperm.nelt = GET_MODE_NUNITS (mode);
39626 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39627 dperm.one_operand_p = true;
39628
39629 /* Extend to SImode using a paradoxical SUBREG. */
39630 tmp1 = gen_reg_rtx (SImode);
39631 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39632
39633 /* Insert the SImode value as low element of a V4SImode vector. */
39634 tmp2 = gen_reg_rtx (V4SImode);
39635 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39636 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39637
39638 ok = (expand_vec_perm_1 (&dperm)
39639 || expand_vec_perm_broadcast_1 (&dperm));
39640 gcc_assert (ok);
39641 return ok;
39642 }
39643 goto widen;
39644
39645 case V16QImode:
39646 if (TARGET_SSE2)
39647 goto permute;
39648 goto widen;
39649
39650 widen:
39651 /* Replicate the value once into the next wider mode and recurse. */
39652 {
39653 enum machine_mode smode, wsmode, wvmode;
39654 rtx x;
39655
39656 smode = GET_MODE_INNER (mode);
39657 wvmode = get_mode_wider_vector (mode);
39658 wsmode = GET_MODE_INNER (wvmode);
39659
39660 val = convert_modes (wsmode, smode, val, true);
39661 x = expand_simple_binop (wsmode, ASHIFT, val,
39662 GEN_INT (GET_MODE_BITSIZE (smode)),
39663 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39664 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39665
39666 x = gen_reg_rtx (wvmode);
39667 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39668 gcc_assert (ok);
39669 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39670 return ok;
39671 }
39672
39673 case V16HImode:
39674 case V32QImode:
39675 {
39676 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39677 rtx x = gen_reg_rtx (hvmode);
39678
39679 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39680 gcc_assert (ok);
39681
39682 x = gen_rtx_VEC_CONCAT (mode, x, x);
39683 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39684 }
39685 return true;
39686
39687 default:
39688 return false;
39689 }
39690 }
39691
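/* Illustrative sketch, not compiled: the "widen" strategy above, shown
   on scalars.  A QImode value is replicated into the next wider scalar
   mode by shifting and OR-ing, after which the recursion duplicates
   the wider value across the wider vector mode.  The helper name below
   is hypothetical.  */
#if 0
static unsigned short
example_widen_byte (unsigned char b)
{
  return ((unsigned short) b << 8) | b;  /* 0xAB -> 0xABAB.  */
}
#endif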
39692 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39693 whose ONE_VAR element is VAR, and other elements are zero. Return true
39694 if successful. */
39695
39696 static bool
39697 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39698 rtx target, rtx var, int one_var)
39699 {
39700 enum machine_mode vsimode;
39701 rtx new_target;
39702 rtx x, tmp;
39703 bool use_vector_set = false;
39704
39705 switch (mode)
39706 {
39707 case V2DImode:
39708 /* For SSE4.1, we normally use vector set. But if the second
39709 element is zero and inter-unit moves are OK, we use movq
39710 instead. */
39711 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39712 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39713 && one_var == 0));
39714 break;
39715 case V16QImode:
39716 case V4SImode:
39717 case V4SFmode:
39718 use_vector_set = TARGET_SSE4_1;
39719 break;
39720 case V8HImode:
39721 use_vector_set = TARGET_SSE2;
39722 break;
39723 case V4HImode:
39724 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39725 break;
39726 case V32QImode:
39727 case V16HImode:
39728 case V8SImode:
39729 case V8SFmode:
39730 case V4DFmode:
39731 use_vector_set = TARGET_AVX;
39732 break;
39733 case V4DImode:
39734 /* Use ix86_expand_vector_set in 64bit mode only. */
39735 use_vector_set = TARGET_AVX && TARGET_64BIT;
39736 break;
39737 default:
39738 break;
39739 }
39740
39741 if (use_vector_set)
39742 {
39743 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39744 var = force_reg (GET_MODE_INNER (mode), var);
39745 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39746 return true;
39747 }
39748
39749 switch (mode)
39750 {
39751 case V2SFmode:
39752 case V2SImode:
39753 if (!mmx_ok)
39754 return false;
39755 /* FALLTHRU */
39756
39757 case V2DFmode:
39758 case V2DImode:
39759 if (one_var != 0)
39760 return false;
39761 var = force_reg (GET_MODE_INNER (mode), var);
39762 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39763 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39764 return true;
39765
39766 case V4SFmode:
39767 case V4SImode:
39768 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39769 new_target = gen_reg_rtx (mode);
39770 else
39771 new_target = target;
39772 var = force_reg (GET_MODE_INNER (mode), var);
39773 x = gen_rtx_VEC_DUPLICATE (mode, var);
39774 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39775 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39776 if (one_var != 0)
39777 {
39778 /* We need to shuffle the value to the correct position, so
39779 create a new pseudo to store the intermediate result. */
39780
39781 /* With SSE2, we can use the integer shuffle insns. */
39782 if (mode != V4SFmode && TARGET_SSE2)
39783 {
39784 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39785 const1_rtx,
39786 GEN_INT (one_var == 1 ? 0 : 1),
39787 GEN_INT (one_var == 2 ? 0 : 1),
39788 GEN_INT (one_var == 3 ? 0 : 1)));
39789 if (target != new_target)
39790 emit_move_insn (target, new_target);
39791 return true;
39792 }
39793
39794 /* Otherwise convert the intermediate result to V4SFmode and
39795 use the SSE1 shuffle instructions. */
39796 if (mode != V4SFmode)
39797 {
39798 tmp = gen_reg_rtx (V4SFmode);
39799 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39800 }
39801 else
39802 tmp = new_target;
39803
39804 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39805 const1_rtx,
39806 GEN_INT (one_var == 1 ? 0 : 1),
39807 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39808 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39809
39810 if (mode != V4SFmode)
39811 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39812 else if (tmp != target)
39813 emit_move_insn (target, tmp);
39814 }
39815 else if (target != new_target)
39816 emit_move_insn (target, new_target);
39817 return true;
39818
39819 case V8HImode:
39820 case V16QImode:
39821 vsimode = V4SImode;
39822 goto widen;
39823 case V4HImode:
39824 case V8QImode:
39825 if (!mmx_ok)
39826 return false;
39827 vsimode = V2SImode;
39828 goto widen;
39829 widen:
39830 if (one_var != 0)
39831 return false;
39832
39833 /* Zero extend the variable element to SImode and recurse. */
39834 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39835
39836 x = gen_reg_rtx (vsimode);
39837 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39838 var, one_var))
39839 gcc_unreachable ();
39840
39841 emit_move_insn (target, gen_lowpart (mode, x));
39842 return true;
39843
39844 default:
39845 return false;
39846 }
39847 }
39848
39849 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39850 consisting of the values in VALS. It is known that all elements
39851 except ONE_VAR are constants. Return true if successful. */
39852
39853 static bool
39854 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39855 rtx target, rtx vals, int one_var)
39856 {
39857 rtx var = XVECEXP (vals, 0, one_var);
39858 enum machine_mode wmode;
39859 rtx const_vec, x;
39860
39861 const_vec = copy_rtx (vals);
39862 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39863 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39864
39865 switch (mode)
39866 {
39867 case V2DFmode:
39868 case V2DImode:
39869 case V2SFmode:
39870 case V2SImode:
39871 /* For the two element vectors, it's just as easy to use
39872 the general case. */
39873 return false;
39874
39875 case V4DImode:
39876 /* Use ix86_expand_vector_set in 64bit mode only. */
39877 if (!TARGET_64BIT)
39878 return false;
39879 case V4DFmode:
39880 case V8SFmode:
39881 case V8SImode:
39882 case V16HImode:
39883 case V32QImode:
39884 case V4SFmode:
39885 case V4SImode:
39886 case V8HImode:
39887 case V4HImode:
39888 break;
39889
39890 case V16QImode:
39891 if (TARGET_SSE4_1)
39892 break;
39893 wmode = V8HImode;
39894 goto widen;
39895 case V8QImode:
39896 wmode = V4HImode;
39897 goto widen;
39898 widen:
39899 /* There's no way to set one QImode entry easily. Combine
39900 the variable value with its adjacent constant value, and
39901 promote to an HImode set. */
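/* Illustration: if element 3 of a V16QImode vector is the only variable
   and element 2 is a known constant C, the HImode word covering both is
   (var << 8) | (C & 0xff); that word is then inserted at HImode position
   3 >> 1 == 1 of the V8HImode view of the constant vector.  */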
39902 x = XVECEXP (vals, 0, one_var ^ 1);
39903 if (one_var & 1)
39904 {
39905 var = convert_modes (HImode, QImode, var, true);
39906 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39907 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39908 x = GEN_INT (INTVAL (x) & 0xff);
39909 }
39910 else
39911 {
39912 var = convert_modes (HImode, QImode, var, true);
39913 x = gen_int_mode (INTVAL (x) << 8, HImode);
39914 }
39915 if (x != const0_rtx)
39916 var = expand_simple_binop (HImode, IOR, var, x, var,
39917 1, OPTAB_LIB_WIDEN);
39918
39919 x = gen_reg_rtx (wmode);
39920 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39921 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39922
39923 emit_move_insn (target, gen_lowpart (mode, x));
39924 return true;
39925
39926 default:
39927 return false;
39928 }
39929
39930 emit_move_insn (target, const_vec);
39931 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39932 return true;
39933 }
39934
39935 /* A subroutine of ix86_expand_vector_init_general. Use vector
39936 concatenate to handle the most general case: all values variable,
39937 and none identical. */
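/* For instance, a V8SFmode build from 8 variable elements pairs them into
   four V2SFmode registers, concatenates those into two V4SFmode halves,
   and finally concatenates the two halves into the V8SFmode target.  */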
39938
39939 static void
39940 ix86_expand_vector_init_concat (enum machine_mode mode,
39941 rtx target, rtx *ops, int n)
39942 {
39943 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39944 rtx first[16], second[8], third[4];
39945 rtvec v;
39946 int i, j;
39947
39948 switch (n)
39949 {
39950 case 2:
39951 switch (mode)
39952 {
39953 case V16SImode:
39954 cmode = V8SImode;
39955 break;
39956 case V16SFmode:
39957 cmode = V8SFmode;
39958 break;
39959 case V8DImode:
39960 cmode = V4DImode;
39961 break;
39962 case V8DFmode:
39963 cmode = V4DFmode;
39964 break;
39965 case V8SImode:
39966 cmode = V4SImode;
39967 break;
39968 case V8SFmode:
39969 cmode = V4SFmode;
39970 break;
39971 case V4DImode:
39972 cmode = V2DImode;
39973 break;
39974 case V4DFmode:
39975 cmode = V2DFmode;
39976 break;
39977 case V4SImode:
39978 cmode = V2SImode;
39979 break;
39980 case V4SFmode:
39981 cmode = V2SFmode;
39982 break;
39983 case V2DImode:
39984 cmode = DImode;
39985 break;
39986 case V2SImode:
39987 cmode = SImode;
39988 break;
39989 case V2DFmode:
39990 cmode = DFmode;
39991 break;
39992 case V2SFmode:
39993 cmode = SFmode;
39994 break;
39995 default:
39996 gcc_unreachable ();
39997 }
39998
39999 if (!register_operand (ops[1], cmode))
40000 ops[1] = force_reg (cmode, ops[1]);
40001 if (!register_operand (ops[0], cmode))
40002 ops[0] = force_reg (cmode, ops[0]);
40003 emit_insn (gen_rtx_SET (VOIDmode, target,
40004 gen_rtx_VEC_CONCAT (mode, ops[0],
40005 ops[1])));
40006 break;
40007
40008 case 4:
40009 switch (mode)
40010 {
40011 case V4DImode:
40012 cmode = V2DImode;
40013 break;
40014 case V4DFmode:
40015 cmode = V2DFmode;
40016 break;
40017 case V4SImode:
40018 cmode = V2SImode;
40019 break;
40020 case V4SFmode:
40021 cmode = V2SFmode;
40022 break;
40023 default:
40024 gcc_unreachable ();
40025 }
40026 goto half;
40027
40028 case 8:
40029 switch (mode)
40030 {
40031 case V8DImode:
40032 cmode = V2DImode;
40033 hmode = V4DImode;
40034 break;
40035 case V8DFmode:
40036 cmode = V2DFmode;
40037 hmode = V4DFmode;
40038 break;
40039 case V8SImode:
40040 cmode = V2SImode;
40041 hmode = V4SImode;
40042 break;
40043 case V8SFmode:
40044 cmode = V2SFmode;
40045 hmode = V4SFmode;
40046 break;
40047 default:
40048 gcc_unreachable ();
40049 }
40050 goto half;
40051
40052 case 16:
40053 switch (mode)
40054 {
40055 case V16SImode:
40056 cmode = V2SImode;
40057 hmode = V4SImode;
40058 gmode = V8SImode;
40059 break;
40060 case V16SFmode:
40061 cmode = V2SFmode;
40062 hmode = V4SFmode;
40063 gmode = V8SFmode;
40064 break;
40065 default:
40066 gcc_unreachable ();
40067 }
40068 goto half;
40069
40070 half:
40071 /* FIXME: We process inputs backward to help RA. PR 36222. */
40072 i = n - 1;
40073 j = (n >> 1) - 1;
40074 for (; i > 0; i -= 2, j--)
40075 {
40076 first[j] = gen_reg_rtx (cmode);
40077 v = gen_rtvec (2, ops[i - 1], ops[i]);
40078 ix86_expand_vector_init (false, first[j],
40079 gen_rtx_PARALLEL (cmode, v));
40080 }
40081
40082 n >>= 1;
40083 if (n > 4)
40084 {
40085 gcc_assert (hmode != VOIDmode);
40086 gcc_assert (gmode != VOIDmode);
40087 for (i = j = 0; i < n; i += 2, j++)
40088 {
40089 second[j] = gen_reg_rtx (hmode);
40090 ix86_expand_vector_init_concat (hmode, second [j],
40091 &first [i], 2);
40092 }
40093 n >>= 1;
40094 for (i = j = 0; i < n; i += 2, j++)
40095 {
40096 third[j] = gen_reg_rtx (gmode);
40097 ix86_expand_vector_init_concat (gmode, third[j],
40098 &second[i], 2);
40099 }
40100 n >>= 1;
40101 ix86_expand_vector_init_concat (mode, target, third, n);
40102 }
40103 else if (n > 2)
40104 {
40105 gcc_assert (hmode != VOIDmode);
40106 for (i = j = 0; i < n; i += 2, j++)
40107 {
40108 second[j] = gen_reg_rtx (hmode);
40109 ix86_expand_vector_init_concat (hmode, second [j],
40110 &first [i], 2);
40111 }
40112 n >>= 1;
40113 ix86_expand_vector_init_concat (mode, target, second, n);
40114 }
40115 else
40116 ix86_expand_vector_init_concat (mode, target, first, n);
40117 break;
40118
40119 default:
40120 gcc_unreachable ();
40121 }
40122 }
40123
40124 /* A subroutine of ix86_expand_vector_init_general. Use vector
40125 interleave to handle the most general case: all values variable,
40126 and none identical. */
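/* Roughly: each pair of scalar elements is packed into the low two slots
   of its own vector, adjacent vectors are then merged with interleave-low
   operations on progressively wider integer modes, and the final
   interleave produces the fully populated vector.  */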
40127
40128 static void
40129 ix86_expand_vector_init_interleave (enum machine_mode mode,
40130 rtx target, rtx *ops, int n)
40131 {
40132 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40133 int i, j;
40134 rtx op0, op1;
40135 rtx (*gen_load_even) (rtx, rtx, rtx);
40136 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40137 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40138
40139 switch (mode)
40140 {
40141 case V8HImode:
40142 gen_load_even = gen_vec_setv8hi;
40143 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40144 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40145 inner_mode = HImode;
40146 first_imode = V4SImode;
40147 second_imode = V2DImode;
40148 third_imode = VOIDmode;
40149 break;
40150 case V16QImode:
40151 gen_load_even = gen_vec_setv16qi;
40152 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40153 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40154 inner_mode = QImode;
40155 first_imode = V8HImode;
40156 second_imode = V4SImode;
40157 third_imode = V2DImode;
40158 break;
40159 default:
40160 gcc_unreachable ();
40161 }
40162
40163 for (i = 0; i < n; i++)
40164 {
40165 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40166 op0 = gen_reg_rtx (SImode);
40167 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40168
40169 /* Insert the SImode value as low element of a V4SImode vector. */
40170 op1 = gen_reg_rtx (V4SImode);
40171 op0 = gen_rtx_VEC_MERGE (V4SImode,
40172 gen_rtx_VEC_DUPLICATE (V4SImode,
40173 op0),
40174 CONST0_RTX (V4SImode),
40175 const1_rtx);
40176 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40177
40178 /* Cast the V4SImode vector back to a vector in original mode. */
40179 op0 = gen_reg_rtx (mode);
40180 emit_move_insn (op0, gen_lowpart (mode, op1));
40181
40182 /* Load even elements into the second position. */
40183 emit_insn (gen_load_even (op0,
40184 force_reg (inner_mode,
40185 ops [i + i + 1]),
40186 const1_rtx));
40187
40188 /* Cast vector to FIRST_IMODE vector. */
40189 ops[i] = gen_reg_rtx (first_imode);
40190 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40191 }
40192
40193 /* Interleave low FIRST_IMODE vectors. */
40194 for (i = j = 0; i < n; i += 2, j++)
40195 {
40196 op0 = gen_reg_rtx (first_imode);
40197 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40198
40199 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40200 ops[j] = gen_reg_rtx (second_imode);
40201 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40202 }
40203
40204 /* Interleave low SECOND_IMODE vectors. */
40205 switch (second_imode)
40206 {
40207 case V4SImode:
40208 for (i = j = 0; i < n / 2; i += 2, j++)
40209 {
40210 op0 = gen_reg_rtx (second_imode);
40211 emit_insn (gen_interleave_second_low (op0, ops[i],
40212 ops[i + 1]));
40213
40214 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40215 vector. */
40216 ops[j] = gen_reg_rtx (third_imode);
40217 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40218 }
40219 second_imode = V2DImode;
40220 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40221 /* FALLTHRU */
40222
40223 case V2DImode:
40224 op0 = gen_reg_rtx (second_imode);
40225 emit_insn (gen_interleave_second_low (op0, ops[0],
40226 ops[1]));
40227
40228 /* Cast the SECOND_IMODE vector back to a vector in the original
40229 mode. */
40230 emit_insn (gen_rtx_SET (VOIDmode, target,
40231 gen_lowpart (mode, op0)));
40232 break;
40233
40234 default:
40235 gcc_unreachable ();
40236 }
40237 }
40238
40239 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40240 all values variable, and none identical. */
40241
40242 static void
40243 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40244 rtx target, rtx vals)
40245 {
40246 rtx ops[64], op0, op1;
40247 enum machine_mode half_mode = VOIDmode;
40248 int n, i;
40249
40250 switch (mode)
40251 {
40252 case V2SFmode:
40253 case V2SImode:
40254 if (!mmx_ok && !TARGET_SSE)
40255 break;
40256 /* FALLTHRU */
40257
40258 case V16SImode:
40259 case V16SFmode:
40260 case V8DFmode:
40261 case V8DImode:
40262 case V8SFmode:
40263 case V8SImode:
40264 case V4DFmode:
40265 case V4DImode:
40266 case V4SFmode:
40267 case V4SImode:
40268 case V2DFmode:
40269 case V2DImode:
40270 n = GET_MODE_NUNITS (mode);
40271 for (i = 0; i < n; i++)
40272 ops[i] = XVECEXP (vals, 0, i);
40273 ix86_expand_vector_init_concat (mode, target, ops, n);
40274 return;
40275
40276 case V32QImode:
40277 half_mode = V16QImode;
40278 goto half;
40279
40280 case V16HImode:
40281 half_mode = V8HImode;
40282 goto half;
40283
40284 half:
40285 n = GET_MODE_NUNITS (mode);
40286 for (i = 0; i < n; i++)
40287 ops[i] = XVECEXP (vals, 0, i);
40288 op0 = gen_reg_rtx (half_mode);
40289 op1 = gen_reg_rtx (half_mode);
40290 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40291 n >> 2);
40292 ix86_expand_vector_init_interleave (half_mode, op1,
40293 &ops [n >> 1], n >> 2);
40294 emit_insn (gen_rtx_SET (VOIDmode, target,
40295 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40296 return;
40297
40298 case V16QImode:
40299 if (!TARGET_SSE4_1)
40300 break;
40301 /* FALLTHRU */
40302
40303 case V8HImode:
40304 if (!TARGET_SSE2)
40305 break;
40306
40307 /* Don't use ix86_expand_vector_init_interleave if we can't
40308 move from GPR to SSE register directly. */
40309 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40310 break;
40311
40312 n = GET_MODE_NUNITS (mode);
40313 for (i = 0; i < n; i++)
40314 ops[i] = XVECEXP (vals, 0, i);
40315 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40316 return;
40317
40318 case V4HImode:
40319 case V8QImode:
40320 break;
40321
40322 default:
40323 gcc_unreachable ();
40324 }
40325
40326 {
40327 int i, j, n_elts, n_words, n_elt_per_word;
40328 enum machine_mode inner_mode;
40329 rtx words[4], shift;
40330
40331 inner_mode = GET_MODE_INNER (mode);
40332 n_elts = GET_MODE_NUNITS (mode);
40333 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40334 n_elt_per_word = n_elts / n_words;
40335 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40336
40337 for (i = 0; i < n_words; ++i)
40338 {
40339 rtx word = NULL_RTX;
40340
40341 for (j = 0; j < n_elt_per_word; ++j)
40342 {
40343 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40344 elt = convert_modes (word_mode, inner_mode, elt, true);
40345
40346 if (j == 0)
40347 word = elt;
40348 else
40349 {
40350 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40351 word, 1, OPTAB_LIB_WIDEN);
40352 word = expand_simple_binop (word_mode, IOR, word, elt,
40353 word, 1, OPTAB_LIB_WIDEN);
40354 }
40355 }
40356
40357 words[i] = word;
40358 }
40359
40360 if (n_words == 1)
40361 emit_move_insn (target, gen_lowpart (mode, words[0]));
40362 else if (n_words == 2)
40363 {
40364 rtx tmp = gen_reg_rtx (mode);
40365 emit_clobber (tmp);
40366 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40367 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40368 emit_move_insn (target, tmp);
40369 }
40370 else if (n_words == 4)
40371 {
40372 rtx tmp = gen_reg_rtx (V4SImode);
40373 gcc_assert (word_mode == SImode);
40374 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40375 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40376 emit_move_insn (target, gen_lowpart (mode, tmp));
40377 }
40378 else
40379 gcc_unreachable ();
40380 }
40381 }
40382
40383 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40384 instructions unless MMX_OK is true. */
40385
40386 void
40387 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40388 {
40389 enum machine_mode mode = GET_MODE (target);
40390 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40391 int n_elts = GET_MODE_NUNITS (mode);
40392 int n_var = 0, one_var = -1;
40393 bool all_same = true, all_const_zero = true;
40394 int i;
40395 rtx x;
40396
40397 for (i = 0; i < n_elts; ++i)
40398 {
40399 x = XVECEXP (vals, 0, i);
40400 if (!(CONST_INT_P (x)
40401 || GET_CODE (x) == CONST_DOUBLE
40402 || GET_CODE (x) == CONST_FIXED))
40403 n_var++, one_var = i;
40404 else if (x != CONST0_RTX (inner_mode))
40405 all_const_zero = false;
40406 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40407 all_same = false;
40408 }
40409
40410 /* Constants are best loaded from the constant pool. */
40411 if (n_var == 0)
40412 {
40413 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40414 return;
40415 }
40416
40417 /* If all values are identical, broadcast the value. */
40418 if (all_same
40419 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40420 XVECEXP (vals, 0, 0)))
40421 return;
40422
40423 /* Values where only one field is non-constant are best loaded from
40424 the pool and overwritten via move later. */
40425 if (n_var == 1)
40426 {
40427 if (all_const_zero
40428 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40429 XVECEXP (vals, 0, one_var),
40430 one_var))
40431 return;
40432
40433 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40434 return;
40435 }
40436
40437 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40438 }
40439
40440 void
40441 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40442 {
40443 enum machine_mode mode = GET_MODE (target);
40444 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40445 enum machine_mode half_mode;
40446 bool use_vec_merge = false;
40447 rtx tmp;
40448 static rtx (*gen_extract[6][2]) (rtx, rtx)
40449 = {
40450 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40451 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40452 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40453 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40454 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40455 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40456 };
40457 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40458 = {
40459 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40460 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40461 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40462 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40463 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40464 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40465 };
40466 int i, j, n;
40467
40468 switch (mode)
40469 {
40470 case V2SFmode:
40471 case V2SImode:
40472 if (mmx_ok)
40473 {
40474 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40475 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40476 if (elt == 0)
40477 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40478 else
40479 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40480 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40481 return;
40482 }
40483 break;
40484
40485 case V2DImode:
40486 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40487 if (use_vec_merge)
40488 break;
40489
40490 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40491 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40492 if (elt == 0)
40493 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40494 else
40495 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40496 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40497 return;
40498
40499 case V2DFmode:
40500 {
40501 rtx op0, op1;
40502
40503 /* For the two element vectors, we implement a VEC_CONCAT with
40504 the extraction of the other element. */
40505
40506 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40507 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40508
40509 if (elt == 0)
40510 op0 = val, op1 = tmp;
40511 else
40512 op0 = tmp, op1 = val;
40513
40514 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40515 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40516 }
40517 return;
40518
40519 case V4SFmode:
40520 use_vec_merge = TARGET_SSE4_1;
40521 if (use_vec_merge)
40522 break;
40523
40524 switch (elt)
40525 {
40526 case 0:
40527 use_vec_merge = true;
40528 break;
40529
40530 case 1:
40531 /* tmp = target = A B C D */
40532 tmp = copy_to_reg (target);
40533 /* target = A A B B */
40534 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40535 /* target = X A B B */
40536 ix86_expand_vector_set (false, target, val, 0);
40537 /* target = A X C D */
40538 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40539 const1_rtx, const0_rtx,
40540 GEN_INT (2+4), GEN_INT (3+4)));
40541 return;
40542
40543 case 2:
40544 /* tmp = target = A B C D */
40545 tmp = copy_to_reg (target);
40546 /* tmp = X B C D */
40547 ix86_expand_vector_set (false, tmp, val, 0);
40548 /* target = A B X D */
40549 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40550 const0_rtx, const1_rtx,
40551 GEN_INT (0+4), GEN_INT (3+4)));
40552 return;
40553
40554 case 3:
40555 /* tmp = target = A B C D */
40556 tmp = copy_to_reg (target);
40557 /* tmp = X B C D */
40558 ix86_expand_vector_set (false, tmp, val, 0);
40559 /* target = A B C X */
40560 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40561 const0_rtx, const1_rtx,
40562 GEN_INT (2+4), GEN_INT (0+4)));
40563 return;
40564
40565 default:
40566 gcc_unreachable ();
40567 }
40568 break;
40569
40570 case V4SImode:
40571 use_vec_merge = TARGET_SSE4_1;
40572 if (use_vec_merge)
40573 break;
40574
40575 /* Element 0 handled by vec_merge below. */
40576 if (elt == 0)
40577 {
40578 use_vec_merge = true;
40579 break;
40580 }
40581
40582 if (TARGET_SSE2)
40583 {
40584 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40585 store into element 0, then shuffle them back. */
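/* The permutation used below swaps lanes 0 and ELT and leaves the other
   lanes in place, so it is its own inverse; emitting the same pshufd
   again after the element-0 store restores the original lane order.  */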
40586
40587 rtx order[4];
40588
40589 order[0] = GEN_INT (elt);
40590 order[1] = const1_rtx;
40591 order[2] = const2_rtx;
40592 order[3] = GEN_INT (3);
40593 order[elt] = const0_rtx;
40594
40595 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40596 order[1], order[2], order[3]));
40597
40598 ix86_expand_vector_set (false, target, val, 0);
40599
40600 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40601 order[1], order[2], order[3]));
40602 }
40603 else
40604 {
40605 /* For SSE1, we have to reuse the V4SF code. */
40606 rtx t = gen_reg_rtx (V4SFmode);
40607 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40608 emit_move_insn (target, gen_lowpart (mode, t));
40609 }
40610 return;
40611
40612 case V8HImode:
40613 use_vec_merge = TARGET_SSE2;
40614 break;
40615 case V4HImode:
40616 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40617 break;
40618
40619 case V16QImode:
40620 use_vec_merge = TARGET_SSE4_1;
40621 break;
40622
40623 case V8QImode:
40624 break;
40625
40626 case V32QImode:
40627 half_mode = V16QImode;
40628 j = 0;
40629 n = 16;
40630 goto half;
40631
40632 case V16HImode:
40633 half_mode = V8HImode;
40634 j = 1;
40635 n = 8;
40636 goto half;
40637
40638 case V8SImode:
40639 half_mode = V4SImode;
40640 j = 2;
40641 n = 4;
40642 goto half;
40643
40644 case V4DImode:
40645 half_mode = V2DImode;
40646 j = 3;
40647 n = 2;
40648 goto half;
40649
40650 case V8SFmode:
40651 half_mode = V4SFmode;
40652 j = 4;
40653 n = 4;
40654 goto half;
40655
40656 case V4DFmode:
40657 half_mode = V2DFmode;
40658 j = 5;
40659 n = 2;
40660 goto half;
40661
40662 half:
40663 /* Compute offset. */
40664 i = elt / n;
40665 elt %= n;
40666
40667 gcc_assert (i <= 1);
40668
40669 /* Extract the half. */
40670 tmp = gen_reg_rtx (half_mode);
40671 emit_insn (gen_extract[j][i] (tmp, target));
40672
40673 /* Put val in tmp at elt. */
40674 ix86_expand_vector_set (false, tmp, val, elt);
40675
40676 /* Put it back. */
40677 emit_insn (gen_insert[j][i] (target, target, tmp));
40678 return;
40679
40680 default:
40681 break;
40682 }
40683
40684 if (use_vec_merge)
40685 {
40686 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40687 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40688 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40689 }
40690 else
40691 {
40692 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40693
40694 emit_move_insn (mem, target);
40695
40696 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40697 emit_move_insn (tmp, val);
40698
40699 emit_move_insn (target, mem);
40700 }
40701 }
40702
40703 void
40704 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40705 {
40706 enum machine_mode mode = GET_MODE (vec);
40707 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40708 bool use_vec_extr = false;
40709 rtx tmp;
40710
40711 switch (mode)
40712 {
40713 case V2SImode:
40714 case V2SFmode:
40715 if (!mmx_ok)
40716 break;
40717 /* FALLTHRU */
40718
40719 case V2DFmode:
40720 case V2DImode:
40721 use_vec_extr = true;
40722 break;
40723
40724 case V4SFmode:
40725 use_vec_extr = TARGET_SSE4_1;
40726 if (use_vec_extr)
40727 break;
40728
40729 switch (elt)
40730 {
40731 case 0:
40732 tmp = vec;
40733 break;
40734
40735 case 1:
40736 case 3:
40737 tmp = gen_reg_rtx (mode);
40738 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40739 GEN_INT (elt), GEN_INT (elt),
40740 GEN_INT (elt+4), GEN_INT (elt+4)));
40741 break;
40742
40743 case 2:
40744 tmp = gen_reg_rtx (mode);
40745 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40746 break;
40747
40748 default:
40749 gcc_unreachable ();
40750 }
40751 vec = tmp;
40752 use_vec_extr = true;
40753 elt = 0;
40754 break;
40755
40756 case V4SImode:
40757 use_vec_extr = TARGET_SSE4_1;
40758 if (use_vec_extr)
40759 break;
40760
40761 if (TARGET_SSE2)
40762 {
40763 switch (elt)
40764 {
40765 case 0:
40766 tmp = vec;
40767 break;
40768
40769 case 1:
40770 case 3:
40771 tmp = gen_reg_rtx (mode);
40772 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40773 GEN_INT (elt), GEN_INT (elt),
40774 GEN_INT (elt), GEN_INT (elt)));
40775 break;
40776
40777 case 2:
40778 tmp = gen_reg_rtx (mode);
40779 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40780 break;
40781
40782 default:
40783 gcc_unreachable ();
40784 }
40785 vec = tmp;
40786 use_vec_extr = true;
40787 elt = 0;
40788 }
40789 else
40790 {
40791 /* For SSE1, we have to reuse the V4SF code. */
40792 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40793 gen_lowpart (V4SFmode, vec), elt);
40794 return;
40795 }
40796 break;
40797
40798 case V8HImode:
40799 use_vec_extr = TARGET_SSE2;
40800 break;
40801 case V4HImode:
40802 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40803 break;
40804
40805 case V16QImode:
40806 use_vec_extr = TARGET_SSE4_1;
40807 break;
40808
40809 case V8SFmode:
40810 if (TARGET_AVX)
40811 {
40812 tmp = gen_reg_rtx (V4SFmode);
40813 if (elt < 4)
40814 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40815 else
40816 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40817 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40818 return;
40819 }
40820 break;
40821
40822 case V4DFmode:
40823 if (TARGET_AVX)
40824 {
40825 tmp = gen_reg_rtx (V2DFmode);
40826 if (elt < 2)
40827 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40828 else
40829 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40830 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40831 return;
40832 }
40833 break;
40834
40835 case V32QImode:
40836 if (TARGET_AVX)
40837 {
40838 tmp = gen_reg_rtx (V16QImode);
40839 if (elt < 16)
40840 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40841 else
40842 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40843 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40844 return;
40845 }
40846 break;
40847
40848 case V16HImode:
40849 if (TARGET_AVX)
40850 {
40851 tmp = gen_reg_rtx (V8HImode);
40852 if (elt < 8)
40853 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40854 else
40855 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40856 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40857 return;
40858 }
40859 break;
40860
40861 case V8SImode:
40862 if (TARGET_AVX)
40863 {
40864 tmp = gen_reg_rtx (V4SImode);
40865 if (elt < 4)
40866 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40867 else
40868 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40869 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40870 return;
40871 }
40872 break;
40873
40874 case V4DImode:
40875 if (TARGET_AVX)
40876 {
40877 tmp = gen_reg_rtx (V2DImode);
40878 if (elt < 2)
40879 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40880 else
40881 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40882 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40883 return;
40884 }
40885 break;
40886
40887 case V16SFmode:
40888 tmp = gen_reg_rtx (V8SFmode);
40889 if (elt < 8)
40890 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40891 else
40892 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40893 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40894 return;
40895
40896 case V8DFmode:
40897 tmp = gen_reg_rtx (V4DFmode);
40898 if (elt < 4)
40899 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40900 else
40901 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40902 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40903 return;
40904
40905 case V16SImode:
40906 tmp = gen_reg_rtx (V8SImode);
40907 if (elt < 8)
40908 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40909 else
40910 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40911 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40912 return;
40913
40914 case V8DImode:
40915 tmp = gen_reg_rtx (V4DImode);
40916 if (elt < 4)
40917 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40918 else
40919 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40920 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40921 return;
40922
40923 case V8QImode:
40924 /* ??? Could extract the appropriate HImode element and shift. */
40925 default:
40926 break;
40927 }
40928
40929 if (use_vec_extr)
40930 {
40931 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40932 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40933
40934 /* Let the rtl optimizers know about the zero extension performed. */
40935 if (inner_mode == QImode || inner_mode == HImode)
40936 {
40937 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40938 target = gen_lowpart (SImode, target);
40939 }
40940
40941 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40942 }
40943 else
40944 {
40945 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40946
40947 emit_move_insn (mem, vec);
40948
40949 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40950 emit_move_insn (target, tmp);
40951 }
40952 }
40953
40954 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40955 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40956 The upper bits of DEST are undefined, though they shouldn't cause
40957 exceptions (some bits from src or all zeros are ok). */
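/* For the 128-bit integer vector modes this is done with a V1TImode
   logical shift right by i/2 bits, which moves the upper half of the low
   i bits of SRC down to bit 0; the FP modes use mode-specific shuffles
   instead.  */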
40958
40959 static void
40960 emit_reduc_half (rtx dest, rtx src, int i)
40961 {
40962 rtx tem, d = dest;
40963 switch (GET_MODE (src))
40964 {
40965 case V4SFmode:
40966 if (i == 128)
40967 tem = gen_sse_movhlps (dest, src, src);
40968 else
40969 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40970 GEN_INT (1 + 4), GEN_INT (1 + 4));
40971 break;
40972 case V2DFmode:
40973 tem = gen_vec_interleave_highv2df (dest, src, src);
40974 break;
40975 case V16QImode:
40976 case V8HImode:
40977 case V4SImode:
40978 case V2DImode:
40979 d = gen_reg_rtx (V1TImode);
40980 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
40981 GEN_INT (i / 2));
40982 break;
40983 case V8SFmode:
40984 if (i == 256)
40985 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
40986 else
40987 tem = gen_avx_shufps256 (dest, src, src,
40988 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
40989 break;
40990 case V4DFmode:
40991 if (i == 256)
40992 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
40993 else
40994 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
40995 break;
40996 case V32QImode:
40997 case V16HImode:
40998 case V8SImode:
40999 case V4DImode:
41000 if (i == 256)
41001 {
41002 if (GET_MODE (dest) != V4DImode)
41003 d = gen_reg_rtx (V4DImode);
41004 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41005 gen_lowpart (V4DImode, src),
41006 const1_rtx);
41007 }
41008 else
41009 {
41010 d = gen_reg_rtx (V2TImode);
41011 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41012 GEN_INT (i / 2));
41013 }
41014 break;
41015 case V16SImode:
41016 case V16SFmode:
41017 case V8DImode:
41018 case V8DFmode:
41019 if (i > 128)
41020 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41021 gen_lowpart (V16SImode, src),
41022 gen_lowpart (V16SImode, src),
41023 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41024 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41025 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41026 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41027 GEN_INT (0xC), GEN_INT (0xD),
41028 GEN_INT (0xE), GEN_INT (0xF),
41029 GEN_INT (0x10), GEN_INT (0x11),
41030 GEN_INT (0x12), GEN_INT (0x13),
41031 GEN_INT (0x14), GEN_INT (0x15),
41032 GEN_INT (0x16), GEN_INT (0x17));
41033 else
41034 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41035 gen_lowpart (V16SImode, src),
41036 GEN_INT (i == 128 ? 0x2 : 0x1),
41037 GEN_INT (0x3),
41038 GEN_INT (0x3),
41039 GEN_INT (0x3),
41040 GEN_INT (i == 128 ? 0x6 : 0x5),
41041 GEN_INT (0x7),
41042 GEN_INT (0x7),
41043 GEN_INT (0x7),
41044 GEN_INT (i == 128 ? 0xA : 0x9),
41045 GEN_INT (0xB),
41046 GEN_INT (0xB),
41047 GEN_INT (0xB),
41048 GEN_INT (i == 128 ? 0xE : 0xD),
41049 GEN_INT (0xF),
41050 GEN_INT (0xF),
41051 GEN_INT (0xF));
41052 break;
41053 default:
41054 gcc_unreachable ();
41055 }
41056 emit_insn (tem);
41057 if (d != dest)
41058 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41059 }
41060
41061 /* Expand a vector reduction. FN is the binary pattern to reduce;
41062 DEST is the destination; IN is the input vector. */
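/* As an illustration, reducing a V4SFmode vector [a b c d] with FN=smax
   takes two steps: i == 128 combines the two halves, leaving max (a, c)
   and max (b, d) in the low elements; i == 64 then combines those two,
   leaving max (a, b, c, d) in element 0 of DEST.  */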
41063
41064 void
41065 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41066 {
41067 rtx half, dst, vec = in;
41068 enum machine_mode mode = GET_MODE (in);
41069 int i;
41070
41071 /* SSE4.1 has a special instruction for V8HImode UMIN reduction. */
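/* phminposuw computes the minimum of the eight unsigned words and places
   it in element 0 (with the index of the minimum in the next word), so a
   single instruction performs the whole reduction.  */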
41072 if (TARGET_SSE4_1
41073 && mode == V8HImode
41074 && fn == gen_uminv8hi3)
41075 {
41076 emit_insn (gen_sse4_1_phminposuw (dest, in));
41077 return;
41078 }
41079
41080 for (i = GET_MODE_BITSIZE (mode);
41081 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41082 i >>= 1)
41083 {
41084 half = gen_reg_rtx (mode);
41085 emit_reduc_half (half, vec, i);
41086 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41087 dst = dest;
41088 else
41089 dst = gen_reg_rtx (mode);
41090 emit_insn (fn (dst, half, vec));
41091 vec = dst;
41092 }
41093 }
41094 \f
41095 /* Target hook for scalar_mode_supported_p. */
41096 static bool
41097 ix86_scalar_mode_supported_p (enum machine_mode mode)
41098 {
41099 if (DECIMAL_FLOAT_MODE_P (mode))
41100 return default_decimal_float_supported_p ();
41101 else if (mode == TFmode)
41102 return true;
41103 else
41104 return default_scalar_mode_supported_p (mode);
41105 }
41106
41107 /* Implements target hook vector_mode_supported_p. */
41108 static bool
41109 ix86_vector_mode_supported_p (enum machine_mode mode)
41110 {
41111 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41112 return true;
41113 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41114 return true;
41115 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41116 return true;
41117 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41118 return true;
41119 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41120 return true;
41121 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41122 return true;
41123 return false;
41124 }
41125
41126 /* Target hook for c_mode_for_suffix. */
41127 static enum machine_mode
41128 ix86_c_mode_for_suffix (char suffix)
41129 {
41130 if (suffix == 'q')
41131 return TFmode;
41132 if (suffix == 'w')
41133 return XFmode;
41134
41135 return VOIDmode;
41136 }
41137
41138 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41139
41140 We do this in the new i386 backend to maintain source compatibility
41141 with the old cc0-based compiler. */
41142
41143 static tree
41144 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41145 tree inputs ATTRIBUTE_UNUSED,
41146 tree clobbers)
41147 {
41148 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41149 clobbers);
41150 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41151 clobbers);
41152 return clobbers;
41153 }
41154
41155 /* Implements the target hook targetm.encode_section_info. */
41156
41157 static void ATTRIBUTE_UNUSED
41158 ix86_encode_section_info (tree decl, rtx rtl, int first)
41159 {
41160 default_encode_section_info (decl, rtl, first);
41161
41162 if (TREE_CODE (decl) == VAR_DECL
41163 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41164 && ix86_in_large_data_p (decl))
41165 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41166 }
41167
41168 /* Worker function for REVERSE_CONDITION. */
41169
41170 enum rtx_code
41171 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41172 {
41173 return (mode != CCFPmode && mode != CCFPUmode
41174 ? reverse_condition (code)
41175 : reverse_condition_maybe_unordered (code));
41176 }
41177
41178 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41179 to OPERANDS[0]. */
41180
41181 const char *
41182 output_387_reg_move (rtx insn, rtx *operands)
41183 {
41184 if (REG_P (operands[0]))
41185 {
41186 if (REG_P (operands[1])
41187 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41188 {
41189 if (REGNO (operands[0]) == FIRST_STACK_REG)
41190 return output_387_ffreep (operands, 0);
41191 return "fstp\t%y0";
41192 }
41193 if (STACK_TOP_P (operands[0]))
41194 return "fld%Z1\t%y1";
41195 return "fst\t%y0";
41196 }
41197 else if (MEM_P (operands[0]))
41198 {
41199 gcc_assert (REG_P (operands[1]));
41200 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41201 return "fstp%Z0\t%y0";
41202 else
41203 {
41204 /* There is no non-popping store to memory for XFmode.
41205 So if we need one, follow the store with a load. */
41206 if (GET_MODE (operands[0]) == XFmode)
41207 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41208 else
41209 return "fst%Z0\t%y0";
41210 }
41211 }
41212 else
41213 gcc_unreachable();
41214 }
41215
41216 /* Output code to perform a conditional jump to LABEL if the C2 flag in
41217 the FP status register is set. */
41218
41219 void
41220 ix86_emit_fp_unordered_jump (rtx label)
41221 {
41222 rtx reg = gen_reg_rtx (HImode);
41223 rtx temp;
41224
41225 emit_insn (gen_x86_fnstsw_1 (reg));
41226
41227 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41228 {
41229 emit_insn (gen_x86_sahf_1 (reg));
41230
41231 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41232 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41233 }
41234 else
41235 {
41236 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41237
41238 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41239 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41240 }
41241
41242 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41243 gen_rtx_LABEL_REF (VOIDmode, label),
41244 pc_rtx);
41245 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41246
41247 emit_jump_insn (temp);
41248 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41249 }
41250
41251 /* Output code to perform a log1p XFmode calculation. */
41252
41253 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41254 {
41255 rtx label1 = gen_label_rtx ();
41256 rtx label2 = gen_label_rtx ();
41257
41258 rtx tmp = gen_reg_rtx (XFmode);
41259 rtx tmp2 = gen_reg_rtx (XFmode);
41260 rtx test;
41261
41262 emit_insn (gen_absxf2 (tmp, op1));
41263 test = gen_rtx_GE (VOIDmode, tmp,
41264 CONST_DOUBLE_FROM_REAL_VALUE (
41265 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41266 XFmode));
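/* The threshold is 1 - sqrt(2)/2 (~0.2928932); for |op1| below it the
   argument stays inside the range for which fyl2xp1 is specified to be
   accurate, so log1p can be computed directly, otherwise we fall back to
   fyl2x on 1.0 + op1 below.  */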
41267 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41268
41269 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41270 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41271 emit_jump (label2);
41272
41273 emit_label (label1);
41274 emit_move_insn (tmp, CONST1_RTX (XFmode));
41275 emit_insn (gen_addxf3 (tmp, op1, tmp));
41276 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41277 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41278
41279 emit_label (label2);
41280 }
41281
41282 /* Emit code for round calculation. */
41283 void ix86_emit_i387_round (rtx op0, rtx op1)
41284 {
41285 enum machine_mode inmode = GET_MODE (op1);
41286 enum machine_mode outmode = GET_MODE (op0);
41287 rtx e1, e2, res, tmp, tmp1, half;
41288 rtx scratch = gen_reg_rtx (HImode);
41289 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41290 rtx jump_label = gen_label_rtx ();
41291 rtx insn;
41292 rtx (*gen_abs) (rtx, rtx);
41293 rtx (*gen_neg) (rtx, rtx);
41294
41295 switch (inmode)
41296 {
41297 case SFmode:
41298 gen_abs = gen_abssf2;
41299 break;
41300 case DFmode:
41301 gen_abs = gen_absdf2;
41302 break;
41303 case XFmode:
41304 gen_abs = gen_absxf2;
41305 break;
41306 default:
41307 gcc_unreachable ();
41308 }
41309
41310 switch (outmode)
41311 {
41312 case SFmode:
41313 gen_neg = gen_negsf2;
41314 break;
41315 case DFmode:
41316 gen_neg = gen_negdf2;
41317 break;
41318 case XFmode:
41319 gen_neg = gen_negxf2;
41320 break;
41321 case HImode:
41322 gen_neg = gen_neghi2;
41323 break;
41324 case SImode:
41325 gen_neg = gen_negsi2;
41326 break;
41327 case DImode:
41328 gen_neg = gen_negdi2;
41329 break;
41330 default:
41331 gcc_unreachable ();
41332 }
41333
41334 e1 = gen_reg_rtx (inmode);
41335 e2 = gen_reg_rtx (inmode);
41336 res = gen_reg_rtx (outmode);
41337
41338 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41339
41340 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
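/* E.g. round (2.3) = floor (2.8) = 2 and round (-2.5) = -floor (3.0) = -3;
   halfway cases are rounded away from zero, unlike the rint family.  */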
41341
41342 /* scratch = fxam(op1) */
41343 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41344 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41345 UNSPEC_FXAM)));
41346 /* e1 = fabs(op1) */
41347 emit_insn (gen_abs (e1, op1));
41348
41349 /* e2 = e1 + 0.5 */
41350 half = force_reg (inmode, half);
41351 emit_insn (gen_rtx_SET (VOIDmode, e2,
41352 gen_rtx_PLUS (inmode, e1, half)));
41353
41354 /* res = floor(e2) */
41355 if (inmode != XFmode)
41356 {
41357 tmp1 = gen_reg_rtx (XFmode);
41358
41359 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41360 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41361 }
41362 else
41363 tmp1 = e2;
41364
41365 switch (outmode)
41366 {
41367 case SFmode:
41368 case DFmode:
41369 {
41370 rtx tmp0 = gen_reg_rtx (XFmode);
41371
41372 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41373
41374 emit_insn (gen_rtx_SET (VOIDmode, res,
41375 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41376 UNSPEC_TRUNC_NOOP)));
41377 }
41378 break;
41379 case XFmode:
41380 emit_insn (gen_frndintxf2_floor (res, tmp1));
41381 break;
41382 case HImode:
41383 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41384 break;
41385 case SImode:
41386 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41387 break;
41388 case DImode:
41389 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41390 break;
41391 default:
41392 gcc_unreachable ();
41393 }
41394
41395 /* flags = signbit(a) */
41396 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41397
41398 /* if (flags) then res = -res */
41399 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41400 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41401 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41402 pc_rtx);
41403 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41404 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41405 JUMP_LABEL (insn) = jump_label;
41406
41407 emit_insn (gen_neg (res, res));
41408
41409 emit_label (jump_label);
41410 LABEL_NUSES (jump_label) = 1;
41411
41412 emit_move_insn (op0, res);
41413 }
41414
41415 /* Output code to perform a Newton-Raphson approximation of a single precision
41416 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41417
41418 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41419 {
41420 rtx x0, x1, e0, e1;
41421
41422 x0 = gen_reg_rtx (mode);
41423 e0 = gen_reg_rtx (mode);
41424 e1 = gen_reg_rtx (mode);
41425 x1 = gen_reg_rtx (mode);
41426
41427 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
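/* This is one Newton-Raphson step for f(x) = 1/x - b starting from the
   estimate x0 = rcp(b): x1 = x0 * (2 - b*x0) = (x0 + x0) - (b*x0*x0),
   which squares the relative error of the initial estimate.  */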
41428
41429 b = force_reg (mode, b);
41430
41431 /* x0 = rcp(b) estimate */
41432 if (mode == V16SFmode || mode == V8DFmode)
41433 emit_insn (gen_rtx_SET (VOIDmode, x0,
41434 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41435 UNSPEC_RCP14)));
41436 else
41437 emit_insn (gen_rtx_SET (VOIDmode, x0,
41438 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41439 UNSPEC_RCP)));
41440
41441 /* e0 = x0 * b */
41442 emit_insn (gen_rtx_SET (VOIDmode, e0,
41443 gen_rtx_MULT (mode, x0, b)));
41444
41445 /* e0 = x0 * e0 */
41446 emit_insn (gen_rtx_SET (VOIDmode, e0,
41447 gen_rtx_MULT (mode, x0, e0)));
41448
41449 /* e1 = x0 + x0 */
41450 emit_insn (gen_rtx_SET (VOIDmode, e1,
41451 gen_rtx_PLUS (mode, x0, x0)));
41452
41453 /* x1 = e1 - e0 */
41454 emit_insn (gen_rtx_SET (VOIDmode, x1,
41455 gen_rtx_MINUS (mode, e1, e0)));
41456
41457 /* res = a * x1 */
41458 emit_insn (gen_rtx_SET (VOIDmode, res,
41459 gen_rtx_MULT (mode, a, x1)));
41460 }
41461
41462 /* Output code to perform a Newton-Raphson approximation of a
41463 single precision floating point [reciprocal] square root. */
41464
41465 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41466 bool recip)
41467 {
41468 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41469 REAL_VALUE_TYPE r;
41470 int unspec;
41471
41472 x0 = gen_reg_rtx (mode);
41473 e0 = gen_reg_rtx (mode);
41474 e1 = gen_reg_rtx (mode);
41475 e2 = gen_reg_rtx (mode);
41476 e3 = gen_reg_rtx (mode);
41477
41478 real_from_integer (&r, VOIDmode, -3, -1, 0);
41479 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41480
41481 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41482 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41483 unspec = UNSPEC_RSQRT;
41484
41485 if (VECTOR_MODE_P (mode))
41486 {
41487 mthree = ix86_build_const_vector (mode, true, mthree);
41488 mhalf = ix86_build_const_vector (mode, true, mhalf);
41489 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41490 if (GET_MODE_SIZE (mode) == 64)
41491 unspec = UNSPEC_RSQRT14;
41492 }
41493
41494 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41495 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
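/* These follow from one Newton-Raphson step for f(x) = 1/x^2 - a starting
   from x0 = rsqrtss(a): x1 = x0 * (3 - a*x0*x0) / 2
   = -0.5 * x0 * (a*x0*x0 - 3); multiplying by a gives the sqrt form.  */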
41496
41497 a = force_reg (mode, a);
41498
41499 /* x0 = rsqrt(a) estimate */
41500 emit_insn (gen_rtx_SET (VOIDmode, x0,
41501 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41502 unspec)));
41503
41504 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN result for sqrt (0.0). */
41505 if (!recip)
41506 {
41507 rtx zero, mask;
41508
41509 zero = gen_reg_rtx (mode);
41510 mask = gen_reg_rtx (mode);
41511
41512 zero = force_reg (mode, CONST0_RTX(mode));
41513
41514 /* Handle masked compare. */
41515 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41516 {
41517 mask = gen_reg_rtx (HImode);
41518 /* Imm value 0x4 corresponds to not-equal comparison. */
41519 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41520 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41521 }
41522 else
41523 {
41524 emit_insn (gen_rtx_SET (VOIDmode, mask,
41525 gen_rtx_NE (mode, zero, a)));
41526
41527 emit_insn (gen_rtx_SET (VOIDmode, x0,
41528 gen_rtx_AND (mode, x0, mask)));
41529 }
41530 }
41531
41532 /* e0 = x0 * a */
41533 emit_insn (gen_rtx_SET (VOIDmode, e0,
41534 gen_rtx_MULT (mode, x0, a)));
41535 /* e1 = e0 * x0 */
41536 emit_insn (gen_rtx_SET (VOIDmode, e1,
41537 gen_rtx_MULT (mode, e0, x0)));
41538
41539 /* e2 = e1 - 3. */
41540 mthree = force_reg (mode, mthree);
41541 emit_insn (gen_rtx_SET (VOIDmode, e2,
41542 gen_rtx_PLUS (mode, e1, mthree)));
41543
41544 mhalf = force_reg (mode, mhalf);
41545 if (recip)
41546 /* e3 = -.5 * x0 */
41547 emit_insn (gen_rtx_SET (VOIDmode, e3,
41548 gen_rtx_MULT (mode, x0, mhalf)));
41549 else
41550 /* e3 = -.5 * e0 */
41551 emit_insn (gen_rtx_SET (VOIDmode, e3,
41552 gen_rtx_MULT (mode, e0, mhalf)));
41553 /* ret = e2 * e3 */
41554 emit_insn (gen_rtx_SET (VOIDmode, res,
41555 gen_rtx_MULT (mode, e2, e3)));
41556 }
41557
41558 #ifdef TARGET_SOLARIS
41559 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41560
41561 static void
41562 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41563 tree decl)
41564 {
41565 /* With Binutils 2.15, the "@unwind" marker must be specified on
41566 every occurrence of the ".eh_frame" section, not just the first
41567 one. */
41568 if (TARGET_64BIT
41569 && strcmp (name, ".eh_frame") == 0)
41570 {
41571 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41572 flags & SECTION_WRITE ? "aw" : "a");
41573 return;
41574 }
41575
41576 #ifndef USE_GAS
41577 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41578 {
41579 solaris_elf_asm_comdat_section (name, flags, decl);
41580 return;
41581 }
41582 #endif
41583
41584 default_elf_asm_named_section (name, flags, decl);
41585 }
41586 #endif /* TARGET_SOLARIS */
41587
41588 /* Return the mangling of TYPE if it is an extended fundamental type. */
41589
41590 static const char *
41591 ix86_mangle_type (const_tree type)
41592 {
41593 type = TYPE_MAIN_VARIANT (type);
41594
41595 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41596 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41597 return NULL;
41598
41599 switch (TYPE_MODE (type))
41600 {
41601 case TFmode:
41602 /* __float128 is "g". */
41603 return "g";
41604 case XFmode:
41605 /* "long double" or __float80 is "e". */
41606 return "e";
41607 default:
41608 return NULL;
41609 }
41610 }
41611
41612 /* For 32-bit code we can save PIC register setup by using
41613 __stack_chk_fail_local hidden function instead of calling
41614 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
41615 register, so it is better to call __stack_chk_fail directly. */
41616
41617 static tree ATTRIBUTE_UNUSED
41618 ix86_stack_protect_fail (void)
41619 {
41620 return TARGET_64BIT
41621 ? default_external_stack_protect_fail ()
41622 : default_hidden_stack_protect_fail ();
41623 }
41624
41625 /* Select a format to encode pointers in exception handling data. CODE
41626 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41627 true if the symbol may be affected by dynamic relocations.
41628
41629 ??? All x86 object file formats are capable of representing this.
41630 After all, the relocation needed is the same as for the call insn.
41631 Whether or not a particular assembler allows us to enter such, I
41632 guess we'll have to see. */
41633 int
41634 asm_preferred_eh_data_format (int code, int global)
41635 {
41636 if (flag_pic)
41637 {
41638 int type = DW_EH_PE_sdata8;
41639 if (!TARGET_64BIT
41640 || ix86_cmodel == CM_SMALL_PIC
41641 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41642 type = DW_EH_PE_sdata4;
41643 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41644 }
41645 if (ix86_cmodel == CM_SMALL
41646 || (ix86_cmodel == CM_MEDIUM && code))
41647 return DW_EH_PE_udata4;
41648 return DW_EH_PE_absptr;
41649 }
41650 \f
41651 /* Expand copysign from SIGN to the positive value ABS_VALUE
41652 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41653 the sign-bit. */
41654 static void
41655 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41656 {
41657 enum machine_mode mode = GET_MODE (sign);
41658 rtx sgn = gen_reg_rtx (mode);
41659 if (mask == NULL_RTX)
41660 {
41661 enum machine_mode vmode;
41662
41663 if (mode == SFmode)
41664 vmode = V4SFmode;
41665 else if (mode == DFmode)
41666 vmode = V2DFmode;
41667 else
41668 vmode = mode;
41669
41670 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41671 if (!VECTOR_MODE_P (mode))
41672 {
41673 /* We need to generate a scalar mode mask in this case. */
41674 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41675 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41676 mask = gen_reg_rtx (mode);
41677 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41678 }
41679 }
41680 else
41681 mask = gen_rtx_NOT (mode, mask);
41682 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41683 gen_rtx_AND (mode, mask, sign)));
41684 emit_insn (gen_rtx_SET (VOIDmode, result,
41685 gen_rtx_IOR (mode, abs_value, sgn)));
41686 }
41687
41688 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41689 mask for masking out the sign-bit is stored in *SMASK, if that is
41690 non-null. */
41691 static rtx
41692 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41693 {
41694 enum machine_mode vmode, mode = GET_MODE (op0);
41695 rtx xa, mask;
41696
41697 xa = gen_reg_rtx (mode);
41698 if (mode == SFmode)
41699 vmode = V4SFmode;
41700 else if (mode == DFmode)
41701 vmode = V2DFmode;
41702 else
41703 vmode = mode;
41704 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41705 if (!VECTOR_MODE_P (mode))
41706 {
41707 /* We need to generate a scalar mode mask in this case. */
41708 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41709 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41710 mask = gen_reg_rtx (mode);
41711 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41712 }
41713 emit_insn (gen_rtx_SET (VOIDmode, xa,
41714 gen_rtx_AND (mode, op0, mask)));
41715
41716 if (smask)
41717 *smask = mask;
41718
41719 return xa;
41720 }
41721
41722 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41723 swapping the operands if SWAP_OPERANDS is true. The expanded
41724 code is a forward jump to a newly created label in case the
41725 comparison is true. The generated label rtx is returned. */
41726 static rtx
41727 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41728 bool swap_operands)
41729 {
41730 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41731 rtx label, tmp;
41732
41733 if (swap_operands)
41734 {
41735 tmp = op0;
41736 op0 = op1;
41737 op1 = tmp;
41738 }
41739
41740 label = gen_label_rtx ();
41741 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41742 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41743 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41744 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41745 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41746 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41747 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41748 JUMP_LABEL (tmp) = label;
41749
41750 return label;
41751 }
41752
41753 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41754 using comparison code CODE. Operands are swapped for the comparison if
41755 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41756 static rtx
41757 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41758 bool swap_operands)
41759 {
41760 rtx (*insn)(rtx, rtx, rtx, rtx);
41761 enum machine_mode mode = GET_MODE (op0);
41762 rtx mask = gen_reg_rtx (mode);
41763
41764 if (swap_operands)
41765 {
41766 rtx tmp = op0;
41767 op0 = op1;
41768 op1 = tmp;
41769 }
41770
41771 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41772
41773 emit_insn (insn (mask, op0, op1,
41774 gen_rtx_fmt_ee (code, mode, op0, op1)));
41775 return mask;
41776 }
41777
41778 /* Generate and return an rtx of mode MODE for 2**n where n is the number
41779 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41780 static rtx
41781 ix86_gen_TWO52 (enum machine_mode mode)
41782 {
41783 REAL_VALUE_TYPE TWO52r;
41784 rtx TWO52;
41785
41786 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41787 TWO52 = const_double_from_real_value (TWO52r, mode);
41788 TWO52 = force_reg (mode, TWO52);
41789
41790 return TWO52;
41791 }
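/* As a concrete illustration of the constant built above: for DFmode,
   TWO52 is 2**52 = 4503599627370496.0, the magnitude at which the spacing
   between adjacent doubles becomes exactly 1.0; for SFmode the analogous
   value is 2**23 = 8388608.0f.  Any finite value whose absolute value is
   at least this constant is therefore already an integer, which is why
   the expanders below return the input unchanged in that case.  */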
41792
41793 /* Expand SSE sequence for computing lround from OP1 storing
41794 into OP0. */
41795 void
41796 ix86_expand_lround (rtx op0, rtx op1)
41797 {
41798 /* C code for the stuff we're doing below:
41799 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41800 return (long)tmp;
41801 */
41802 enum machine_mode mode = GET_MODE (op1);
41803 const struct real_format *fmt;
41804 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41805 rtx adj;
41806
41807 /* load nextafter (0.5, 0.0) */
41808 fmt = REAL_MODE_FORMAT (mode);
41809 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41810 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41811
41812 /* adj = copysign (0.5, op1) */
41813 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41814 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41815
41816 /* adj = op1 + adj */
41817 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41818
41819 /* op0 = (imode)adj */
41820 expand_fix (op0, adj, 0);
41821 }
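/* Why nextafter (0.5, 0.0) rather than 0.5: with a plain 0.5 the addition
   can round up an operand that is just below one half.  E.g. for
   op1 = 0.49999999999999994 (the largest double below 0.5), op1 + 0.5
   rounds to 1.0 and the conversion would yield 1; with pred (0.5) the sum
   stays below 1.0 and the correct result 0 is produced.  */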
41822
41823 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
41824 into OPERAND0. */
41825 void
41826 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41827 {
41828 /* C code for the stuff we're doing below (for do_floor):
41829 xi = (long)op1;
41830 xi -= (double)xi > op1 ? 1 : 0;
41831 return xi;
41832 */
41833 enum machine_mode fmode = GET_MODE (op1);
41834 enum machine_mode imode = GET_MODE (op0);
41835 rtx ireg, freg, label, tmp;
41836
41837 /* reg = (long)op1 */
41838 ireg = gen_reg_rtx (imode);
41839 expand_fix (ireg, op1, 0);
41840
41841 /* freg = (double)reg */
41842 freg = gen_reg_rtx (fmode);
41843 expand_float (freg, ireg, 0);
41844
41845 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41846 label = ix86_expand_sse_compare_and_jump (UNLE,
41847 freg, op1, !do_floor);
41848 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41849 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41850 emit_move_insn (ireg, tmp);
41851
41852 emit_label (label);
41853 LABEL_NUSES (label) = 1;
41854
41855 emit_move_insn (op0, ireg);
41856 }
41857
41858 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
41859 result in OPERAND0. */
41860 void
41861 ix86_expand_rint (rtx operand0, rtx operand1)
41862 {
41863 /* C code for the stuff we're doing below:
41864 xa = fabs (operand1);
41865 if (!isless (xa, 2**52))
41866 return operand1;
41867 xa = xa + 2**52 - 2**52;
41868 return copysign (xa, operand1);
41869 */
41870 enum machine_mode mode = GET_MODE (operand0);
41871 rtx res, xa, label, TWO52, mask;
41872
41873 res = gen_reg_rtx (mode);
41874 emit_move_insn (res, operand1);
41875
41876 /* xa = abs (operand1) */
41877 xa = ix86_expand_sse_fabs (res, &mask);
41878
41879 /* if (!isless (xa, TWO52)) goto label; */
41880 TWO52 = ix86_gen_TWO52 (mode);
41881 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41882
41883 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41884 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41885
41886 ix86_sse_copysign_to_positive (res, xa, res, mask);
41887
41888 emit_label (label);
41889 LABEL_NUSES (label) = 1;
41890
41891 emit_move_insn (operand0, res);
41892 }
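/* A worked example of the add/subtract trick, assuming the default
   round-to-nearest mode: for operand1 = 3.3, xa + 2**52 rounds to
   4503599627370499.0 (the spacing there is 1.0), so subtracting 2**52
   leaves 3.0; for 3.7 the sum rounds up and the result is 4.0; for 2.5
   the tie goes to even and the result is 2.0, matching rint semantics;
   the final copysign restores the sign for negative inputs.  */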
41893
41894 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41895 into OPERAND0. */
41896 void
41897 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41898 {
41899 /* C code for the stuff we expand below.
41900 double xa = fabs (x), x2;
41901 if (!isless (xa, TWO52))
41902 return x;
41903 xa = xa + TWO52 - TWO52;
41904 x2 = copysign (xa, x);
41905 Compensate. Floor:
41906 if (x2 > x)
41907 x2 -= 1;
41908 Compensate. Ceil:
41909 if (x2 < x)
41910 x2 -= -1;
41911 return x2;
41912 */
41913 enum machine_mode mode = GET_MODE (operand0);
41914 rtx xa, TWO52, tmp, label, one, res, mask;
41915
41916 TWO52 = ix86_gen_TWO52 (mode);
41917
41918 /* Temporary for holding the result, initialized to the input
41919 operand to ease control flow. */
41920 res = gen_reg_rtx (mode);
41921 emit_move_insn (res, operand1);
41922
41923 /* xa = abs (operand1) */
41924 xa = ix86_expand_sse_fabs (res, &mask);
41925
41926 /* if (!isless (xa, TWO52)) goto label; */
41927 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41928
41929 /* xa = xa + TWO52 - TWO52; */
41930 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41931 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41932
41933 /* xa = copysign (xa, operand1) */
41934 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41935
41936 /* generate 1.0 or -1.0 */
41937 one = force_reg (mode,
41938 const_double_from_real_value (do_floor
41939 ? dconst1 : dconstm1, mode));
41940
41941 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41942 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41943 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41944 gen_rtx_AND (mode, one, tmp)));
41945 /* We always need to subtract here to preserve signed zero. */
41946 tmp = expand_simple_binop (mode, MINUS,
41947 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41948 emit_move_insn (res, tmp);
41949
41950 emit_label (label);
41951 LABEL_NUSES (label) = 1;
41952
41953 emit_move_insn (operand0, res);
41954 }
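/* Worked example for the floor case, assuming round-to-nearest: for
   x = -2.3, xa = 2.3 rounds to 2.0 via the TWO52 trick, copysign gives
   x2 = -2.0, and since -2.0 > -2.3 the compensation subtracts 1.0,
   producing the expected -3.0.  For x = 2.3, x2 = 2.0 is not greater
   than x, so nothing is subtracted.  Always subtracting (a constant of
   -1.0 for ceil) keeps a -0.0 input intact, as noted above.  */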
41955
41956 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41957 into OPERAND0. */
41958 void
41959 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41960 {
41961 /* C code for the stuff we expand below.
41962 double xa = fabs (x), x2;
41963 if (!isless (xa, TWO52))
41964 return x;
41965 x2 = (double)(long)x;
41966 Compensate. Floor:
41967 if (x2 > x)
41968 x2 -= 1;
41969 Compensate. Ceil:
41970 if (x2 < x)
41971 x2 += 1;
41972 if (HONOR_SIGNED_ZEROS (mode))
41973 return copysign (x2, x);
41974 return x2;
41975 */
41976 enum machine_mode mode = GET_MODE (operand0);
41977 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41978
41979 TWO52 = ix86_gen_TWO52 (mode);
41980
41981 /* Temporary for holding the result, initialized to the input
41982 operand to ease control flow. */
41983 res = gen_reg_rtx (mode);
41984 emit_move_insn (res, operand1);
41985
41986 /* xa = abs (operand1) */
41987 xa = ix86_expand_sse_fabs (res, &mask);
41988
41989 /* if (!isless (xa, TWO52)) goto label; */
41990 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41991
41992 /* xa = (double)(long)x */
41993 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
41994 expand_fix (xi, res, 0);
41995 expand_float (xa, xi, 0);
41996
41997 /* generate 1.0 */
41998 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
41999
42000 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42001 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42002 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42003 gen_rtx_AND (mode, one, tmp)));
42004 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42005 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42006 emit_move_insn (res, tmp);
42007
42008 if (HONOR_SIGNED_ZEROS (mode))
42009 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42010
42011 emit_label (label);
42012 LABEL_NUSES (label) = 1;
42013
42014 emit_move_insn (operand0, res);
42015 }
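/* Worked example for the floor case: for x = -2.3 the fix/float pair
   truncates toward zero and gives x2 = -2.0; since -2.0 > -2.3 the
   comparison mask is all-ones, the AND keeps the 1.0 constant, and the
   subtraction yields -3.0.  For positive x the truncation already equals
   floor (x), so the mask is zero and x2 is left unchanged.  */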
42016
42017 /* Expand SSE sequence for computing round from OPERAND1 storing
42018 into OPERAND0. Sequence that works without relying on DImode truncation
42019 via cvttsd2siq that is only available on 64bit targets. */
42020 void
42021 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42022 {
42023 /* C code for the stuff we expand below.
42024 double xa = fabs (x), xa2, x2;
42025 if (!isless (xa, TWO52))
42026 return x;
42027 Using the absolute value and copying back sign makes
42028 -0.0 -> -0.0 correct.
42029 xa2 = xa + TWO52 - TWO52;
42030 Compensate.
42031 dxa = xa2 - xa;
42032 if (dxa <= -0.5)
42033 xa2 += 1;
42034 else if (dxa > 0.5)
42035 xa2 -= 1;
42036 x2 = copysign (xa2, x);
42037 return x2;
42038 */
42039 enum machine_mode mode = GET_MODE (operand0);
42040 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42041
42042 TWO52 = ix86_gen_TWO52 (mode);
42043
42044 /* Temporary for holding the result, initialized to the input
42045 operand to ease control flow. */
42046 res = gen_reg_rtx (mode);
42047 emit_move_insn (res, operand1);
42048
42049 /* xa = abs (operand1) */
42050 xa = ix86_expand_sse_fabs (res, &mask);
42051
42052 /* if (!isless (xa, TWO52)) goto label; */
42053 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42054
42055 /* xa2 = xa + TWO52 - TWO52; */
42056 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42057 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42058
42059 /* dxa = xa2 - xa; */
42060 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42061
42062 /* generate 0.5, 1.0 and -0.5 */
42063 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42064 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42065 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42066 0, OPTAB_DIRECT);
42067
42068 /* Compensate. */
42069 tmp = gen_reg_rtx (mode);
42070 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42071 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42072 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42073 gen_rtx_AND (mode, one, tmp)));
42074 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42075 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42076 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42077 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42078 gen_rtx_AND (mode, one, tmp)));
42079 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42080
42081 /* res = copysign (xa2, operand1) */
42082 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42083
42084 emit_label (label);
42085 LABEL_NUSES (label) = 1;
42086
42087 emit_move_insn (operand0, res);
42088 }
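/* Worked example of the compensation above, assuming round-to-nearest:
   for xa = 2.5 the TWO52 trick rounds to even, so xa2 = 2.0 and
   dxa = -0.5; the dxa <= -0.5 test fires and xa2 becomes 3.0, matching
   round (2.5) = 3 (halfway cases away from zero).  For xa = 3.5, xa2 is
   already 4.0 and dxa = +0.5, so neither adjustment applies.  */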
42089
42090 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42091 into OPERAND0. */
42092 void
42093 ix86_expand_trunc (rtx operand0, rtx operand1)
42094 {
42095 /* C code for SSE variant we expand below.
42096 double xa = fabs (x), x2;
42097 if (!isless (xa, TWO52))
42098 return x;
42099 x2 = (double)(long)x;
42100 if (HONOR_SIGNED_ZEROS (mode))
42101 return copysign (x2, x);
42102 return x2;
42103 */
42104 enum machine_mode mode = GET_MODE (operand0);
42105 rtx xa, xi, TWO52, label, res, mask;
42106
42107 TWO52 = ix86_gen_TWO52 (mode);
42108
42109 /* Temporary for holding the result, initialized to the input
42110 operand to ease control flow. */
42111 res = gen_reg_rtx (mode);
42112 emit_move_insn (res, operand1);
42113
42114 /* xa = abs (operand1) */
42115 xa = ix86_expand_sse_fabs (res, &mask);
42116
42117 /* if (!isless (xa, TWO52)) goto label; */
42118 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42119
42120 /* x = (double)(long)x */
42121 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42122 expand_fix (xi, res, 0);
42123 expand_float (res, xi, 0);
42124
42125 if (HONOR_SIGNED_ZEROS (mode))
42126 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42127
42128 emit_label (label);
42129 LABEL_NUSES (label) = 1;
42130
42131 emit_move_insn (operand0, res);
42132 }
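/* For illustration: with operand1 = -2.7 the fix/float pair yields
   res = -2.0 directly, since the conversion truncates toward zero; the
   optional copysign only matters for results that collapse to zero,
   turning e.g. trunc (-0.3) into -0.0 rather than +0.0 when signed
   zeros are honored.  */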
42133
42134 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0,
42135 without relying on DImode truncation via cvttsd2siq (64bit targets only). */
42136 void
42137 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42138 {
42139 enum machine_mode mode = GET_MODE (operand0);
42140 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42141
42142 /* C code for SSE variant we expand below.
42143 double xa = fabs (x), xa2, x2;
42144 if (!isless (xa, TWO52))
42145 return x;
42146 xa2 = xa + TWO52 - TWO52;
42147 Compensate:
42148 if (xa2 > xa)
42149 xa2 -= 1.0;
42150 x2 = copysign (xa2, x);
42151 return x2;
42152 */
42153
42154 TWO52 = ix86_gen_TWO52 (mode);
42155
42156 /* Temporary for holding the result, initialized to the input
42157 operand to ease control flow. */
42158 res = gen_reg_rtx (mode);
42159 emit_move_insn (res, operand1);
42160
42161 /* xa = abs (operand1) */
42162 xa = ix86_expand_sse_fabs (res, &smask);
42163
42164 /* if (!isless (xa, TWO52)) goto label; */
42165 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42166
42167 /* res = xa + TWO52 - TWO52; */
42168 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42169 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42170 emit_move_insn (res, tmp);
42171
42172 /* generate 1.0 */
42173 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42174
42175 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42176 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42177 emit_insn (gen_rtx_SET (VOIDmode, mask,
42178 gen_rtx_AND (mode, mask, one)));
42179 tmp = expand_simple_binop (mode, MINUS,
42180 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42181 emit_move_insn (res, tmp);
42182
42183 /* res = copysign (res, operand1) */
42184 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42185
42186 emit_label (label);
42187 LABEL_NUSES (label) = 1;
42188
42189 emit_move_insn (operand0, res);
42190 }
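/* Worked example: for operand1 = -2.7, xa = 2.7 and the TWO52 trick
   rounds it to 3.0 under round-to-nearest; since 3.0 > 2.7 the mask is
   all-ones, 1.0 is subtracted giving 2.0, and the final copysign
   produces the expected -2.0.  */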
42191
42192 /* Expand SSE sequence for computing round from OPERAND1 storing
42193 into OPERAND0. */
42194 void
42195 ix86_expand_round (rtx operand0, rtx operand1)
42196 {
42197 /* C code for the stuff we're doing below:
42198 double xa = fabs (x);
42199 if (!isless (xa, TWO52))
42200 return x;
42201 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42202 return copysign (xa, x);
42203 */
42204 enum machine_mode mode = GET_MODE (operand0);
42205 rtx res, TWO52, xa, label, xi, half, mask;
42206 const struct real_format *fmt;
42207 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42208
42209 /* Temporary for holding the result, initialized to the input
42210 operand to ease control flow. */
42211 res = gen_reg_rtx (mode);
42212 emit_move_insn (res, operand1);
42213
42214 TWO52 = ix86_gen_TWO52 (mode);
42215 xa = ix86_expand_sse_fabs (res, &mask);
42216 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42217
42218 /* load nextafter (0.5, 0.0) */
42219 fmt = REAL_MODE_FORMAT (mode);
42220 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42221 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42222
42223 /* xa = xa + 0.5 */
42224 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42225 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42226
42227 /* xa = (double)(int64_t)xa */
42228 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42229 expand_fix (xi, xa, 0);
42230 expand_float (xa, xi, 0);
42231
42232 /* res = copysign (xa, operand1) */
42233 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42234
42235 emit_label (label);
42236 LABEL_NUSES (label) = 1;
42237
42238 emit_move_insn (operand0, res);
42239 }
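/* For illustration: with operand1 = -2.5, xa = 2.5 and xa + pred (0.5)
   rounds up to 3.0 under round-to-nearest, the fix/float pair leaves
   3.0 unchanged, and the final copysign gives -3.0; halfway cases are
   thus rounded away from zero, as round () requires.  */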
42240
42241 /* Expand SSE sequence for computing round
42242 from OP1 storing into OP0 using sse4 round insn. */
42243 void
42244 ix86_expand_round_sse4 (rtx op0, rtx op1)
42245 {
42246 enum machine_mode mode = GET_MODE (op0);
42247 rtx e1, e2, res, half;
42248 const struct real_format *fmt;
42249 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42250 rtx (*gen_copysign) (rtx, rtx, rtx);
42251 rtx (*gen_round) (rtx, rtx, rtx);
42252
42253 switch (mode)
42254 {
42255 case SFmode:
42256 gen_copysign = gen_copysignsf3;
42257 gen_round = gen_sse4_1_roundsf2;
42258 break;
42259 case DFmode:
42260 gen_copysign = gen_copysigndf3;
42261 gen_round = gen_sse4_1_rounddf2;
42262 break;
42263 default:
42264 gcc_unreachable ();
42265 }
42266
42267 /* round (a) = trunc (a + copysign (0.5, a)) */
42268
42269 /* load nextafter (0.5, 0.0) */
42270 fmt = REAL_MODE_FORMAT (mode);
42271 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42272 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42273 half = const_double_from_real_value (pred_half, mode);
42274
42275 /* e1 = copysign (0.5, op1) */
42276 e1 = gen_reg_rtx (mode);
42277 emit_insn (gen_copysign (e1, half, op1));
42278
42279 /* e2 = op1 + e1 */
42280 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42281
42282 /* res = trunc (e2) */
42283 res = gen_reg_rtx (mode);
42284 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42285
42286 emit_move_insn (op0, res);
42287 }
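/* For illustration: with op1 = 2.5 this emits a copysign producing
   e1 = pred (0.5), an addition whose result rounds up to 3.0, and a
   single SSE4.1 round insn with the truncating immediate, so op0
   receives 3.0 as round () requires.  */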
42288 \f
42289
42290 /* Table of valid machine attributes. */
42291 static const struct attribute_spec ix86_attribute_table[] =
42292 {
42293 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42294 affects_type_identity } */
42295 /* Stdcall attribute says callee is responsible for popping arguments
42296 if they are not variable. */
42297 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42298 true },
42299 /* Fastcall attribute says callee is responsible for popping arguments
42300 if they are not variable. */
42301 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42302 true },
42303 /* Thiscall attribute says callee is responsible for popping arguments
42304 if they are not variable. */
42305 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42306 true },
42307 /* Cdecl attribute says the callee is a normal C declaration */
42308 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42309 true },
42310 /* Regparm attribute specifies how many integer arguments are to be
42311 passed in registers. */
42312 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42313 true },
42314 /* Sseregparm attribute says we are using x86_64 calling conventions
42315 for FP arguments. */
42316 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42317 true },
42318 /* The transactional memory builtins are implicitly regparm or fastcall
42319 depending on the ABI. Override the generic do-nothing attribute that
42320 these builtins were declared with. */
42321 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42322 true },
42323 /* force_align_arg_pointer says this function realigns the stack at entry. */
42324 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42325 false, true, true, ix86_handle_cconv_attribute, false },
42326 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42327 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42328 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42329 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42330 false },
42331 #endif
42332 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42333 false },
42334 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42335 false },
42336 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42337 SUBTARGET_ATTRIBUTE_TABLE,
42338 #endif
42339 /* ms_abi and sysv_abi calling convention function attributes. */
42340 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42341 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42342 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42343 false },
42344 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42345 ix86_handle_callee_pop_aggregate_return, true },
42346 /* End element. */
42347 { NULL, 0, 0, false, false, false, NULL, false }
42348 };
42349
42350 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42351 static int
42352 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42353 tree vectype,
42354 int misalign ATTRIBUTE_UNUSED)
42355 {
42356 unsigned elements;
42357
42358 switch (type_of_cost)
42359 {
42360 case scalar_stmt:
42361 return ix86_cost->scalar_stmt_cost;
42362
42363 case scalar_load:
42364 return ix86_cost->scalar_load_cost;
42365
42366 case scalar_store:
42367 return ix86_cost->scalar_store_cost;
42368
42369 case vector_stmt:
42370 return ix86_cost->vec_stmt_cost;
42371
42372 case vector_load:
42373 return ix86_cost->vec_align_load_cost;
42374
42375 case vector_store:
42376 return ix86_cost->vec_store_cost;
42377
42378 case vec_to_scalar:
42379 return ix86_cost->vec_to_scalar_cost;
42380
42381 case scalar_to_vec:
42382 return ix86_cost->scalar_to_vec_cost;
42383
42384 case unaligned_load:
42385 case unaligned_store:
42386 return ix86_cost->vec_unalign_load_cost;
42387
42388 case cond_branch_taken:
42389 return ix86_cost->cond_taken_branch_cost;
42390
42391 case cond_branch_not_taken:
42392 return ix86_cost->cond_not_taken_branch_cost;
42393
42394 case vec_perm:
42395 case vec_promote_demote:
42396 return ix86_cost->vec_stmt_cost;
42397
42398 case vec_construct:
42399 elements = TYPE_VECTOR_SUBPARTS (vectype);
42400 return elements / 2 + 1;
42401
42402 default:
42403 gcc_unreachable ();
42404 }
42405 }
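/* For example, a vec_construct of a 4-element vector is costed at
   4 / 2 + 1 = 3 and an 8-element one at 5; every other entry simply
   forwards the corresponding per-tuning cost from ix86_cost.  */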
42406
42407 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42408 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42409 insn every time. */
42410
42411 static GTY(()) rtx vselect_insn;
42412
42413 /* Initialize vselect_insn. */
42414
42415 static void
42416 init_vselect_insn (void)
42417 {
42418 unsigned i;
42419 rtx x;
42420
42421 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42422 for (i = 0; i < MAX_VECT_LEN; ++i)
42423 XVECEXP (x, 0, i) = const0_rtx;
42424 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42425 const0_rtx), x);
42426 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42427 start_sequence ();
42428 vselect_insn = emit_insn (x);
42429 end_sequence ();
42430 }
42431
42432 /* Construct (set target (vec_select op0 (parallel perm))) and
42433 return true if that's a valid instruction in the active ISA. */
42434
42435 static bool
42436 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42437 unsigned nelt, bool testing_p)
42438 {
42439 unsigned int i;
42440 rtx x, save_vconcat;
42441 int icode;
42442
42443 if (vselect_insn == NULL_RTX)
42444 init_vselect_insn ();
42445
42446 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42447 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42448 for (i = 0; i < nelt; ++i)
42449 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42450 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42451 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42452 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42453 SET_DEST (PATTERN (vselect_insn)) = target;
42454 icode = recog_memoized (vselect_insn);
42455
42456 if (icode >= 0 && !testing_p)
42457 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42458
42459 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42460 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42461 INSN_CODE (vselect_insn) = -1;
42462
42463 return icode >= 0;
42464 }
42465
42466 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42467
42468 static bool
42469 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42470 const unsigned char *perm, unsigned nelt,
42471 bool testing_p)
42472 {
42473 enum machine_mode v2mode;
42474 rtx x;
42475 bool ok;
42476
42477 if (vselect_insn == NULL_RTX)
42478 init_vselect_insn ();
42479
42480 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42481 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42482 PUT_MODE (x, v2mode);
42483 XEXP (x, 0) = op0;
42484 XEXP (x, 1) = op1;
42485 ok = expand_vselect (target, x, perm, nelt, testing_p);
42486 XEXP (x, 0) = const0_rtx;
42487 XEXP (x, 1) = const0_rtx;
42488 return ok;
42489 }
42490
42491 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42492 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42493
42494 static bool
42495 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42496 {
42497 enum machine_mode vmode = d->vmode;
42498 unsigned i, mask, nelt = d->nelt;
42499 rtx target, op0, op1, x;
42500 rtx rperm[32], vperm;
42501
42502 if (d->one_operand_p)
42503 return false;
42504 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42505 ;
42506 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42507 ;
42508 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42509 ;
42510 else
42511 return false;
42512
42513 /* This is a blend, not a permute. Elements must stay in their
42514 respective lanes. */
42515 for (i = 0; i < nelt; ++i)
42516 {
42517 unsigned e = d->perm[i];
42518 if (!(e == i || e == i + nelt))
42519 return false;
42520 }
42521
42522 if (d->testing_p)
42523 return true;
42524
42525 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42526 decision should be extracted elsewhere, so that we only try that
42527 sequence once all budget==3 options have been tried. */
42528 target = d->target;
42529 op0 = d->op0;
42530 op1 = d->op1;
42531 mask = 0;
42532
42533 switch (vmode)
42534 {
42535 case V4DFmode:
42536 case V8SFmode:
42537 case V2DFmode:
42538 case V4SFmode:
42539 case V8HImode:
42540 case V8SImode:
42541 for (i = 0; i < nelt; ++i)
42542 mask |= (d->perm[i] >= nelt) << i;
42543 break;
42544
42545 case V2DImode:
42546 for (i = 0; i < 2; ++i)
42547 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42548 vmode = V8HImode;
42549 goto do_subreg;
42550
42551 case V4SImode:
42552 for (i = 0; i < 4; ++i)
42553 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42554 vmode = V8HImode;
42555 goto do_subreg;
42556
42557 case V16QImode:
42558 /* See if bytes move in pairs so we can use pblendw with
42559 an immediate argument, rather than pblendvb with a vector
42560 argument. */
42561 for (i = 0; i < 16; i += 2)
42562 if (d->perm[i] + 1 != d->perm[i + 1])
42563 {
42564 use_pblendvb:
42565 for (i = 0; i < nelt; ++i)
42566 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42567
42568 finish_pblendvb:
42569 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42570 vperm = force_reg (vmode, vperm);
42571
42572 if (GET_MODE_SIZE (vmode) == 16)
42573 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42574 else
42575 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42576 if (target != d->target)
42577 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42578 return true;
42579 }
42580
42581 for (i = 0; i < 8; ++i)
42582 mask |= (d->perm[i * 2] >= 16) << i;
42583 vmode = V8HImode;
42584 /* FALLTHRU */
42585
42586 do_subreg:
42587 target = gen_reg_rtx (vmode);
42588 op0 = gen_lowpart (vmode, op0);
42589 op1 = gen_lowpart (vmode, op1);
42590 break;
42591
42592 case V32QImode:
42593 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42594 for (i = 0; i < 32; i += 2)
42595 if (d->perm[i] + 1 != d->perm[i + 1])
42596 goto use_pblendvb;
42597 /* See if bytes move in quadruplets. If yes, vpblendd
42598 with immediate can be used. */
42599 for (i = 0; i < 32; i += 4)
42600 if (d->perm[i] + 2 != d->perm[i + 2])
42601 break;
42602 if (i < 32)
42603 {
42604 /* See if bytes move the same in both lanes. If yes,
42605 vpblendw with immediate can be used. */
42606 for (i = 0; i < 16; i += 2)
42607 if (d->perm[i] + 16 != d->perm[i + 16])
42608 goto use_pblendvb;
42609
42610 /* Use vpblendw. */
42611 for (i = 0; i < 16; ++i)
42612 mask |= (d->perm[i * 2] >= 32) << i;
42613 vmode = V16HImode;
42614 goto do_subreg;
42615 }
42616
42617 /* Use vpblendd. */
42618 for (i = 0; i < 8; ++i)
42619 mask |= (d->perm[i * 4] >= 32) << i;
42620 vmode = V8SImode;
42621 goto do_subreg;
42622
42623 case V16HImode:
42624 /* See if words move in pairs. If yes, vpblendd can be used. */
42625 for (i = 0; i < 16; i += 2)
42626 if (d->perm[i] + 1 != d->perm[i + 1])
42627 break;
42628 if (i < 16)
42629 {
42630 /* See if words move the same in both lanes. If not,
42631 vpblendvb must be used. */
42632 for (i = 0; i < 8; i++)
42633 if (d->perm[i] + 8 != d->perm[i + 8])
42634 {
42635 /* Use vpblendvb. */
42636 for (i = 0; i < 32; ++i)
42637 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42638
42639 vmode = V32QImode;
42640 nelt = 32;
42641 target = gen_reg_rtx (vmode);
42642 op0 = gen_lowpart (vmode, op0);
42643 op1 = gen_lowpart (vmode, op1);
42644 goto finish_pblendvb;
42645 }
42646
42647 /* Use vpblendw. */
42648 for (i = 0; i < 16; ++i)
42649 mask |= (d->perm[i] >= 16) << i;
42650 break;
42651 }
42652
42653 /* Use vpblendd. */
42654 for (i = 0; i < 8; ++i)
42655 mask |= (d->perm[i * 2] >= 16) << i;
42656 vmode = V8SImode;
42657 goto do_subreg;
42658
42659 case V4DImode:
42660 /* Use vpblendd. */
42661 for (i = 0; i < 4; ++i)
42662 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42663 vmode = V8SImode;
42664 goto do_subreg;
42665
42666 default:
42667 gcc_unreachable ();
42668 }
42669
42670 /* This matches five different patterns with the different modes. */
42671 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42672 x = gen_rtx_SET (VOIDmode, target, x);
42673 emit_insn (x);
42674 if (target != d->target)
42675 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42676
42677 return true;
42678 }
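/* Worked example of the immediate path above: for a V8HImode blend with
   perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, element i comes from op1 exactly
   when perm[i] >= 8, so the loop builds mask = 0xaa and a single pblendw
   (vpblendw) with that immediate implements the permutation.  */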
42679
42680 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42681 in terms of the variable form of vpermilps.
42682
42683 Note that we will have already failed the immediate input vpermilps,
42684 which requires that the high and low part shuffle be identical; the
42685 variable form doesn't require that. */
42686
42687 static bool
42688 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42689 {
42690 rtx rperm[8], vperm;
42691 unsigned i;
42692
42693 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42694 return false;
42695
42696 /* We can only permute within the 128-bit lane. */
42697 for (i = 0; i < 8; ++i)
42698 {
42699 unsigned e = d->perm[i];
42700 if (i < 4 ? e >= 4 : e < 4)
42701 return false;
42702 }
42703
42704 if (d->testing_p)
42705 return true;
42706
42707 for (i = 0; i < 8; ++i)
42708 {
42709 unsigned e = d->perm[i];
42710
42711 /* Within each 128-bit lane, the elements of op0 are numbered
42712 from 0 and the elements of op1 are numbered from 4. */
42713 if (e >= 8 + 4)
42714 e -= 8;
42715 else if (e >= 4)
42716 e -= 4;
42717
42718 rperm[i] = GEN_INT (e);
42719 }
42720
42721 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42722 vperm = force_reg (V8SImode, vperm);
42723 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42724
42725 return true;
42726 }
42727
42728 /* Return true if permutation D can be performed as VMODE permutation
42729 instead. */
42730
42731 static bool
42732 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42733 {
42734 unsigned int i, j, chunk;
42735
42736 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42737 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42738 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42739 return false;
42740
42741 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42742 return true;
42743
42744 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42745 for (i = 0; i < d->nelt; i += chunk)
42746 if (d->perm[i] & (chunk - 1))
42747 return false;
42748 else
42749 for (j = 1; j < chunk; ++j)
42750 if (d->perm[i] + j != d->perm[i + j])
42751 return false;
42752
42753 return true;
42754 }
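/* For example, a V16QImode permutation that swaps the first two four-byte
   groups and leaves the rest in place moves bytes in aligned chunks of
   four, so it passes the checks above and can equally be performed as the
   V4SImode permutation { 1, 0, 2, 3 }.  */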
42755
42756 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42757 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42758
42759 static bool
42760 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42761 {
42762 unsigned i, nelt, eltsz, mask;
42763 unsigned char perm[32];
42764 enum machine_mode vmode = V16QImode;
42765 rtx rperm[32], vperm, target, op0, op1;
42766
42767 nelt = d->nelt;
42768
42769 if (!d->one_operand_p)
42770 {
42771 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42772 {
42773 if (TARGET_AVX2
42774 && valid_perm_using_mode_p (V2TImode, d))
42775 {
42776 if (d->testing_p)
42777 return true;
42778
42779 /* Use vperm2i128 insn. The pattern uses
42780 V4DImode instead of V2TImode. */
42781 target = d->target;
42782 if (d->vmode != V4DImode)
42783 target = gen_reg_rtx (V4DImode);
42784 op0 = gen_lowpart (V4DImode, d->op0);
42785 op1 = gen_lowpart (V4DImode, d->op1);
42786 rperm[0]
42787 = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
42788 | ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
42789 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42790 if (target != d->target)
42791 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42792 return true;
42793 }
42794 return false;
42795 }
42796 }
42797 else
42798 {
42799 if (GET_MODE_SIZE (d->vmode) == 16)
42800 {
42801 if (!TARGET_SSSE3)
42802 return false;
42803 }
42804 else if (GET_MODE_SIZE (d->vmode) == 32)
42805 {
42806 if (!TARGET_AVX2)
42807 return false;
42808
42809 /* V4DImode should already be handled through
42810 expand_vselect by the vpermq instruction. */
42811 gcc_assert (d->vmode != V4DImode);
42812
42813 vmode = V32QImode;
42814 if (d->vmode == V8SImode
42815 || d->vmode == V16HImode
42816 || d->vmode == V32QImode)
42817 {
42818 /* First see if vpermq can be used for
42819 V8SImode/V16HImode/V32QImode. */
42820 if (valid_perm_using_mode_p (V4DImode, d))
42821 {
42822 for (i = 0; i < 4; i++)
42823 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42824 if (d->testing_p)
42825 return true;
42826 target = gen_reg_rtx (V4DImode);
42827 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42828 perm, 4, false))
42829 {
42830 emit_move_insn (d->target,
42831 gen_lowpart (d->vmode, target));
42832 return true;
42833 }
42834 return false;
42835 }
42836
42837 /* Next see if vpermd can be used. */
42838 if (valid_perm_using_mode_p (V8SImode, d))
42839 vmode = V8SImode;
42840 }
42841 /* Or if vpermps can be used. */
42842 else if (d->vmode == V8SFmode)
42843 vmode = V8SImode;
42844
42845 if (vmode == V32QImode)
42846 {
42847 /* vpshufb only works within lanes; it is not
42848 possible to shuffle bytes between the lanes. */
42849 for (i = 0; i < nelt; ++i)
42850 if ((d->perm[i] ^ i) & (nelt / 2))
42851 return false;
42852 }
42853 }
42854 else
42855 return false;
42856 }
42857
42858 if (d->testing_p)
42859 return true;
42860
42861 if (vmode == V8SImode)
42862 for (i = 0; i < 8; ++i)
42863 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42864 else
42865 {
42866 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42867 if (!d->one_operand_p)
42868 mask = 2 * nelt - 1;
42869 else if (vmode == V16QImode)
42870 mask = nelt - 1;
42871 else
42872 mask = nelt / 2 - 1;
42873
42874 for (i = 0; i < nelt; ++i)
42875 {
42876 unsigned j, e = d->perm[i] & mask;
42877 for (j = 0; j < eltsz; ++j)
42878 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42879 }
42880 }
42881
42882 vperm = gen_rtx_CONST_VECTOR (vmode,
42883 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42884 vperm = force_reg (vmode, vperm);
42885
42886 target = d->target;
42887 if (d->vmode != vmode)
42888 target = gen_reg_rtx (vmode);
42889 op0 = gen_lowpart (vmode, d->op0);
42890 if (d->one_operand_p)
42891 {
42892 if (vmode == V16QImode)
42893 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42894 else if (vmode == V32QImode)
42895 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42896 else if (vmode == V8SFmode)
42897 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42898 else
42899 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42900 }
42901 else
42902 {
42903 op1 = gen_lowpart (vmode, d->op1);
42904 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42905 }
42906 if (target != d->target)
42907 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42908
42909 return true;
42910 }
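/* Worked example of the selector construction: a one-operand V16QImode
   rotation { 1, 2, ..., 15, 0 } has eltsz = 1 and mask = 15, so rperm
   simply reproduces the permutation indices and a single pshufb with
   that selector performs the byte rotation.  */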
42911
42912 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42913 in a single instruction. */
42914
42915 static bool
42916 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42917 {
42918 unsigned i, nelt = d->nelt;
42919 unsigned char perm2[MAX_VECT_LEN];
42920
42921 /* Check plain VEC_SELECT first, because AVX has instructions that could
42922 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42923 input where SEL+CONCAT may not. */
42924 if (d->one_operand_p)
42925 {
42926 int mask = nelt - 1;
42927 bool identity_perm = true;
42928 bool broadcast_perm = true;
42929
42930 for (i = 0; i < nelt; i++)
42931 {
42932 perm2[i] = d->perm[i] & mask;
42933 if (perm2[i] != i)
42934 identity_perm = false;
42935 if (perm2[i])
42936 broadcast_perm = false;
42937 }
42938
42939 if (identity_perm)
42940 {
42941 if (!d->testing_p)
42942 emit_move_insn (d->target, d->op0);
42943 return true;
42944 }
42945 else if (broadcast_perm && TARGET_AVX2)
42946 {
42947 /* Use vpbroadcast{b,w,d}. */
42948 rtx (*gen) (rtx, rtx) = NULL;
42949 switch (d->vmode)
42950 {
42951 case V32QImode:
42952 gen = gen_avx2_pbroadcastv32qi_1;
42953 break;
42954 case V16HImode:
42955 gen = gen_avx2_pbroadcastv16hi_1;
42956 break;
42957 case V8SImode:
42958 gen = gen_avx2_pbroadcastv8si_1;
42959 break;
42960 case V16QImode:
42961 gen = gen_avx2_pbroadcastv16qi;
42962 break;
42963 case V8HImode:
42964 gen = gen_avx2_pbroadcastv8hi;
42965 break;
42966 case V8SFmode:
42967 gen = gen_avx2_vec_dupv8sf_1;
42968 break;
42969 /* For other modes prefer other shuffles this function creates. */
42970 default: break;
42971 }
42972 if (gen != NULL)
42973 {
42974 if (!d->testing_p)
42975 emit_insn (gen (d->target, d->op0));
42976 return true;
42977 }
42978 }
42979
42980 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
42981 return true;
42982
42983 /* There are plenty of patterns in sse.md that are written for
42984 SEL+CONCAT and are not replicated for a single op. Perhaps
42985 that should be changed, to avoid the nastiness here. */
42986
42987 /* Recognize interleave style patterns, which means incrementing
42988 every other permutation operand. */
42989 for (i = 0; i < nelt; i += 2)
42990 {
42991 perm2[i] = d->perm[i] & mask;
42992 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
42993 }
42994 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42995 d->testing_p))
42996 return true;
42997
42998 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
42999 if (nelt >= 4)
43000 {
43001 for (i = 0; i < nelt; i += 4)
43002 {
43003 perm2[i + 0] = d->perm[i + 0] & mask;
43004 perm2[i + 1] = d->perm[i + 1] & mask;
43005 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43006 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43007 }
43008
43009 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43010 d->testing_p))
43011 return true;
43012 }
43013 }
43014
43015 /* Finally, try the fully general two operand permute. */
43016 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43017 d->testing_p))
43018 return true;
43019
43020 /* Recognize interleave style patterns with reversed operands. */
43021 if (!d->one_operand_p)
43022 {
43023 for (i = 0; i < nelt; ++i)
43024 {
43025 unsigned e = d->perm[i];
43026 if (e >= nelt)
43027 e -= nelt;
43028 else
43029 e += nelt;
43030 perm2[i] = e;
43031 }
43032
43033 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43034 d->testing_p))
43035 return true;
43036 }
43037
43038 /* Try the SSE4.1 blend variable merge instructions. */
43039 if (expand_vec_perm_blend (d))
43040 return true;
43041
43042 /* Try one of the AVX vpermil variable permutations. */
43043 if (expand_vec_perm_vpermil (d))
43044 return true;
43045
43046 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43047 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43048 if (expand_vec_perm_pshufb (d))
43049 return true;
43050
43051 /* Try the AVX512F vpermi2 instructions. */
43052 rtx vec[64];
43053 enum machine_mode mode = d->vmode;
43054 if (mode == V8DFmode)
43055 mode = V8DImode;
43056 else if (mode == V16SFmode)
43057 mode = V16SImode;
43058 for (i = 0; i < nelt; ++i)
43059 vec[i] = GEN_INT (d->perm[i]);
43060 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43061 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43062 return true;
43063
43064 return false;
43065 }
43066
43067 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43068 in terms of a pair of pshuflw + pshufhw instructions. */
43069
43070 static bool
43071 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43072 {
43073 unsigned char perm2[MAX_VECT_LEN];
43074 unsigned i;
43075 bool ok;
43076
43077 if (d->vmode != V8HImode || !d->one_operand_p)
43078 return false;
43079
43080 /* The two permutations only operate in 64-bit lanes. */
43081 for (i = 0; i < 4; ++i)
43082 if (d->perm[i] >= 4)
43083 return false;
43084 for (i = 4; i < 8; ++i)
43085 if (d->perm[i] < 4)
43086 return false;
43087
43088 if (d->testing_p)
43089 return true;
43090
43091 /* Emit the pshuflw. */
43092 memcpy (perm2, d->perm, 4);
43093 for (i = 4; i < 8; ++i)
43094 perm2[i] = i;
43095 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43096 gcc_assert (ok);
43097
43098 /* Emit the pshufhw. */
43099 memcpy (perm2 + 4, d->perm + 4, 4);
43100 for (i = 0; i < 4; ++i)
43101 perm2[i] = i;
43102 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43103 gcc_assert (ok);
43104
43105 return true;
43106 }
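/* For example, the V8HImode permutation { 2, 0, 3, 1, 6, 4, 7, 5 } keeps
   indices 0-3 in the low quadword and 4-7 in the high one, so it is
   expanded as a pshuflw reordering the low four words followed by a
   pshufhw reordering the high four, each leaving the other half alone.  */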
43107
43108 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43109 the permutation using the SSSE3 palignr instruction. This succeeds
43110 when all of the elements in PERM fit within one vector and we merely
43111 need to shift them down so that a single vector permutation has a
43112 chance to succeed. */
43113
43114 static bool
43115 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43116 {
43117 unsigned i, nelt = d->nelt;
43118 unsigned min, max;
43119 bool in_order, ok;
43120 rtx shift, target;
43121 struct expand_vec_perm_d dcopy;
43122
43123 /* Even with AVX, palignr only operates on 128-bit vectors. */
43124 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43125 return false;
43126
43127 min = nelt, max = 0;
43128 for (i = 0; i < nelt; ++i)
43129 {
43130 unsigned e = d->perm[i];
43131 if (e < min)
43132 min = e;
43133 if (e > max)
43134 max = e;
43135 }
43136 if (min == 0 || max - min >= nelt)
43137 return false;
43138
43139 /* Given that we have SSSE3, we know we'll be able to implement the
43140 single operand permutation after the palignr with pshufb. */
43141 if (d->testing_p)
43142 return true;
43143
43144 dcopy = *d;
43145 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43146 target = gen_reg_rtx (TImode);
43147 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43148 gen_lowpart (TImode, d->op0), shift));
43149
43150 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43151 dcopy.one_operand_p = true;
43152
43153 in_order = true;
43154 for (i = 0; i < nelt; ++i)
43155 {
43156 unsigned e = dcopy.perm[i] - min;
43157 if (e != i)
43158 in_order = false;
43159 dcopy.perm[i] = e;
43160 }
43161
43162 /* Test for the degenerate case where the alignment by itself
43163 produces the desired permutation. */
43164 if (in_order)
43165 {
43166 emit_move_insn (d->target, dcopy.op0);
43167 return true;
43168 }
43169
43170 ok = expand_vec_perm_1 (&dcopy);
43171 gcc_assert (ok);
43172
43173 return ok;
43174 }
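/* Worked example: for a V16QImode permutation whose indices all lie in
   { 3, ..., 18 }, min = 3 and max - min < 16, so the palignr above shifts
   the concatenated operands down by three byte positions; after min is
   subtracted, every index selects from the single shifted vector and the
   remaining one-operand shuffle is left to expand_vec_perm_1 (ultimately
   pshufb, as the comment above notes).  */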
43175
43176 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43177
43178 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43179 a two vector permutation into a single vector permutation by using
43180 an interleave operation to merge the vectors. */
43181
43182 static bool
43183 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43184 {
43185 struct expand_vec_perm_d dremap, dfinal;
43186 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43187 unsigned HOST_WIDE_INT contents;
43188 unsigned char remap[2 * MAX_VECT_LEN];
43189 rtx seq;
43190 bool ok, same_halves = false;
43191
43192 if (GET_MODE_SIZE (d->vmode) == 16)
43193 {
43194 if (d->one_operand_p)
43195 return false;
43196 }
43197 else if (GET_MODE_SIZE (d->vmode) == 32)
43198 {
43199 if (!TARGET_AVX)
43200 return false;
43201 /* For 32-byte modes allow even d->one_operand_p.
43202 The lack of cross-lane shuffling in some instructions
43203 might prevent a single insn shuffle. */
43204 dfinal = *d;
43205 dfinal.testing_p = true;
43206 /* If expand_vec_perm_interleave3 can expand this into
43207 a 3 insn sequence, give up and let it be expanded as
43208 a 3 insn sequence. While that is one insn longer,
43209 it doesn't need a memory operand, and in the common
43210 case where both the interleave low and interleave high
43211 permutations with the same operands are adjacent, the
43212 pair needs only 4 insns after CSE. */
43213 if (expand_vec_perm_interleave3 (&dfinal))
43214 return false;
43215 }
43216 else
43217 return false;
43218
43219 /* Examine from whence the elements come. */
43220 contents = 0;
43221 for (i = 0; i < nelt; ++i)
43222 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43223
43224 memset (remap, 0xff, sizeof (remap));
43225 dremap = *d;
43226
43227 if (GET_MODE_SIZE (d->vmode) == 16)
43228 {
43229 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43230
43231 /* Split the two input vectors into 4 halves. */
43232 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43233 h2 = h1 << nelt2;
43234 h3 = h2 << nelt2;
43235 h4 = h3 << nelt2;
43236
43237 /* If the elements are from the low halves, use interleave low; similarly
43238 use interleave high for the high halves. If the elements are from
43239 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43240 if ((contents & (h1 | h3)) == contents)
43241 {
43242 /* punpckl* */
43243 for (i = 0; i < nelt2; ++i)
43244 {
43245 remap[i] = i * 2;
43246 remap[i + nelt] = i * 2 + 1;
43247 dremap.perm[i * 2] = i;
43248 dremap.perm[i * 2 + 1] = i + nelt;
43249 }
43250 if (!TARGET_SSE2 && d->vmode == V4SImode)
43251 dremap.vmode = V4SFmode;
43252 }
43253 else if ((contents & (h2 | h4)) == contents)
43254 {
43255 /* punpckh* */
43256 for (i = 0; i < nelt2; ++i)
43257 {
43258 remap[i + nelt2] = i * 2;
43259 remap[i + nelt + nelt2] = i * 2 + 1;
43260 dremap.perm[i * 2] = i + nelt2;
43261 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43262 }
43263 if (!TARGET_SSE2 && d->vmode == V4SImode)
43264 dremap.vmode = V4SFmode;
43265 }
43266 else if ((contents & (h1 | h4)) == contents)
43267 {
43268 /* shufps */
43269 for (i = 0; i < nelt2; ++i)
43270 {
43271 remap[i] = i;
43272 remap[i + nelt + nelt2] = i + nelt2;
43273 dremap.perm[i] = i;
43274 dremap.perm[i + nelt2] = i + nelt + nelt2;
43275 }
43276 if (nelt != 4)
43277 {
43278 /* shufpd */
43279 dremap.vmode = V2DImode;
43280 dremap.nelt = 2;
43281 dremap.perm[0] = 0;
43282 dremap.perm[1] = 3;
43283 }
43284 }
43285 else if ((contents & (h2 | h3)) == contents)
43286 {
43287 /* shufps */
43288 for (i = 0; i < nelt2; ++i)
43289 {
43290 remap[i + nelt2] = i;
43291 remap[i + nelt] = i + nelt2;
43292 dremap.perm[i] = i + nelt2;
43293 dremap.perm[i + nelt2] = i + nelt;
43294 }
43295 if (nelt != 4)
43296 {
43297 /* shufpd */
43298 dremap.vmode = V2DImode;
43299 dremap.nelt = 2;
43300 dremap.perm[0] = 1;
43301 dremap.perm[1] = 2;
43302 }
43303 }
43304 else
43305 return false;
43306 }
43307 else
43308 {
43309 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43310 unsigned HOST_WIDE_INT q[8];
43311 unsigned int nonzero_halves[4];
43312
43313 /* Split the two input vectors into 8 quarters. */
43314 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43315 for (i = 1; i < 8; ++i)
43316 q[i] = q[0] << (nelt4 * i);
43317 for (i = 0; i < 4; ++i)
43318 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43319 {
43320 nonzero_halves[nzcnt] = i;
43321 ++nzcnt;
43322 }
43323
43324 if (nzcnt == 1)
43325 {
43326 gcc_assert (d->one_operand_p);
43327 nonzero_halves[1] = nonzero_halves[0];
43328 same_halves = true;
43329 }
43330 else if (d->one_operand_p)
43331 {
43332 gcc_assert (nonzero_halves[0] == 0);
43333 gcc_assert (nonzero_halves[1] == 1);
43334 }
43335
43336 if (nzcnt <= 2)
43337 {
43338 if (d->perm[0] / nelt2 == nonzero_halves[1])
43339 {
43340 /* Attempt to increase the likelihood that dfinal
43341 shuffle will be intra-lane. */
43342 char tmph = nonzero_halves[0];
43343 nonzero_halves[0] = nonzero_halves[1];
43344 nonzero_halves[1] = tmph;
43345 }
43346
43347 /* vperm2f128 or vperm2i128. */
43348 for (i = 0; i < nelt2; ++i)
43349 {
43350 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43351 remap[i + nonzero_halves[0] * nelt2] = i;
43352 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43353 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43354 }
43355
43356 if (d->vmode != V8SFmode
43357 && d->vmode != V4DFmode
43358 && d->vmode != V8SImode)
43359 {
43360 dremap.vmode = V8SImode;
43361 dremap.nelt = 8;
43362 for (i = 0; i < 4; ++i)
43363 {
43364 dremap.perm[i] = i + nonzero_halves[0] * 4;
43365 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43366 }
43367 }
43368 }
43369 else if (d->one_operand_p)
43370 return false;
43371 else if (TARGET_AVX2
43372 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43373 {
43374 /* vpunpckl* */
43375 for (i = 0; i < nelt4; ++i)
43376 {
43377 remap[i] = i * 2;
43378 remap[i + nelt] = i * 2 + 1;
43379 remap[i + nelt2] = i * 2 + nelt2;
43380 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43381 dremap.perm[i * 2] = i;
43382 dremap.perm[i * 2 + 1] = i + nelt;
43383 dremap.perm[i * 2 + nelt2] = i + nelt2;
43384 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43385 }
43386 }
43387 else if (TARGET_AVX2
43388 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43389 {
43390 /* vpunpckh* */
43391 for (i = 0; i < nelt4; ++i)
43392 {
43393 remap[i + nelt4] = i * 2;
43394 remap[i + nelt + nelt4] = i * 2 + 1;
43395 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43396 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43397 dremap.perm[i * 2] = i + nelt4;
43398 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43399 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43400 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43401 }
43402 }
43403 else
43404 return false;
43405 }
43406
43407 /* Use the remapping array set up above to move the elements from their
43408 swizzled locations into their final destinations. */
43409 dfinal = *d;
43410 for (i = 0; i < nelt; ++i)
43411 {
43412 unsigned e = remap[d->perm[i]];
43413 gcc_assert (e < nelt);
43414 /* If same_halves is true, both halves of the remapped vector are the
43415 same. Avoid cross-lane accesses if possible. */
43416 if (same_halves && i >= nelt2)
43417 {
43418 gcc_assert (e < nelt2);
43419 dfinal.perm[i] = e + nelt2;
43420 }
43421 else
43422 dfinal.perm[i] = e;
43423 }
43424 dremap.target = gen_reg_rtx (dremap.vmode);
43425 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43426 dfinal.op1 = dfinal.op0;
43427 dfinal.one_operand_p = true;
43428
43429 /* Test if the final remap can be done with a single insn. For V4SFmode or
43430 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43431 start_sequence ();
43432 ok = expand_vec_perm_1 (&dfinal);
43433 seq = get_insns ();
43434 end_sequence ();
43435
43436 if (!ok)
43437 return false;
43438
43439 if (d->testing_p)
43440 return true;
43441
43442 if (dremap.vmode != dfinal.vmode)
43443 {
43444 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43445 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43446 }
43447
43448 ok = expand_vec_perm_1 (&dremap);
43449 gcc_assert (ok);
43450
43451 emit_insn (seq);
43452 return true;
43453 }
43454
43455 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43456 a single vector cross-lane permutation into vpermq followed
43457 by any of the single insn permutations. */
43458
43459 static bool
43460 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43461 {
43462 struct expand_vec_perm_d dremap, dfinal;
43463 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43464 unsigned contents[2];
43465 bool ok;
43466
43467 if (!(TARGET_AVX2
43468 && (d->vmode == V32QImode || d->vmode == V16HImode)
43469 && d->one_operand_p))
43470 return false;
43471
43472 contents[0] = 0;
43473 contents[1] = 0;
43474 for (i = 0; i < nelt2; ++i)
43475 {
43476 contents[0] |= 1u << (d->perm[i] / nelt4);
43477 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43478 }
43479
43480 for (i = 0; i < 2; ++i)
43481 {
43482 unsigned int cnt = 0;
43483 for (j = 0; j < 4; ++j)
43484 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43485 return false;
43486 }
43487
43488 if (d->testing_p)
43489 return true;
43490
43491 dremap = *d;
43492 dremap.vmode = V4DImode;
43493 dremap.nelt = 4;
43494 dremap.target = gen_reg_rtx (V4DImode);
43495 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43496 dremap.op1 = dremap.op0;
43497 dremap.one_operand_p = true;
43498 for (i = 0; i < 2; ++i)
43499 {
43500 unsigned int cnt = 0;
43501 for (j = 0; j < 4; ++j)
43502 if ((contents[i] & (1u << j)) != 0)
43503 dremap.perm[2 * i + cnt++] = j;
43504 for (; cnt < 2; ++cnt)
43505 dremap.perm[2 * i + cnt] = 0;
43506 }
43507
43508 dfinal = *d;
43509 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43510 dfinal.op1 = dfinal.op0;
43511 dfinal.one_operand_p = true;
43512 for (i = 0, j = 0; i < nelt; ++i)
43513 {
43514 if (i == nelt2)
43515 j = 2;
43516 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43517 if ((d->perm[i] / nelt4) == dremap.perm[j])
43518 ;
43519 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43520 dfinal.perm[i] |= nelt4;
43521 else
43522 gcc_unreachable ();
43523 }
43524
43525 ok = expand_vec_perm_1 (&dremap);
43526 gcc_assert (ok);
43527
43528 ok = expand_vec_perm_1 (&dfinal);
43529 gcc_assert (ok);
43530
43531 return true;
43532 }
43533
43534 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43535 a vector permutation using two instructions, vperm2f128 resp.
43536 vperm2i128 followed by any single in-lane permutation. */
43537
43538 static bool
43539 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43540 {
43541 struct expand_vec_perm_d dfirst, dsecond;
43542 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43543 bool ok;
43544
43545 if (!TARGET_AVX
43546 || GET_MODE_SIZE (d->vmode) != 32
43547 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43548 return false;
43549
43550 dsecond = *d;
43551 dsecond.one_operand_p = false;
43552 dsecond.testing_p = true;
43553
43554 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43555 immediate. For perm < 16 the second permutation uses
43556 d->op0 as first operand, for perm >= 16 it uses d->op1
43557 as first operand. The second operand is the result of
43558 vperm2[fi]128. */
43559 for (perm = 0; perm < 32; perm++)
43560 {
43561 /* Ignore permutations which do not move anything cross-lane. */
43562 if (perm < 16)
43563 {
43564 /* The second shuffle for e.g. V4DFmode has
43565 0123 and ABCD operands.
43566 Ignore AB23, as 23 is already in the second lane
43567 of the first operand. */
43568 if ((perm & 0xc) == (1 << 2)) continue;
43569 /* And 01CD, as 01 is in the first lane of the first
43570 operand. */
43571 if ((perm & 3) == 0) continue;
43572 /* And 4567, as then the vperm2[fi]128 doesn't change
43573 anything on the original 4567 second operand. */
43574 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43575 }
43576 else
43577 {
43578 /* The second shuffle for e.g. V4DFmode has
43579 4567 and ABCD operands.
43580 Ignore AB67, as 67 is already in the second lane
43581 of the first operand. */
43582 if ((perm & 0xc) == (3 << 2)) continue;
43583 /* And 45CD, as 45 is in the first lane of the first
43584 operand. */
43585 if ((perm & 3) == 2) continue;
43586 /* And 0123, as then the vperm2[fi]128 doesn't change
43587 anything on the original 0123 first operand. */
43588 if ((perm & 0xf) == (1 << 2)) continue;
43589 }
43590
43591 for (i = 0; i < nelt; i++)
43592 {
43593 j = d->perm[i] / nelt2;
43594 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43595 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43596 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43597 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43598 else
43599 break;
43600 }
43601
43602 if (i == nelt)
43603 {
43604 start_sequence ();
43605 ok = expand_vec_perm_1 (&dsecond);
43606 end_sequence ();
43607 }
43608 else
43609 ok = false;
43610
43611 if (ok)
43612 {
43613 if (d->testing_p)
43614 return true;
43615
43616 /* Found a usable second shuffle. dfirst will be
43617 vperm2f128 on d->op0 and d->op1. */
43618 dsecond.testing_p = false;
43619 dfirst = *d;
43620 dfirst.target = gen_reg_rtx (d->vmode);
43621 for (i = 0; i < nelt; i++)
43622 dfirst.perm[i] = (i & (nelt2 - 1))
43623 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43624
43625 ok = expand_vec_perm_1 (&dfirst);
43626 gcc_assert (ok);
43627
43628 /* And dsecond is some single insn shuffle, taking
43629 d->op0 and result of vperm2f128 (if perm < 16) or
43630 d->op1 and result of vperm2f128 (otherwise). */
43631 dsecond.op1 = dfirst.target;
43632 if (perm >= 16)
43633 dsecond.op0 = dfirst.op1;
43634
43635 ok = expand_vec_perm_1 (&dsecond);
43636 gcc_assert (ok);
43637
43638 return true;
43639 }
43640
43641 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43642 if (d->one_operand_p)
43643 return false;
43644 }
43645
43646 return false;
43647 }
43648
43649 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43650 a two vector permutation using 2 intra-lane interleave insns
43651 and cross-lane shuffle for 32-byte vectors. */
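/* Illustrative note (added): for V8SImode the only selectors matched below
   are { 0 8 1 9 2 10 3 11 } and { 4 12 5 13 6 14 7 15 }, i.e. a full
   interleave-low resp. interleave-high of the two operands.  */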
43652
43653 static bool
43654 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43655 {
43656 unsigned i, nelt;
43657 rtx (*gen) (rtx, rtx, rtx);
43658
43659 if (d->one_operand_p)
43660 return false;
43661 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43662 ;
43663 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43664 ;
43665 else
43666 return false;
43667
43668 nelt = d->nelt;
43669 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43670 return false;
43671 for (i = 0; i < nelt; i += 2)
43672 if (d->perm[i] != d->perm[0] + i / 2
43673 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43674 return false;
43675
43676 if (d->testing_p)
43677 return true;
43678
43679 switch (d->vmode)
43680 {
43681 case V32QImode:
43682 if (d->perm[0])
43683 gen = gen_vec_interleave_highv32qi;
43684 else
43685 gen = gen_vec_interleave_lowv32qi;
43686 break;
43687 case V16HImode:
43688 if (d->perm[0])
43689 gen = gen_vec_interleave_highv16hi;
43690 else
43691 gen = gen_vec_interleave_lowv16hi;
43692 break;
43693 case V8SImode:
43694 if (d->perm[0])
43695 gen = gen_vec_interleave_highv8si;
43696 else
43697 gen = gen_vec_interleave_lowv8si;
43698 break;
43699 case V4DImode:
43700 if (d->perm[0])
43701 gen = gen_vec_interleave_highv4di;
43702 else
43703 gen = gen_vec_interleave_lowv4di;
43704 break;
43705 case V8SFmode:
43706 if (d->perm[0])
43707 gen = gen_vec_interleave_highv8sf;
43708 else
43709 gen = gen_vec_interleave_lowv8sf;
43710 break;
43711 case V4DFmode:
43712 if (d->perm[0])
43713 gen = gen_vec_interleave_highv4df;
43714 else
43715 gen = gen_vec_interleave_lowv4df;
43716 break;
43717 default:
43718 gcc_unreachable ();
43719 }
43720
43721 emit_insn (gen (d->target, d->op0, d->op1));
43722 return true;
43723 }
43724
43725 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43726 a single vector permutation using a single intra-lane vector
43727 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43728 the non-swapped and swapped vectors together. */
43729
43730 static bool
43731 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43732 {
43733 struct expand_vec_perm_d dfirst, dsecond;
43734 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43735 rtx seq;
43736 bool ok;
43737 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43738
43739 if (!TARGET_AVX
43740 || TARGET_AVX2
43741 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43742 || !d->one_operand_p)
43743 return false;
43744
43745 dfirst = *d;
43746 for (i = 0; i < nelt; i++)
43747 dfirst.perm[i] = 0xff;
43748 for (i = 0, msk = 0; i < nelt; i++)
43749 {
43750 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43751 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43752 return false;
43753 dfirst.perm[j] = d->perm[i];
43754 if (j != i)
43755 msk |= (1 << i);
43756 }
43757 for (i = 0; i < nelt; i++)
43758 if (dfirst.perm[i] == 0xff)
43759 dfirst.perm[i] = i;
43760
43761 if (!d->testing_p)
43762 dfirst.target = gen_reg_rtx (dfirst.vmode);
43763
43764 start_sequence ();
43765 ok = expand_vec_perm_1 (&dfirst);
43766 seq = get_insns ();
43767 end_sequence ();
43768
43769 if (!ok)
43770 return false;
43771
43772 if (d->testing_p)
43773 return true;
43774
43775 emit_insn (seq);
43776
43777 dsecond = *d;
43778 dsecond.op0 = dfirst.target;
43779 dsecond.op1 = dfirst.target;
43780 dsecond.one_operand_p = true;
43781 dsecond.target = gen_reg_rtx (dsecond.vmode);
43782 for (i = 0; i < nelt; i++)
43783 dsecond.perm[i] = i ^ nelt2;
43784
43785 ok = expand_vec_perm_1 (&dsecond);
43786 gcc_assert (ok);
43787
43788 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43789 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43790 return true;
43791 }
43792
43793 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43794 permutation using two vperm2f128, followed by a vshufpd insn blending
43795 the two vectors together. */
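/* For illustration (added example): with operands 0123 and 4567 and
   d->perm == { 2, 5, 1, 7 }, the two vperm2f128 results are { 2 3 0 1 }
   and { 4 5 6 7 }, and the final vshufpd selects elements { 0, 5, 3, 7 }
   of their concatenation, i.e. { 2, 5, 1, 7 }.  */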
43796
43797 static bool
43798 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43799 {
43800 struct expand_vec_perm_d dfirst, dsecond, dthird;
43801 bool ok;
43802
43803 if (!TARGET_AVX || (d->vmode != V4DFmode))
43804 return false;
43805
43806 if (d->testing_p)
43807 return true;
43808
43809 dfirst = *d;
43810 dsecond = *d;
43811 dthird = *d;
43812
43813 dfirst.perm[0] = (d->perm[0] & ~1);
43814 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43815 dfirst.perm[2] = (d->perm[2] & ~1);
43816 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43817 dsecond.perm[0] = (d->perm[1] & ~1);
43818 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43819 dsecond.perm[2] = (d->perm[3] & ~1);
43820 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43821 dthird.perm[0] = (d->perm[0] % 2);
43822 dthird.perm[1] = (d->perm[1] % 2) + 4;
43823 dthird.perm[2] = (d->perm[2] % 2) + 2;
43824 dthird.perm[3] = (d->perm[3] % 2) + 6;
43825
43826 dfirst.target = gen_reg_rtx (dfirst.vmode);
43827 dsecond.target = gen_reg_rtx (dsecond.vmode);
43828 dthird.op0 = dfirst.target;
43829 dthird.op1 = dsecond.target;
43830 dthird.one_operand_p = false;
43831
43832 canonicalize_perm (&dfirst);
43833 canonicalize_perm (&dsecond);
43834
43835 ok = expand_vec_perm_1 (&dfirst)
43836 && expand_vec_perm_1 (&dsecond)
43837 && expand_vec_perm_1 (&dthird);
43838
43839 gcc_assert (ok);
43840
43841 return true;
43842 }
43843
43844 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43845 permutation with two pshufb insns and an ior. We should have already
43846 failed all two instruction sequences. */
43847
43848 static bool
43849 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43850 {
43851 rtx rperm[2][16], vperm, l, h, op, m128;
43852 unsigned int i, nelt, eltsz;
43853
43854 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43855 return false;
43856 gcc_assert (!d->one_operand_p);
43857
43858 nelt = d->nelt;
43859 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43860
43861 /* Generate two permutation masks. If the required element is within
43862 the given vector it is shuffled into the proper lane. If the required
43863 element is in the other vector, force a zero into the lane by setting
43864 bit 7 in the permutation mask. */
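/* E.g. (added illustration): for a V8HImode extract-even selector
   { 0 2 4 6 8 10 12 14 }, the op0 byte mask is
   { 0 1 4 5 8 9 12 13 -128 -128 -128 -128 -128 -128 -128 -128 } and the
   op1 mask is { -128 ... -128 0 1 4 5 8 9 12 13 }, so the final por
   merges the two partial shuffles.  */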
43865 m128 = GEN_INT (-128);
43866 for (i = 0; i < nelt; ++i)
43867 {
43868 unsigned j, e = d->perm[i];
43869 unsigned which = (e >= nelt);
43870 if (e >= nelt)
43871 e -= nelt;
43872
43873 for (j = 0; j < eltsz; ++j)
43874 {
43875 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43876 rperm[1-which][i*eltsz + j] = m128;
43877 }
43878 }
43879
43880 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43881 vperm = force_reg (V16QImode, vperm);
43882
43883 l = gen_reg_rtx (V16QImode);
43884 op = gen_lowpart (V16QImode, d->op0);
43885 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43886
43887 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43888 vperm = force_reg (V16QImode, vperm);
43889
43890 h = gen_reg_rtx (V16QImode);
43891 op = gen_lowpart (V16QImode, d->op1);
43892 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43893
43894 op = d->target;
43895 if (d->vmode != V16QImode)
43896 op = gen_reg_rtx (V16QImode);
43897 emit_insn (gen_iorv16qi3 (op, l, h));
43898 if (op != d->target)
43899 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43900
43901 return true;
43902 }
43903
43904 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
43905 with two vpshufb insns, vpermq and vpor. We should have already failed
43906 all two or three instruction sequences. */
43907
43908 static bool
43909 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43910 {
43911 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43912 unsigned int i, nelt, eltsz;
43913
43914 if (!TARGET_AVX2
43915 || !d->one_operand_p
43916 || (d->vmode != V32QImode && d->vmode != V16HImode))
43917 return false;
43918
43919 if (d->testing_p)
43920 return true;
43921
43922 nelt = d->nelt;
43923 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43924
43925 /* Generate two permutation masks. If the required element is within
43926 the same lane, it is shuffled in.  If the required element is from the
43927 other lane, force a zero by setting bit 7 in the permutation mask.
43928 In the other mask the elements are non-negative where the element
43929 is requested from the other lane, but they are also moved to the other lane,
43930 so that the result of vpshufb can have the two V2TImode halves
43931 swapped. */
43932 m128 = GEN_INT (-128);
43933 for (i = 0; i < nelt; ++i)
43934 {
43935 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43936 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43937
43938 for (j = 0; j < eltsz; ++j)
43939 {
43940 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43941 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43942 }
43943 }
43944
43945 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43946 vperm = force_reg (V32QImode, vperm);
43947
43948 h = gen_reg_rtx (V32QImode);
43949 op = gen_lowpart (V32QImode, d->op0);
43950 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43951
43952 /* Swap the 128-bit lanes of h into hp.  */
43953 hp = gen_reg_rtx (V4DImode);
43954 op = gen_lowpart (V4DImode, h);
43955 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43956 const1_rtx));
43957
43958 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43959 vperm = force_reg (V32QImode, vperm);
43960
43961 l = gen_reg_rtx (V32QImode);
43962 op = gen_lowpart (V32QImode, d->op0);
43963 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43964
43965 op = d->target;
43966 if (d->vmode != V32QImode)
43967 op = gen_reg_rtx (V32QImode);
43968 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
43969 if (op != d->target)
43970 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43971
43972 return true;
43973 }
43974
43975 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
43976 and extract-odd permutations of two V32QImode or V16HImode operands
43977 with two vpshufb insns, vpor and vpermq. We should have already
43978 failed all two or three instruction sequences. */
43979
43980 static bool
43981 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
43982 {
43983 rtx rperm[2][32], vperm, l, h, ior, op, m128;
43984 unsigned int i, nelt, eltsz;
43985
43986 if (!TARGET_AVX2
43987 || d->one_operand_p
43988 || (d->vmode != V32QImode && d->vmode != V16HImode))
43989 return false;
43990
43991 for (i = 0; i < d->nelt; ++i)
43992 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
43993 return false;
43994
43995 if (d->testing_p)
43996 return true;
43997
43998 nelt = d->nelt;
43999 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44000
44001 /* Generate two permutation masks. In the first permutation mask
44002 the first quarter will contain indexes for the first half
44003 of the op0, the second quarter will contain bit 7 set, third quarter
44004 will contain indexes for the second half of the op0 and the
44005 last quarter bit 7 set. In the second permutation mask
44006 the first quarter will contain bit 7 set, the second quarter
44007 indexes for the first half of the op1, the third quarter bit 7 set
44008 and last quarter indexes for the second half of the op1.
44009 I.e. the first mask e.g. for V32QImode extract even will be:
44010 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44011 (all values masked with 0xf except for -128) and second mask
44012 for extract even will be
44013 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44014 m128 = GEN_INT (-128);
44015 for (i = 0; i < nelt; ++i)
44016 {
44017 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44018 unsigned which = d->perm[i] >= nelt;
44019 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44020
44021 for (j = 0; j < eltsz; ++j)
44022 {
44023 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44024 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44025 }
44026 }
44027
44028 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44029 vperm = force_reg (V32QImode, vperm);
44030
44031 l = gen_reg_rtx (V32QImode);
44032 op = gen_lowpart (V32QImode, d->op0);
44033 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44034
44035 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44036 vperm = force_reg (V32QImode, vperm);
44037
44038 h = gen_reg_rtx (V32QImode);
44039 op = gen_lowpart (V32QImode, d->op1);
44040 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44041
44042 ior = gen_reg_rtx (V32QImode);
44043 emit_insn (gen_iorv32qi3 (ior, l, h));
44044
44045 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44046 op = gen_reg_rtx (V4DImode);
44047 ior = gen_lowpart (V4DImode, ior);
44048 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44049 const1_rtx, GEN_INT (3)));
44050 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44051
44052 return true;
44053 }
44054
44055 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44056 and extract-odd permutations. */
44057
44058 static bool
44059 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44060 {
44061 rtx t1, t2, t3, t4, t5;
44062
44063 switch (d->vmode)
44064 {
44065 case V4DFmode:
44066 t1 = gen_reg_rtx (V4DFmode);
44067 t2 = gen_reg_rtx (V4DFmode);
44068
44069 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44070 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44071 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44072
44073 /* Now an unpck[lh]pd will produce the result required. */
44074 if (odd)
44075 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44076 else
44077 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44078 emit_insn (t3);
44079 break;
44080
44081 case V8SFmode:
44082 {
44083 int mask = odd ? 0xdd : 0x88;
44084
44085 t1 = gen_reg_rtx (V8SFmode);
44086 t2 = gen_reg_rtx (V8SFmode);
44087 t3 = gen_reg_rtx (V8SFmode);
44088
44089 /* Shuffle within the 128-bit lanes to produce:
44090 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44091 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44092 GEN_INT (mask)));
44093
44094 /* Shuffle the lanes around to produce:
44095 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44096 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44097 GEN_INT (0x3)));
44098
44099 /* Shuffle within the 128-bit lanes to produce:
44100 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44101 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44102
44103 /* Shuffle within the 128-bit lanes to produce:
44104 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44105 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44106
44107 /* Shuffle the lanes around to produce:
44108 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44109 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44110 GEN_INT (0x20)));
44111 }
44112 break;
44113
44114 case V2DFmode:
44115 case V4SFmode:
44116 case V2DImode:
44117 case V4SImode:
44118 /* These are always directly implementable by expand_vec_perm_1. */
44119 gcc_unreachable ();
44120
44121 case V8HImode:
44122 if (TARGET_SSSE3)
44123 return expand_vec_perm_pshufb2 (d);
44124 else
44125 {
44126 /* We need 2*log2(N)-1 operations to achieve odd/even
44127 with interleave. */
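/* For V8HImode (N == 8) that is the five interleave insns emitted
   below; the analogous V16QImode fallback below needs seven.
   (Added note.)  */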
44128 t1 = gen_reg_rtx (V8HImode);
44129 t2 = gen_reg_rtx (V8HImode);
44130 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44131 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44132 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44133 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44134 if (odd)
44135 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44136 else
44137 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44138 emit_insn (t3);
44139 }
44140 break;
44141
44142 case V16QImode:
44143 if (TARGET_SSSE3)
44144 return expand_vec_perm_pshufb2 (d);
44145 else
44146 {
44147 t1 = gen_reg_rtx (V16QImode);
44148 t2 = gen_reg_rtx (V16QImode);
44149 t3 = gen_reg_rtx (V16QImode);
44150 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44151 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44152 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44153 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44154 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44155 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44156 if (odd)
44157 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44158 else
44159 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44160 emit_insn (t3);
44161 }
44162 break;
44163
44164 case V16HImode:
44165 case V32QImode:
44166 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44167
44168 case V4DImode:
44169 if (!TARGET_AVX2)
44170 {
44171 struct expand_vec_perm_d d_copy = *d;
44172 d_copy.vmode = V4DFmode;
44173 d_copy.target = gen_reg_rtx (V4DFmode);
44174 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44175 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44176 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44177 {
44178 if (!d->testing_p)
44179 emit_move_insn (d->target,
44180 gen_lowpart (V4DImode, d_copy.target));
44181 return true;
44182 }
44183 return false;
44184 }
44185
44186 t1 = gen_reg_rtx (V4DImode);
44187 t2 = gen_reg_rtx (V4DImode);
44188
44189 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44190 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44191 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44192
44193 /* Now an vpunpck[lh]qdq will produce the result required. */
44194 if (odd)
44195 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44196 else
44197 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44198 emit_insn (t3);
44199 break;
44200
44201 case V8SImode:
44202 if (!TARGET_AVX2)
44203 {
44204 struct expand_vec_perm_d d_copy = *d;
44205 d_copy.vmode = V8SFmode;
44206 d_copy.target = gen_reg_rtx (V8SFmode);
44207 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44208 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44209 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44210 {
44211 if (!d->testing_p)
44212 emit_move_insn (d->target,
44213 gen_lowpart (V8SImode, d_copy.target));
44214 return true;
44215 }
44216 return false;
44217 }
44218
44219 t1 = gen_reg_rtx (V8SImode);
44220 t2 = gen_reg_rtx (V8SImode);
44221 t3 = gen_reg_rtx (V4DImode);
44222 t4 = gen_reg_rtx (V4DImode);
44223 t5 = gen_reg_rtx (V4DImode);
44224
44225 /* Shuffle the lanes around into
44226 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44227 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44228 gen_lowpart (V4DImode, d->op1),
44229 GEN_INT (0x20)));
44230 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44231 gen_lowpart (V4DImode, d->op1),
44232 GEN_INT (0x31)));
44233
44234 /* Swap the 2nd and 3rd position in each lane into
44235 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44236 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44237 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44238 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44239 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44240
44241 /* Now an vpunpck[lh]qdq will produce
44242 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44243 if (odd)
44244 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44245 gen_lowpart (V4DImode, t2));
44246 else
44247 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44248 gen_lowpart (V4DImode, t2));
44249 emit_insn (t3);
44250 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44251 break;
44252
44253 default:
44254 gcc_unreachable ();
44255 }
44256
44257 return true;
44258 }
44259
44260 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44261 extract-even and extract-odd permutations. */
44262
44263 static bool
44264 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44265 {
44266 unsigned i, odd, nelt = d->nelt;
44267
44268 odd = d->perm[0];
44269 if (odd != 0 && odd != 1)
44270 return false;
44271
44272 for (i = 1; i < nelt; ++i)
44273 if (d->perm[i] != 2 * i + odd)
44274 return false;
44275
44276 return expand_vec_perm_even_odd_1 (d, odd);
44277 }
44278
44279 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44280 permutations. We assume that expand_vec_perm_1 has already failed. */
44281
44282 static bool
44283 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44284 {
44285 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44286 enum machine_mode vmode = d->vmode;
44287 unsigned char perm2[4];
44288 rtx op0 = d->op0, dest;
44289 bool ok;
44290
44291 switch (vmode)
44292 {
44293 case V4DFmode:
44294 case V8SFmode:
44295 /* These are special-cased in sse.md so that we can optionally
44296 use the vbroadcast instruction. They expand to two insns
44297 if the input happens to be in a register. */
44298 gcc_unreachable ();
44299
44300 case V2DFmode:
44301 case V2DImode:
44302 case V4SFmode:
44303 case V4SImode:
44304 /* These are always implementable using standard shuffle patterns. */
44305 gcc_unreachable ();
44306
44307 case V8HImode:
44308 case V16QImode:
44309 /* These can be implemented via interleave. We save one insn by
44310 stopping once we have promoted to V4SImode and then using pshufd. */
44311 do
44312 {
44313 rtx dest;
44314 rtx (*gen) (rtx, rtx, rtx)
44315 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44316 : gen_vec_interleave_lowv8hi;
44317
44318 if (elt >= nelt2)
44319 {
44320 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44321 : gen_vec_interleave_highv8hi;
44322 elt -= nelt2;
44323 }
44324 nelt2 /= 2;
44325
44326 dest = gen_reg_rtx (vmode);
44327 emit_insn (gen (dest, op0, op0));
44328 vmode = get_mode_wider_vector (vmode);
44329 op0 = gen_lowpart (vmode, dest);
44330 }
44331 while (vmode != V4SImode);
44332
44333 memset (perm2, elt, 4);
44334 dest = gen_reg_rtx (V4SImode);
44335 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44336 gcc_assert (ok);
44337 if (!d->testing_p)
44338 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44339 return true;
44340
44341 case V32QImode:
44342 case V16HImode:
44343 case V8SImode:
44344 case V4DImode:
44345 /* For AVX2 broadcasts of the first element vpbroadcast* or
44346 vpermq should be used by expand_vec_perm_1. */
44347 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44348 return false;
44349
44350 default:
44351 gcc_unreachable ();
44352 }
44353 }
44354
44355 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44356 broadcast permutations. */
44357
44358 static bool
44359 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44360 {
44361 unsigned i, elt, nelt = d->nelt;
44362
44363 if (!d->one_operand_p)
44364 return false;
44365
44366 elt = d->perm[0];
44367 for (i = 1; i < nelt; ++i)
44368 if (d->perm[i] != elt)
44369 return false;
44370
44371 return expand_vec_perm_broadcast_1 (d);
44372 }
44373
44374 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44375 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44376 all the shorter instruction sequences. */
44377
44378 static bool
44379 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44380 {
44381 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44382 unsigned int i, nelt, eltsz;
44383 bool used[4];
44384
44385 if (!TARGET_AVX2
44386 || d->one_operand_p
44387 || (d->vmode != V32QImode && d->vmode != V16HImode))
44388 return false;
44389
44390 if (d->testing_p)
44391 return true;
44392
44393 nelt = d->nelt;
44394 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44395
44396 /* Generate 4 permutation masks. If the required element is within
44397 the same lane, it is shuffled in.  If the required element is from the
44398 other lane, force a zero by setting bit 7 in the permutation mask.
44399 In the other mask the elements are non-negative where the element
44400 is requested from the other lane, but they are also moved to the other lane,
44401 so that the result of vpshufb can have the two V2TImode halves
44402 swapped. */
44403 m128 = GEN_INT (-128);
44404 for (i = 0; i < 32; ++i)
44405 {
44406 rperm[0][i] = m128;
44407 rperm[1][i] = m128;
44408 rperm[2][i] = m128;
44409 rperm[3][i] = m128;
44410 }
44411 used[0] = false;
44412 used[1] = false;
44413 used[2] = false;
44414 used[3] = false;
44415 for (i = 0; i < nelt; ++i)
44416 {
44417 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44418 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44419 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44420
44421 for (j = 0; j < eltsz; ++j)
44422 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44423 used[which] = true;
44424 }
44425
44426 for (i = 0; i < 2; ++i)
44427 {
44428 if (!used[2 * i + 1])
44429 {
44430 h[i] = NULL_RTX;
44431 continue;
44432 }
44433 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44434 gen_rtvec_v (32, rperm[2 * i + 1]));
44435 vperm = force_reg (V32QImode, vperm);
44436 h[i] = gen_reg_rtx (V32QImode);
44437 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44438 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44439 }
44440
44441 /* Swap the 128-bit lanes of h[X].  */
44442 for (i = 0; i < 2; ++i)
44443 {
44444 if (h[i] == NULL_RTX)
44445 continue;
44446 op = gen_reg_rtx (V4DImode);
44447 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44448 const2_rtx, GEN_INT (3), const0_rtx,
44449 const1_rtx));
44450 h[i] = gen_lowpart (V32QImode, op);
44451 }
44452
44453 for (i = 0; i < 2; ++i)
44454 {
44455 if (!used[2 * i])
44456 {
44457 l[i] = NULL_RTX;
44458 continue;
44459 }
44460 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44461 vperm = force_reg (V32QImode, vperm);
44462 l[i] = gen_reg_rtx (V32QImode);
44463 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44464 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44465 }
44466
44467 for (i = 0; i < 2; ++i)
44468 {
44469 if (h[i] && l[i])
44470 {
44471 op = gen_reg_rtx (V32QImode);
44472 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44473 l[i] = op;
44474 }
44475 else if (h[i])
44476 l[i] = h[i];
44477 }
44478
44479 gcc_assert (l[0] && l[1]);
44480 op = d->target;
44481 if (d->vmode != V32QImode)
44482 op = gen_reg_rtx (V32QImode);
44483 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44484 if (op != d->target)
44485 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44486 return true;
44487 }
44488
44489 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44490 With all of the interface bits taken care of, perform the expansion
44491 in D and return true on success. */
44492
44493 static bool
44494 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44495 {
44496 /* Try a single instruction expansion. */
44497 if (expand_vec_perm_1 (d))
44498 return true;
44499
44500 /* Try sequences of two instructions. */
44501
44502 if (expand_vec_perm_pshuflw_pshufhw (d))
44503 return true;
44504
44505 if (expand_vec_perm_palignr (d))
44506 return true;
44507
44508 if (expand_vec_perm_interleave2 (d))
44509 return true;
44510
44511 if (expand_vec_perm_broadcast (d))
44512 return true;
44513
44514 if (expand_vec_perm_vpermq_perm_1 (d))
44515 return true;
44516
44517 if (expand_vec_perm_vperm2f128 (d))
44518 return true;
44519
44520 /* Try sequences of three instructions. */
44521
44522 if (expand_vec_perm_2vperm2f128_vshuf (d))
44523 return true;
44524
44525 if (expand_vec_perm_pshufb2 (d))
44526 return true;
44527
44528 if (expand_vec_perm_interleave3 (d))
44529 return true;
44530
44531 if (expand_vec_perm_vperm2f128_vblend (d))
44532 return true;
44533
44534 /* Try sequences of four instructions. */
44535
44536 if (expand_vec_perm_vpshufb2_vpermq (d))
44537 return true;
44538
44539 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44540 return true;
44541
44542 /* ??? Look for narrow permutations whose element orderings would
44543 allow the promotion to a wider mode. */
44544
44545 /* ??? Look for sequences of interleave or a wider permute that place
44546 the data into the correct lanes for a half-vector shuffle like
44547 pshuf[lh]w or vpermilps. */
44548
44549 /* ??? Look for sequences of interleave that produce the desired results.
44550 The combinatorics of punpck[lh] get pretty ugly... */
44551
44552 if (expand_vec_perm_even_odd (d))
44553 return true;
44554
44555 /* Even longer sequences. */
44556 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44557 return true;
44558
44559 return false;
44560 }
44561
44562 /* If a permutation only uses one operand, make it clear. Returns true
44563 if the permutation references both operands. */
44564
44565 static bool
44566 canonicalize_perm (struct expand_vec_perm_d *d)
44567 {
44568 int i, which, nelt = d->nelt;
44569
44570 for (i = which = 0; i < nelt; ++i)
44571 which |= (d->perm[i] < nelt ? 1 : 2);
44572
44573 d->one_operand_p = true;
44574 switch (which)
44575 {
44576 default:
44577 gcc_unreachable();
44578
44579 case 3:
44580 if (!rtx_equal_p (d->op0, d->op1))
44581 {
44582 d->one_operand_p = false;
44583 break;
44584 }
44585 /* The elements of PERM do not suggest that only the first operand
44586 is used, but both operands are identical. Allow easier matching
44587 of the permutation by folding the permutation into the single
44588 input vector. */
44589 /* FALLTHRU */
44590
44591 case 2:
44592 for (i = 0; i < nelt; ++i)
44593 d->perm[i] &= nelt - 1;
44594 d->op0 = d->op1;
44595 break;
44596
44597 case 1:
44598 d->op1 = d->op0;
44599 break;
44600 }
44601
44602 return (which == 3);
44603 }
44604
44605 bool
44606 ix86_expand_vec_perm_const (rtx operands[4])
44607 {
44608 struct expand_vec_perm_d d;
44609 unsigned char perm[MAX_VECT_LEN];
44610 int i, nelt;
44611 bool two_args;
44612 rtx sel;
44613
44614 d.target = operands[0];
44615 d.op0 = operands[1];
44616 d.op1 = operands[2];
44617 sel = operands[3];
44618
44619 d.vmode = GET_MODE (d.target);
44620 gcc_assert (VECTOR_MODE_P (d.vmode));
44621 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44622 d.testing_p = false;
44623
44624 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44625 gcc_assert (XVECLEN (sel, 0) == nelt);
44626 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44627
44628 for (i = 0; i < nelt; ++i)
44629 {
44630 rtx e = XVECEXP (sel, 0, i);
44631 int ei = INTVAL (e) & (2 * nelt - 1);
44632 d.perm[i] = ei;
44633 perm[i] = ei;
44634 }
44635
44636 two_args = canonicalize_perm (&d);
44637
44638 if (ix86_expand_vec_perm_const_1 (&d))
44639 return true;
44640
44641 /* If the selector says both arguments are needed, but the operands are the
44642 same, the above tried to expand with one_operand_p and a flattened selector.
44643 If that didn't work, retry without one_operand_p; we succeeded with that
44644 during testing. */
44645 if (two_args && d.one_operand_p)
44646 {
44647 d.one_operand_p = false;
44648 memcpy (d.perm, perm, sizeof (perm));
44649 return ix86_expand_vec_perm_const_1 (&d);
44650 }
44651
44652 return false;
44653 }
44654
44655 /* Implement targetm.vectorize.vec_perm_const_ok. */
44656
44657 static bool
44658 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44659 const unsigned char *sel)
44660 {
44661 struct expand_vec_perm_d d;
44662 unsigned int i, nelt, which;
44663 bool ret;
44664
44665 d.vmode = vmode;
44666 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44667 d.testing_p = true;
44668
44669 /* Given sufficient ISA support we can just return true here
44670 for selected vector modes. */
44671 if (d.vmode == V16SImode || d.vmode == V16SFmode
44672 || d.vmode == V8DFmode || d.vmode == V8DImode)
44673 /* All implementable with a single vpermi2 insn. */
44674 return true;
44675 if (GET_MODE_SIZE (d.vmode) == 16)
44676 {
44677 /* All implementable with a single vpperm insn. */
44678 if (TARGET_XOP)
44679 return true;
44680 /* All implementable with 2 pshufb + 1 ior. */
44681 if (TARGET_SSSE3)
44682 return true;
44683 /* All implementable with shufpd or unpck[lh]pd. */
44684 if (d.nelt == 2)
44685 return true;
44686 }
44687
44688 /* Extract the values from the vector CST into the permutation
44689 array in D. */
44690 memcpy (d.perm, sel, nelt);
44691 for (i = which = 0; i < nelt; ++i)
44692 {
44693 unsigned char e = d.perm[i];
44694 gcc_assert (e < 2 * nelt);
44695 which |= (e < nelt ? 1 : 2);
44696 }
44697
44698 /* For all elements from the second vector, fold the elements to the first. */
44699 if (which == 2)
44700 for (i = 0; i < nelt; ++i)
44701 d.perm[i] -= nelt;
44702
44703 /* Check whether the mask can be applied to the vector type. */
44704 d.one_operand_p = (which != 3);
44705
44706 /* Implementable with shufps or pshufd. */
44707 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44708 return true;
44709
44710 /* Otherwise we have to go through the motions and see if we can
44711 figure out how to generate the requested permutation. */
44712 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44713 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44714 if (!d.one_operand_p)
44715 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44716
44717 start_sequence ();
44718 ret = ix86_expand_vec_perm_const_1 (&d);
44719 end_sequence ();
44720
44721 return ret;
44722 }
44723
44724 void
44725 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44726 {
44727 struct expand_vec_perm_d d;
44728 unsigned i, nelt;
44729
44730 d.target = targ;
44731 d.op0 = op0;
44732 d.op1 = op1;
44733 d.vmode = GET_MODE (targ);
44734 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44735 d.one_operand_p = false;
44736 d.testing_p = false;
44737
44738 for (i = 0; i < nelt; ++i)
44739 d.perm[i] = i * 2 + odd;
44740
44741 /* We'll either be able to implement the permutation directly... */
44742 if (expand_vec_perm_1 (&d))
44743 return;
44744
44745 /* ... or we use the special-case patterns. */
44746 expand_vec_perm_even_odd_1 (&d, odd);
44747 }
44748
44749 static void
44750 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44751 {
44752 struct expand_vec_perm_d d;
44753 unsigned i, nelt, base;
44754 bool ok;
44755
44756 d.target = targ;
44757 d.op0 = op0;
44758 d.op1 = op1;
44759 d.vmode = GET_MODE (targ);
44760 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44761 d.one_operand_p = false;
44762 d.testing_p = false;
44763
44764 base = high_p ? nelt / 2 : 0;
44765 for (i = 0; i < nelt / 2; ++i)
44766 {
44767 d.perm[i * 2] = i + base;
44768 d.perm[i * 2 + 1] = i + base + nelt;
44769 }
44770
44771 /* Note that for AVX this isn't one instruction. */
44772 ok = ix86_expand_vec_perm_const_1 (&d);
44773 gcc_assert (ok);
44774 }
44775
44776
44777 /* Expand a vector operation CODE for a V*QImode in terms of the
44778 same operation on V*HImode. */
44779
44780 void
44781 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44782 {
44783 enum machine_mode qimode = GET_MODE (dest);
44784 enum machine_mode himode;
44785 rtx (*gen_il) (rtx, rtx, rtx);
44786 rtx (*gen_ih) (rtx, rtx, rtx);
44787 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44788 struct expand_vec_perm_d d;
44789 bool ok, full_interleave;
44790 bool uns_p = false;
44791 int i;
44792
44793 switch (qimode)
44794 {
44795 case V16QImode:
44796 himode = V8HImode;
44797 gen_il = gen_vec_interleave_lowv16qi;
44798 gen_ih = gen_vec_interleave_highv16qi;
44799 break;
44800 case V32QImode:
44801 himode = V16HImode;
44802 gen_il = gen_avx2_interleave_lowv32qi;
44803 gen_ih = gen_avx2_interleave_highv32qi;
44804 break;
44805 default:
44806 gcc_unreachable ();
44807 }
44808
44809 op2_l = op2_h = op2;
44810 switch (code)
44811 {
44812 case MULT:
44813 /* Unpack data such that we've got a source byte in each low byte of
44814 each word. We don't care what goes into the high byte of each word.
44815 Rather than trying to get zero in there, most convenient is to let
44816 it be a copy of the low byte. */
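/* Illustration (added note): for V16QImode bytes b0..b15 and c0..c15,
   gen_il/gen_ih build words { b0 b0, b1 b1, ... }; the low byte of each
   V8HImode product (b + 256*b) * (c + 256*c) equals the low byte of b*c,
   which is the QImode result extracted afterwards.  */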
44817 op2_l = gen_reg_rtx (qimode);
44818 op2_h = gen_reg_rtx (qimode);
44819 emit_insn (gen_il (op2_l, op2, op2));
44820 emit_insn (gen_ih (op2_h, op2, op2));
44821 /* FALLTHRU */
44822
44823 op1_l = gen_reg_rtx (qimode);
44824 op1_h = gen_reg_rtx (qimode);
44825 emit_insn (gen_il (op1_l, op1, op1));
44826 emit_insn (gen_ih (op1_h, op1, op1));
44827 full_interleave = qimode == V16QImode;
44828 break;
44829
44830 case ASHIFT:
44831 case LSHIFTRT:
44832 uns_p = true;
44833 /* FALLTHRU */
44834 case ASHIFTRT:
44835 op1_l = gen_reg_rtx (himode);
44836 op1_h = gen_reg_rtx (himode);
44837 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44838 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44839 full_interleave = true;
44840 break;
44841 default:
44842 gcc_unreachable ();
44843 }
44844
44845 /* Perform the operation. */
44846 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44847 1, OPTAB_DIRECT);
44848 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44849 1, OPTAB_DIRECT);
44850 gcc_assert (res_l && res_h);
44851
44852 /* Merge the data back into the right place. */
44853 d.target = dest;
44854 d.op0 = gen_lowpart (qimode, res_l);
44855 d.op1 = gen_lowpart (qimode, res_h);
44856 d.vmode = qimode;
44857 d.nelt = GET_MODE_NUNITS (qimode);
44858 d.one_operand_p = false;
44859 d.testing_p = false;
44860
44861 if (full_interleave)
44862 {
44863 /* For SSE2, we used a full interleave, so the desired
44864 results are in the even elements. */
44865 for (i = 0; i < 32; ++i)
44866 d.perm[i] = i * 2;
44867 }
44868 else
44869 {
44870 /* For AVX, the interleave used above was not cross-lane. So the
44871 extraction is of the even elements, but with the second and third quarters
44872 swapped.  Happily, that is even one insn shorter than plain even extraction. */
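/* Concretely (added note), the selector built below is
   { 0 2 ... 14, 32 34 ... 46, 16 18 ... 30, 48 50 ... 62 }, i.e. the
   even bytes of each 128-bit lane of res_l resp. res_h in lane order.  */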
44873 for (i = 0; i < 32; ++i)
44874 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
44875 }
44876
44877 ok = ix86_expand_vec_perm_const_1 (&d);
44878 gcc_assert (ok);
44879
44880 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44881 gen_rtx_fmt_ee (code, qimode, op1, op2));
44882 }
44883
44884 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44885 if op is CONST_VECTOR with all odd elements equal to their
44886 preceding element. */
44887
44888 static bool
44889 const_vector_equal_evenodd_p (rtx op)
44890 {
44891 enum machine_mode mode = GET_MODE (op);
44892 int i, nunits = GET_MODE_NUNITS (mode);
44893 if (GET_CODE (op) != CONST_VECTOR
44894 || nunits != CONST_VECTOR_NUNITS (op))
44895 return false;
44896 for (i = 0; i < nunits; i += 2)
44897 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44898 return false;
44899 return true;
44900 }
44901
44902 void
44903 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44904 bool uns_p, bool odd_p)
44905 {
44906 enum machine_mode mode = GET_MODE (op1);
44907 enum machine_mode wmode = GET_MODE (dest);
44908 rtx x;
44909 rtx orig_op1 = op1, orig_op2 = op2;
44910
44911 if (!nonimmediate_operand (op1, mode))
44912 op1 = force_reg (mode, op1);
44913 if (!nonimmediate_operand (op2, mode))
44914 op2 = force_reg (mode, op2);
44915
44916 /* We only play even/odd games with vectors of SImode. */
44917 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44918
44919 /* If we're looking for the odd results, shift those members down to
44920 the even slots. For some cpus this is faster than a PSHUFD. */
44921 if (odd_p)
44922 {
44923 /* For XOP use vpmacsdqh, but only for smult, as it is only
44924 signed. */
44925 if (TARGET_XOP && mode == V4SImode && !uns_p)
44926 {
44927 x = force_reg (wmode, CONST0_RTX (wmode));
44928 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44929 return;
44930 }
44931
44932 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44933 if (!const_vector_equal_evenodd_p (orig_op1))
44934 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44935 x, NULL, 1, OPTAB_DIRECT);
44936 if (!const_vector_equal_evenodd_p (orig_op2))
44937 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44938 x, NULL, 1, OPTAB_DIRECT);
44939 op1 = gen_lowpart (mode, op1);
44940 op2 = gen_lowpart (mode, op2);
44941 }
44942
44943 if (mode == V16SImode)
44944 {
44945 if (uns_p)
44946 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
44947 else
44948 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
44949 }
44950 else if (mode == V8SImode)
44951 {
44952 if (uns_p)
44953 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
44954 else
44955 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
44956 }
44957 else if (uns_p)
44958 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
44959 else if (TARGET_SSE4_1)
44960 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
44961 else
44962 {
44963 rtx s1, s2, t0, t1, t2;
44964
44965 /* The easiest way to implement this without PMULDQ is to go through
44966 the motions as if we are performing a full 64-bit multiply, except
44967 that we need to do less shuffling of the elements.  */
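/* Sketch of the identity used (added for clarity): writing each signed
   32-bit element as HI*2^32 + LO, with HI == -1 or 0 (the sign mask
   computed below) and LO the 32-bit value, the 64-bit product is
   LO(A)*LO(B) + ((LO(A)*HI(B) + HI(A)*LO(B)) << 32) modulo 2^64.  */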
44968
44969 /* Compute the sign-extension, aka highparts, of the two operands. */
44970 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44971 op1, pc_rtx, pc_rtx);
44972 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44973 op2, pc_rtx, pc_rtx);
44974
44975 /* Multiply LO(A) * HI(B), and vice-versa. */
44976 t1 = gen_reg_rtx (wmode);
44977 t2 = gen_reg_rtx (wmode);
44978 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
44979 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
44980
44981 /* Multiply LO(A) * LO(B). */
44982 t0 = gen_reg_rtx (wmode);
44983 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
44984
44985 /* Combine and shift the highparts into place. */
44986 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
44987 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
44988 1, OPTAB_DIRECT);
44989
44990 /* Combine high and low parts. */
44991 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
44992 return;
44993 }
44994 emit_insn (x);
44995 }
44996
44997 void
44998 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
44999 bool uns_p, bool high_p)
45000 {
45001 enum machine_mode wmode = GET_MODE (dest);
45002 enum machine_mode mode = GET_MODE (op1);
45003 rtx t1, t2, t3, t4, mask;
45004
45005 switch (mode)
45006 {
45007 case V4SImode:
45008 t1 = gen_reg_rtx (mode);
45009 t2 = gen_reg_rtx (mode);
45010 if (TARGET_XOP && !uns_p)
45011 {
45012 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45013 shuffle the elements once so that all elements are in the right
45014 place for immediate use: { A C B D }. */
45015 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45016 const1_rtx, GEN_INT (3)));
45017 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45018 const1_rtx, GEN_INT (3)));
45019 }
45020 else
45021 {
45022 /* Put the elements into place for the multiply. */
45023 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45024 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45025 high_p = false;
45026 }
45027 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45028 break;
45029
45030 case V8SImode:
45031 /* Shuffle the elements between the lanes. After this we
45032 have { A B E F | C D G H } for each operand. */
45033 t1 = gen_reg_rtx (V4DImode);
45034 t2 = gen_reg_rtx (V4DImode);
45035 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45036 const0_rtx, const2_rtx,
45037 const1_rtx, GEN_INT (3)));
45038 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45039 const0_rtx, const2_rtx,
45040 const1_rtx, GEN_INT (3)));
45041
45042 /* Shuffle the elements within the lanes. After this we
45043 have { A A B B | C C D D } or { E E F F | G G H H }. */
45044 t3 = gen_reg_rtx (V8SImode);
45045 t4 = gen_reg_rtx (V8SImode);
45046 mask = GEN_INT (high_p
45047 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45048 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45049 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45050 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45051
45052 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45053 break;
45054
45055 case V8HImode:
45056 case V16HImode:
45057 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45058 uns_p, OPTAB_DIRECT);
45059 t2 = expand_binop (mode,
45060 uns_p ? umul_highpart_optab : smul_highpart_optab,
45061 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45062 gcc_assert (t1 && t2);
45063
45064 t3 = gen_reg_rtx (mode);
45065 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45066 emit_move_insn (dest, gen_lowpart (wmode, t3));
45067 break;
45068
45069 case V16QImode:
45070 case V32QImode:
45071 t1 = gen_reg_rtx (wmode);
45072 t2 = gen_reg_rtx (wmode);
45073 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45074 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45075
45076 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45077 break;
45078
45079 default:
45080 gcc_unreachable ();
45081 }
45082 }
45083
45084 void
45085 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45086 {
45087 rtx res_1, res_2, res_3, res_4;
45088
45089 res_1 = gen_reg_rtx (V4SImode);
45090 res_2 = gen_reg_rtx (V4SImode);
45091 res_3 = gen_reg_rtx (V2DImode);
45092 res_4 = gen_reg_rtx (V2DImode);
45093 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45094 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45095
45096 /* Move the results in element 2 down to element 1; we don't care
45097 what goes in elements 2 and 3. Then we can merge the parts
45098 back together with an interleave.
45099
45100 Note that two other sequences were tried:
45101 (1) Use interleaves at the start instead of psrldq, which allows
45102 us to use a single shufps to merge things back at the end.
45103 (2) Use shufps here to combine the two vectors, then pshufd to
45104 put the elements in the correct order.
45105 In both cases the cost of the reformatting stall was too high
45106 and the overall sequence slower. */
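/* Concretely (added note): res_3 == { p0, p2 } and res_4 == { p1, p3 }
   as V2DImode products; the two pshufds gather their low 32-bit halves
   and the final punpckldq interleaves them into { p0, p1, p2, p3 }.  */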
45107
45108 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45109 const0_rtx, const2_rtx,
45110 const0_rtx, const0_rtx));
45111 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45112 const0_rtx, const2_rtx,
45113 const0_rtx, const0_rtx));
45114 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45115
45116 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45117 }
45118
45119 void
45120 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45121 {
45122 enum machine_mode mode = GET_MODE (op0);
45123 rtx t1, t2, t3, t4, t5, t6;
45124
45125 if (TARGET_XOP && mode == V2DImode)
45126 {
45127 /* op1: A,B,C,D, op2: E,F,G,H */
45128 op1 = gen_lowpart (V4SImode, op1);
45129 op2 = gen_lowpart (V4SImode, op2);
45130
45131 t1 = gen_reg_rtx (V4SImode);
45132 t2 = gen_reg_rtx (V4SImode);
45133 t3 = gen_reg_rtx (V2DImode);
45134 t4 = gen_reg_rtx (V2DImode);
45135
45136 /* t1: B,A,D,C */
45137 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45138 GEN_INT (1),
45139 GEN_INT (0),
45140 GEN_INT (3),
45141 GEN_INT (2)));
45142
45143 /* t2: (B*E),(A*F),(D*G),(C*H) */
45144 emit_insn (gen_mulv4si3 (t2, t1, op2));
45145
45146 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45147 emit_insn (gen_xop_phadddq (t3, t2));
45148
45149 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45150 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45151
45152 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
45153 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
45154 }
45155 else
45156 {
45157 enum machine_mode nmode;
45158 rtx (*umul) (rtx, rtx, rtx);
45159
45160 if (mode == V2DImode)
45161 {
45162 umul = gen_vec_widen_umult_even_v4si;
45163 nmode = V4SImode;
45164 }
45165 else if (mode == V4DImode)
45166 {
45167 umul = gen_vec_widen_umult_even_v8si;
45168 nmode = V8SImode;
45169 }
45170 else if (mode == V8DImode)
45171 {
45172 umul = gen_vec_widen_umult_even_v16si;
45173 nmode = V16SImode;
45174 }
45175 else
45176 gcc_unreachable ();
45177
45178
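/* Sketch of the schoolbook identity used below (added for clarity):
   with A == (a_hi << 32) + a_lo and B == (b_hi << 32) + b_lo,
   A * B mod 2^64 == a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32).  */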
45179 /* Multiply low parts. */
45180 t1 = gen_reg_rtx (mode);
45181 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45182
45183 /* Shift input vectors right 32 bits so we can multiply high parts. */
45184 t6 = GEN_INT (32);
45185 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45186 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45187
45188 /* Multiply high parts by low parts. */
45189 t4 = gen_reg_rtx (mode);
45190 t5 = gen_reg_rtx (mode);
45191 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45192 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45193
45194 /* Combine and shift the highparts back. */
45195 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45196 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45197
45198 /* Combine high and low parts. */
45199 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45200 }
45201
45202 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45203 gen_rtx_MULT (mode, op1, op2));
45204 }
45205
45206 /* Calculate integer abs() using only SSE2 instructions. */
45207
45208 void
45209 ix86_expand_sse2_abs (rtx target, rtx input)
45210 {
45211 enum machine_mode mode = GET_MODE (target);
45212 rtx tmp0, tmp1, x;
45213
45214 switch (mode)
45215 {
45216 /* For 32-bit signed integer X, the best way to calculate the absolute
45217 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
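/* E.g. (added illustration) for W == 32 and X == -5: X >> 31 == -1,
   (-1 ^ -5) == 4, and 4 - (-1) == 5.  */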
45218 case V4SImode:
45219 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45220 GEN_INT (GET_MODE_BITSIZE
45221 (GET_MODE_INNER (mode)) - 1),
45222 NULL, 0, OPTAB_DIRECT);
45223 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45224 NULL, 0, OPTAB_DIRECT);
45225 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45226 target, 0, OPTAB_DIRECT);
45227 break;
45228
45229 /* For 16-bit signed integer X, the best way to calculate the absolute
45230 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45231 case V8HImode:
45232 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45233
45234 x = expand_simple_binop (mode, SMAX, tmp0, input,
45235 target, 0, OPTAB_DIRECT);
45236 break;
45237
45238 /* For 8-bit signed integer X, the best way to calculate the absolute
45239 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45240 as SSE2 provides the PMINUB insn. */
45241 case V16QImode:
45242 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45243
45244 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45245 target, 0, OPTAB_DIRECT);
45246 break;
45247
45248 default:
45249 gcc_unreachable ();
45250 }
45251
45252 if (x != target)
45253 emit_move_insn (target, x);
45254 }
45255
45256 /* Expand an insert into a vector register through pinsr insn.
45257 Return true if successful. */
45258
45259 bool
45260 ix86_expand_pinsr (rtx *operands)
45261 {
45262 rtx dst = operands[0];
45263 rtx src = operands[3];
45264
45265 unsigned int size = INTVAL (operands[1]);
45266 unsigned int pos = INTVAL (operands[2]);
45267
45268 if (GET_CODE (dst) == SUBREG)
45269 {
45270 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45271 dst = SUBREG_REG (dst);
45272 }
45273
45274 if (GET_CODE (src) == SUBREG)
45275 src = SUBREG_REG (src);
45276
45277 switch (GET_MODE (dst))
45278 {
45279 case V16QImode:
45280 case V8HImode:
45281 case V4SImode:
45282 case V2DImode:
45283 {
45284 enum machine_mode srcmode, dstmode;
45285 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45286
45287 srcmode = mode_for_size (size, MODE_INT, 0);
45288
45289 switch (srcmode)
45290 {
45291 case QImode:
45292 if (!TARGET_SSE4_1)
45293 return false;
45294 dstmode = V16QImode;
45295 pinsr = gen_sse4_1_pinsrb;
45296 break;
45297
45298 case HImode:
45299 if (!TARGET_SSE2)
45300 return false;
45301 dstmode = V8HImode;
45302 pinsr = gen_sse2_pinsrw;
45303 break;
45304
45305 case SImode:
45306 if (!TARGET_SSE4_1)
45307 return false;
45308 dstmode = V4SImode;
45309 pinsr = gen_sse4_1_pinsrd;
45310 break;
45311
45312 case DImode:
45313 gcc_assert (TARGET_64BIT);
45314 if (!TARGET_SSE4_1)
45315 return false;
45316 dstmode = V2DImode;
45317 pinsr = gen_sse4_1_pinsrq;
45318 break;
45319
45320 default:
45321 return false;
45322 }
45323
45324 rtx d = dst;
45325 if (GET_MODE (dst) != dstmode)
45326 d = gen_reg_rtx (dstmode);
45327 src = gen_lowpart (srcmode, src);
45328
45329 pos /= size;
45330
45331 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45332 GEN_INT (1 << pos)));
45333 if (d != dst)
45334 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45335 return true;
45336 }
45337
45338 default:
45339 return false;
45340 }
45341 }
45342 \f
45343 /* This function returns the calling abi specific va_list type node.
45344 It returns the FNDECL specific va_list type. */
45345
45346 static tree
45347 ix86_fn_abi_va_list (tree fndecl)
45348 {
45349 if (!TARGET_64BIT)
45350 return va_list_type_node;
45351 gcc_assert (fndecl != NULL_TREE);
45352
45353 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45354 return ms_va_list_type_node;
45355 else
45356 return sysv_va_list_type_node;
45357 }
45358
45359 /* Returns the canonical va_list type specified by TYPE. If there
45360 is no valid TYPE provided, it returns NULL_TREE.  */
45361
45362 static tree
45363 ix86_canonical_va_list_type (tree type)
45364 {
45365 tree wtype, htype;
45366
45367 /* Resolve references and pointers to va_list type. */
45368 if (TREE_CODE (type) == MEM_REF)
45369 type = TREE_TYPE (type);
45370 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45371 type = TREE_TYPE (type);
45372 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45373 type = TREE_TYPE (type);
45374
45375 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45376 {
45377 wtype = va_list_type_node;
45378 gcc_assert (wtype != NULL_TREE);
45379 htype = type;
45380 if (TREE_CODE (wtype) == ARRAY_TYPE)
45381 {
45382 /* If va_list is an array type, the argument may have decayed
45383 to a pointer type, e.g. by being passed to another function.
45384 In that case, unwrap both types so that we can compare the
45385 underlying records. */
45386 if (TREE_CODE (htype) == ARRAY_TYPE
45387 || POINTER_TYPE_P (htype))
45388 {
45389 wtype = TREE_TYPE (wtype);
45390 htype = TREE_TYPE (htype);
45391 }
45392 }
45393 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45394 return va_list_type_node;
45395 wtype = sysv_va_list_type_node;
45396 gcc_assert (wtype != NULL_TREE);
45397 htype = type;
45398 if (TREE_CODE (wtype) == ARRAY_TYPE)
45399 {
45400 /* If va_list is an array type, the argument may have decayed
45401 to a pointer type, e.g. by being passed to another function.
45402 In that case, unwrap both types so that we can compare the
45403 underlying records. */
45404 if (TREE_CODE (htype) == ARRAY_TYPE
45405 || POINTER_TYPE_P (htype))
45406 {
45407 wtype = TREE_TYPE (wtype);
45408 htype = TREE_TYPE (htype);
45409 }
45410 }
45411 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45412 return sysv_va_list_type_node;
45413 wtype = ms_va_list_type_node;
45414 gcc_assert (wtype != NULL_TREE);
45415 htype = type;
45416 if (TREE_CODE (wtype) == ARRAY_TYPE)
45417 {
45418 /* If va_list is an array type, the argument may have decayed
45419 to a pointer type, e.g. by being passed to another function.
45420 In that case, unwrap both types so that we can compare the
45421 underlying records. */
45422 if (TREE_CODE (htype) == ARRAY_TYPE
45423 || POINTER_TYPE_P (htype))
45424 {
45425 wtype = TREE_TYPE (wtype);
45426 htype = TREE_TYPE (htype);
45427 }
45428 }
45429 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45430 return ms_va_list_type_node;
45431 return NULL_TREE;
45432 }
45433 return std_canonical_va_list_type (type);
45434 }
45435
45436 /* Iterate through the target-specific builtin types for va_list.
45437 IDX denotes the iterator, *PTREE is set to the type of
45438 the va_list builtin, and *PNAME to its internal name.
45439 Returns zero if there is no element for this index, otherwise
45440 IDX should be increased upon the next call.
45441 Note, do not iterate a base builtin's name like __builtin_va_list.
45442 Used from c_common_nodes_and_builtins. */
45443
45444 static int
45445 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45446 {
45447 if (TARGET_64BIT)
45448 {
45449 switch (idx)
45450 {
45451 default:
45452 break;
45453
45454 case 0:
45455 *ptree = ms_va_list_type_node;
45456 *pname = "__builtin_ms_va_list";
45457 return 1;
45458
45459 case 1:
45460 *ptree = sysv_va_list_type_node;
45461 *pname = "__builtin_sysv_va_list";
45462 return 1;
45463 }
45464 }
45465
45466 return 0;
45467 }
45468
45469 #undef TARGET_SCHED_DISPATCH
45470 #define TARGET_SCHED_DISPATCH has_dispatch
45471 #undef TARGET_SCHED_DISPATCH_DO
45472 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45473 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45474 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45475 #undef TARGET_SCHED_REORDER
45476 #define TARGET_SCHED_REORDER ix86_sched_reorder
45477 #undef TARGET_SCHED_ADJUST_PRIORITY
45478 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45479 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45480 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45481 ix86_dependencies_evaluation_hook
45482
45483 /* The size of the dispatch window is the total number of bytes of
45484 object code allowed in a window. */
45485 #define DISPATCH_WINDOW_SIZE 16
45486
45487 /* Number of dispatch windows considered for scheduling. */
45488 #define MAX_DISPATCH_WINDOWS 3
45489
45490 /* Maximum number of instructions in a window. */
45491 #define MAX_INSN 4
45492
45493 /* Maximum number of immediate operands in a window. */
45494 #define MAX_IMM 4
45495
45496 /* Maximum number of immediate bits allowed in a window. */
45497 #define MAX_IMM_SIZE 128
45498
45499 /* Maximum number of 32 bit immediates allowed in a window. */
45500 #define MAX_IMM_32 4
45501
45502 /* Maximum number of 64 bit immediates allowed in a window. */
45503 #define MAX_IMM_64 2
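/* These limits are consistent with MAX_IMM_SIZE: four 32 bit or two
   64 bit immediates fill the 128 immediate bits of a window. */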
45504
45505 /* Maximum total of loads or prefetches allowed in a window. */
45506 #define MAX_LOAD 2
45507
45508 /* Maximum total of stores allowed in a window. */
45509 #define MAX_STORE 1
45510
45511 #undef BIG
45512 #define BIG 100
45513
45514
45515 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45516 enum dispatch_group {
45517 disp_no_group = 0,
45518 disp_load,
45519 disp_store,
45520 disp_load_store,
45521 disp_prefetch,
45522 disp_imm,
45523 disp_imm_32,
45524 disp_imm_64,
45525 disp_branch,
45526 disp_cmp,
45527 disp_jcc,
45528 disp_last
45529 };
45530
45531 /* Number of allowable groups in a dispatch window. It is an array
45532 indexed by the dispatch_group enum. 100 is used as a big number,
45533 because the number of these kinds of operations does not have any
45534 effect on the dispatch window, but we need them for other reasons in
45535 the table. */
45536 static unsigned int num_allowable_groups[disp_last] = {
45537 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45538 };
45539
45540 char group_name[disp_last + 1][16] = {
45541 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45542 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45543 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45544 };
45545
45546 /* Instruction path. */
45547 enum insn_path {
45548 no_path = 0,
45549 path_single, /* Single micro op. */
45550 path_double, /* Double micro op. */
45551 path_multi, /* Instructions with more than 2 micro ops. */
45552 last_path
45553 };
45554
45555 /* sched_insn_info describes one entry of a dispatch window: the
45556 instruction scheduled into it, together with its dispatch group,
45557 decode path, byte length and immediate size.
45558
45559 Windows are allocated for each basic block and are linked
45560 together. */
45561 typedef struct sched_insn_info_s {
45562 rtx insn;
45563 enum dispatch_group group;
45564 enum insn_path path;
45565 int byte_len;
45566 int imm_bytes;
45567 } sched_insn_info;
45568
45569 /* Linked list of dispatch windows. This is a two-way list of
45570 dispatch windows of a basic block. It contains information about
45571 the number of uops in the window and the total number of
45572 instructions and of bytes in the object code for this dispatch
45573 window. */
45574 typedef struct dispatch_windows_s {
45575 int num_insn; /* Number of insn in the window. */
45576 int num_uops; /* Number of uops in the window. */
45577 int window_size; /* Number of bytes in the window. */
45578 int window_num; /* Window number, either 0 or 1. */
45579 int num_imm; /* Number of immediates in the window. */
45580 int num_imm_32; /* Number of 32 bit immediates in the window. */
45581 int num_imm_64; /* Number of 64 bit immediates in the window. */
45582 int imm_size; /* Total size in bytes of immediates in the window. */
45583 int num_loads; /* Total memory loads in the window. */
45584 int num_stores; /* Total memory stores in the window. */
45585 int violation; /* Violation exists in window. */
45586 sched_insn_info *window; /* Pointer to the window. */
45587 struct dispatch_windows_s *next;
45588 struct dispatch_windows_s *prev;
45589 } dispatch_windows;
45590
45591 /* Immediate values used in an insn. */
45592 typedef struct imm_info_s
45593 {
45594 int imm;
45595 int imm32;
45596 int imm64;
45597 } imm_info;
45598
45599 static dispatch_windows *dispatch_window_list;
45600 static dispatch_windows *dispatch_window_list1;
45601
45602 /* Get the memory-related dispatch group of INSN. */
45603
45604 static enum dispatch_group
45605 get_mem_group (rtx insn)
45606 {
45607 enum attr_memory memory;
45608
45609 if (INSN_CODE (insn) < 0)
45610 return disp_no_group;
45611 memory = get_attr_memory (insn);
45612 if (memory == MEMORY_STORE)
45613 return disp_store;
45614
45615 if (memory == MEMORY_LOAD)
45616 return disp_load;
45617
45618 if (memory == MEMORY_BOTH)
45619 return disp_load_store;
45620
45621 return disp_no_group;
45622 }
45623
45624 /* Return true if insn is a compare instruction. */
45625
45626 static bool
45627 is_cmp (rtx insn)
45628 {
45629 enum attr_type type;
45630
45631 type = get_attr_type (insn);
45632 return (type == TYPE_TEST
45633 || type == TYPE_ICMP
45634 || type == TYPE_FCMP
45635 || GET_CODE (PATTERN (insn)) == COMPARE);
45636 }
45637
45638 /* Return true if a dispatch violation was encountered. */
45639
45640 static bool
45641 dispatch_violation (void)
45642 {
45643 if (dispatch_window_list->next)
45644 return dispatch_window_list->next->violation;
45645 return dispatch_window_list->violation;
45646 }
45647
45648 /* Return true if insn is a branch instruction. */
45649
45650 static bool
45651 is_branch (rtx insn)
45652 {
45653 return (CALL_P (insn) || JUMP_P (insn));
45654 }
45655
45656 /* Return true if insn is a prefetch instruction. */
45657
45658 static bool
45659 is_prefetch (rtx insn)
45660 {
45661 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45662 }
45663
45664 /* This function initializes a dispatch window and the list container holding a
45665 pointer to the window. */
45666
45667 static void
45668 init_window (int window_num)
45669 {
45670 int i;
45671 dispatch_windows *new_list;
45672
45673 if (window_num == 0)
45674 new_list = dispatch_window_list;
45675 else
45676 new_list = dispatch_window_list1;
45677
45678 new_list->num_insn = 0;
45679 new_list->num_uops = 0;
45680 new_list->window_size = 0;
45681 new_list->next = NULL;
45682 new_list->prev = NULL;
45683 new_list->window_num = window_num;
45684 new_list->num_imm = 0;
45685 new_list->num_imm_32 = 0;
45686 new_list->num_imm_64 = 0;
45687 new_list->imm_size = 0;
45688 new_list->num_loads = 0;
45689 new_list->num_stores = 0;
45690 new_list->violation = false;
45691
45692 for (i = 0; i < MAX_INSN; i++)
45693 {
45694 new_list->window[i].insn = NULL;
45695 new_list->window[i].group = disp_no_group;
45696 new_list->window[i].path = no_path;
45697 new_list->window[i].byte_len = 0;
45698 new_list->window[i].imm_bytes = 0;
45699 }
45700 return;
45701 }
45702
45703 /* This function allocates and initializes a dispatch window and the
45704 list container holding a pointer to the window. */
45705
45706 static dispatch_windows *
45707 allocate_window (void)
45708 {
45709 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45710 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45711
45712 return new_list;
45713 }
45714
45715 /* This routine initializes the dispatch scheduling information. It
45716 initiates building dispatch scheduler tables and constructs the
45717 first dispatch window. */
45718
45719 static void
45720 init_dispatch_sched (void)
45721 {
45722 /* Allocate a dispatch list and a window. */
45723 dispatch_window_list = allocate_window ();
45724 dispatch_window_list1 = allocate_window ();
45725 init_window (0);
45726 init_window (1);
45727 }
45728
45729 /* This function returns true if GROUP ends a basic block. The end of a
45730 basic block does not have to be a branch, but here we assume only branches end a
45731 window. */
45732
45733 static bool
45734 is_end_basic_block (enum dispatch_group group)
45735 {
45736 return group == disp_branch;
45737 }
45738
45739 /* This function is called when the end of a window processing is reached. */
45740
45741 static void
45742 process_end_window (void)
45743 {
45744 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45745 if (dispatch_window_list->next)
45746 {
45747 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45748 gcc_assert (dispatch_window_list->window_size
45749 + dispatch_window_list1->window_size <= 48);
45750 init_window (1);
45751 }
45752 init_window (0);
45753 }
45754
45755 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45756 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45757 for 48 bytes of instructions. Note that these windows are not dispatch
45758 windows whose sizes are DISPATCH_WINDOW_SIZE. */
45759
45760 static dispatch_windows *
45761 allocate_next_window (int window_num)
45762 {
45763 if (window_num == 0)
45764 {
45765 if (dispatch_window_list->next)
45766 init_window (1);
45767 init_window (0);
45768 return dispatch_window_list;
45769 }
45770
45771 dispatch_window_list->next = dispatch_window_list1;
45772 dispatch_window_list1->prev = dispatch_window_list;
45773
45774 return dispatch_window_list1;
45775 }
45776
45777 /* Count the immediate operands in *IN_RTX, updating IMM_VALUES. */
45778
45779 static int
45780 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45781 {
45782 if (*in_rtx == 0)
45783 return 0;
45784
45785 switch (GET_CODE (*in_rtx))
45786 {
45787 case CONST:
45788 case SYMBOL_REF:
45789 case CONST_INT:
45790 (imm_values->imm)++;
45791 if (x86_64_immediate_operand (*in_rtx, SImode))
45792 (imm_values->imm32)++;
45793 else
45794 (imm_values->imm64)++;
45795 break;
45796
45797 case CONST_DOUBLE:
45798 (imm_values->imm)++;
45799 (imm_values->imm64)++;
45800 break;
45801
45802 case CODE_LABEL:
45803 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45804 {
45805 (imm_values->imm)++;
45806 (imm_values->imm32)++;
45807 }
45808 break;
45809
45810 default:
45811 break;
45812 }
45813
45814 return 0;
45815 }
45816
45817 /* Compute number of immediate operands of an instruction. */
45818
45819 static void
45820 find_constant (rtx in_rtx, imm_info *imm_values)
45821 {
45822 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45823 (rtx_function) find_constant_1, (void *) imm_values);
45824 }
45825
45826 /* Return the total size of the immediate operands of an instruction along with
45827 the number of corresponding immediate operands. It initializes its parameters
45828 to zero before calling FIND_CONSTANT.
45829 INSN is the input instruction. IMM is the total number of immediates.
45830 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45831 bit immediates. */
45832
45833 static int
45834 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45835 {
45836 imm_info imm_values = {0, 0, 0};
45837
45838 find_constant (insn, &imm_values);
45839 *imm = imm_values.imm;
45840 *imm32 = imm_values.imm32;
45841 *imm64 = imm_values.imm64;
45842 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45843 }
45844
45845 /* This function returns true if INSN has at least one immediate
45846 operand. */
45847
45848 static bool
45849 has_immediate (rtx insn)
45850 {
45851 int num_imm_operand;
45852 int num_imm32_operand;
45853 int num_imm64_operand;
45854
45855 if (insn)
45856 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45857 &num_imm64_operand);
45858 return false;
45859 }
45860
45861 /* Return the decode path (single, double or multi) of INSN. */
45862
45863 static enum insn_path
45864 get_insn_path (rtx insn)
45865 {
45866 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45867
45868 if ((int)path == 0)
45869 return path_single;
45870
45871 if ((int)path == 1)
45872 return path_double;
45873
45874 return path_multi;
45875 }
45876
45877 /* Return insn dispatch group. */
45878
45879 static enum dispatch_group
45880 get_insn_group (rtx insn)
45881 {
45882 enum dispatch_group group = get_mem_group (insn);
45883 if (group)
45884 return group;
45885
45886 if (is_branch (insn))
45887 return disp_branch;
45888
45889 if (is_cmp (insn))
45890 return disp_cmp;
45891
45892 if (has_immediate (insn))
45893 return disp_imm;
45894
45895 if (is_prefetch (insn))
45896 return disp_prefetch;
45897
45898 return disp_no_group;
45899 }
45900
45901 /* Count number of GROUP restricted instructions in a dispatch
45902 window WINDOW_LIST. */
45903
45904 static int
45905 count_num_restricted (rtx insn, dispatch_windows *window_list)
45906 {
45907 enum dispatch_group group = get_insn_group (insn);
45908 int imm_size;
45909 int num_imm_operand;
45910 int num_imm32_operand;
45911 int num_imm64_operand;
45912
45913 if (group == disp_no_group)
45914 return 0;
45915
45916 if (group == disp_imm)
45917 {
45918 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45919 &num_imm64_operand);
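/* Returning BIG makes the caller treat INSN as not fitting in this
   window once any per-window immediate limit would be exceeded. */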
45920 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45921 || num_imm_operand + window_list->num_imm > MAX_IMM
45922 || (num_imm32_operand > 0
45923 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45924 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45925 || (num_imm64_operand > 0
45926 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45927 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45928 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45929 && num_imm64_operand > 0
45930 && ((window_list->num_imm_64 > 0
45931 && window_list->num_insn >= 2)
45932 || window_list->num_insn >= 3)))
45933 return BIG;
45934
45935 return 1;
45936 }
45937
45938 if ((group == disp_load_store
45939 && (window_list->num_loads >= MAX_LOAD
45940 || window_list->num_stores >= MAX_STORE))
45941 || ((group == disp_load
45942 || group == disp_prefetch)
45943 && window_list->num_loads >= MAX_LOAD)
45944 || (group == disp_store
45945 && window_list->num_stores >= MAX_STORE))
45946 return BIG;
45947
45948 return 1;
45949 }
45950
45951 /* This function returns true if insn satisfies dispatch rules on the
45952 last window scheduled. */
45953
45954 static bool
45955 fits_dispatch_window (rtx insn)
45956 {
45957 dispatch_windows *window_list = dispatch_window_list;
45958 dispatch_windows *window_list_next = dispatch_window_list->next;
45959 unsigned int num_restrict;
45960 enum dispatch_group group = get_insn_group (insn);
45961 enum insn_path path = get_insn_path (insn);
45962 int sum;
45963
45964 /* Make disp_cmp and disp_jcc insns get scheduled last. These
45965 instructions should be given the lowest priority in the
45966 Haifa scheduler to make sure they will be
45967 scheduled in the same dispatch window as the instructions that use them. */
45968 if (group == disp_jcc || group == disp_cmp)
45969 return false;
45970
45971 /* Check nonrestricted. */
45972 if (group == disp_no_group || group == disp_branch)
45973 return true;
45974
45975 /* Get last dispatch window. */
45976 if (window_list_next)
45977 window_list = window_list_next;
45978
45979 if (window_list->window_num == 1)
45980 {
45981 sum = window_list->prev->window_size + window_list->window_size;
45982
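/* The 48-byte limit below matches MAX_DISPATCH_WINDOWS (3)
   * DISPATCH_WINDOW_SIZE (16). */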
45983 if (sum == 32
45984 || (min_insn_size (insn) + sum) >= 48)
45985 /* Window 1 is full. Go for next window. */
45986 return true;
45987 }
45988
45989 num_restrict = count_num_restricted (insn, window_list);
45990
45991 if (num_restrict > num_allowable_groups[group])
45992 return false;
45993
45994 /* See if it fits in the first window. */
45995 if (window_list->window_num == 0)
45996 {
45997 /* The first window should have only single and double path
45998 uops. */
45999 if (path == path_double
46000 && (window_list->num_uops + 2) > MAX_INSN)
46001 return false;
46002 else if (path != path_single)
46003 return false;
46004 }
46005 return true;
46006 }
46007
46008 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46009 dispatch window WINDOW_LIST. */
46010
46011 static void
46012 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46013 {
46014 int byte_len = min_insn_size (insn);
46015 int num_insn = window_list->num_insn;
46016 int imm_size;
46017 sched_insn_info *window = window_list->window;
46018 enum dispatch_group group = get_insn_group (insn);
46019 enum insn_path path = get_insn_path (insn);
46020 int num_imm_operand;
46021 int num_imm32_operand;
46022 int num_imm64_operand;
46023
46024 if (!window_list->violation && group != disp_cmp
46025 && !fits_dispatch_window (insn))
46026 window_list->violation = true;
46027
46028 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46029 &num_imm64_operand);
46030
46031 /* Initialize window with new instruction. */
46032 window[num_insn].insn = insn;
46033 window[num_insn].byte_len = byte_len;
46034 window[num_insn].group = group;
46035 window[num_insn].path = path;
46036 window[num_insn].imm_bytes = imm_size;
46037
46038 window_list->window_size += byte_len;
46039 window_list->num_insn = num_insn + 1;
46040 window_list->num_uops = window_list->num_uops + num_uops;
46041 window_list->imm_size += imm_size;
46042 window_list->num_imm += num_imm_operand;
46043 window_list->num_imm_32 += num_imm32_operand;
46044 window_list->num_imm_64 += num_imm64_operand;
46045
46046 if (group == disp_store)
46047 window_list->num_stores += 1;
46048 else if (group == disp_load
46049 || group == disp_prefetch)
46050 window_list->num_loads += 1;
46051 else if (group == disp_load_store)
46052 {
46053 window_list->num_stores += 1;
46054 window_list->num_loads += 1;
46055 }
46056 }
46057
46058 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46059 If the total bytes of instructions or the number of instructions in
46060 the window exceeds the allowable limit, it allocates a new window. */
46061
46062 static void
46063 add_to_dispatch_window (rtx insn)
46064 {
46065 int byte_len;
46066 dispatch_windows *window_list;
46067 dispatch_windows *next_list;
46068 dispatch_windows *window0_list;
46069 enum insn_path path;
46070 enum dispatch_group insn_group;
46071 bool insn_fits;
46072 int num_insn;
46073 int num_uops;
46074 int window_num;
46075 int insn_num_uops;
46076 int sum;
46077
46078 if (INSN_CODE (insn) < 0)
46079 return;
46080
46081 byte_len = min_insn_size (insn);
46082 window_list = dispatch_window_list;
46083 next_list = window_list->next;
46084 path = get_insn_path (insn);
46085 insn_group = get_insn_group (insn);
46086
46087 /* Get the last dispatch window. */
46088 if (next_list)
46089 window_list = dispatch_window_list->next;
46090
46091 if (path == path_single)
46092 insn_num_uops = 1;
46093 else if (path == path_double)
46094 insn_num_uops = 2;
46095 else
46096 insn_num_uops = (int) path;
46097
46098 /* If the current window is full, get a new window.
46099 Window number zero is full if MAX_INSN uops are scheduled in it.
46100 Window number one is full if window zero's bytes plus window
46101 one's bytes is 32, or if adding the bytes of the new instruction
46102 to the total makes it 48 or more, or if it already has MAX_INSN
46103 instructions in it. */
46104 num_insn = window_list->num_insn;
46105 num_uops = window_list->num_uops;
46106 window_num = window_list->window_num;
46107 insn_fits = fits_dispatch_window (insn);
46108
46109 if (num_insn >= MAX_INSN
46110 || num_uops + insn_num_uops > MAX_INSN
46111 || !(insn_fits))
46112 {
46113 window_num = ~window_num & 1;
46114 window_list = allocate_next_window (window_num);
46115 }
46116
46117 if (window_num == 0)
46118 {
46119 add_insn_window (insn, window_list, insn_num_uops);
46120 if (window_list->num_insn >= MAX_INSN
46121 && insn_group == disp_branch)
46122 {
46123 process_end_window ();
46124 return;
46125 }
46126 }
46127 else if (window_num == 1)
46128 {
46129 window0_list = window_list->prev;
46130 sum = window0_list->window_size + window_list->window_size;
46131 if (sum == 32
46132 || (byte_len + sum) >= 48)
46133 {
46134 process_end_window ();
46135 window_list = dispatch_window_list;
46136 }
46137
46138 add_insn_window (insn, window_list, insn_num_uops);
46139 }
46140 else
46141 gcc_unreachable ();
46142
46143 if (is_end_basic_block (insn_group))
46144 {
46145 /* The end of the basic block has been reached; do end-of-basic-block processing. */
46146 process_end_window ();
46147 return;
46148 }
46149 }
46150
46151 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46152
46153 DEBUG_FUNCTION static void
46154 debug_dispatch_window_file (FILE *file, int window_num)
46155 {
46156 dispatch_windows *list;
46157 int i;
46158
46159 if (window_num == 0)
46160 list = dispatch_window_list;
46161 else
46162 list = dispatch_window_list1;
46163
46164 fprintf (file, "Window #%d:\n", list->window_num);
46165 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46166 list->num_insn, list->num_uops, list->window_size);
46167 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46168 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46169
46170 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46171 list->num_stores);
46172 fprintf (file, " insn info:\n");
46173
46174 for (i = 0; i < MAX_INSN; i++)
46175 {
46176 if (!list->window[i].insn)
46177 break;
46178 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46179 i, group_name[list->window[i].group],
46180 i, (void *)list->window[i].insn,
46181 i, list->window[i].path,
46182 i, list->window[i].byte_len,
46183 i, list->window[i].imm_bytes);
46184 }
46185 }
46186
46187 /* Print to stdout a dispatch window. */
46188
46189 DEBUG_FUNCTION void
46190 debug_dispatch_window (int window_num)
46191 {
46192 debug_dispatch_window_file (stdout, window_num);
46193 }
46194
46195 /* Print INSN dispatch information to FILE. */
46196
46197 DEBUG_FUNCTION static void
46198 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46199 {
46200 int byte_len;
46201 enum insn_path path;
46202 enum dispatch_group group;
46203 int imm_size;
46204 int num_imm_operand;
46205 int num_imm32_operand;
46206 int num_imm64_operand;
46207
46208 if (INSN_CODE (insn) < 0)
46209 return;
46210
46211 byte_len = min_insn_size (insn);
46212 path = get_insn_path (insn);
46213 group = get_insn_group (insn);
46214 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46215 &num_imm64_operand);
46216
46217 fprintf (file, " insn info:\n");
46218 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46219 group_name[group], path, byte_len);
46220 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46221 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46222 }
46223
46224 /* Print to stdout the status of the ready list with respect to
46225 dispatch windows. */
46226
46227 DEBUG_FUNCTION void
46228 debug_ready_dispatch (void)
46229 {
46230 int i;
46231 int no_ready = number_in_ready ();
46232
46233 fprintf (stdout, "Number of ready: %d\n", no_ready);
46234
46235 for (i = 0; i < no_ready; i++)
46236 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46237 }
46238
46239 /* This routine is the driver of the dispatch scheduler. */
46240
46241 static void
46242 do_dispatch (rtx insn, int mode)
46243 {
46244 if (mode == DISPATCH_INIT)
46245 init_dispatch_sched ();
46246 else if (mode == ADD_TO_DISPATCH_WINDOW)
46247 add_to_dispatch_window (insn);
46248 }
46249
46250 /* Return TRUE if Dispatch Scheduling is supported. */
46251
46252 static bool
46253 has_dispatch (rtx insn, int action)
46254 {
46255 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46256 && flag_dispatch_scheduler)
46257 switch (action)
46258 {
46259 default:
46260 return false;
46261
46262 case IS_DISPATCH_ON:
46263 return true;
46264 break;
46265
46266 case IS_CMP:
46267 return is_cmp (insn);
46268
46269 case DISPATCH_VIOLATION:
46270 return dispatch_violation ();
46271
46272 case FITS_DISPATCH_WINDOW:
46273 return fits_dispatch_window (insn);
46274 }
46275
46276 return false;
46277 }
46278
46279 /* Implementation of reassociation_width target hook used by
46280 reassoc phase to identify parallelism level in reassociated
46281 tree. The statement's tree_code is passed in OPC. The argument's type
46282 is passed in MODE.
46283
46284 Currently parallel reassociation is enabled for Atom
46285 processors only and we set reassociation width to be 2
46286 because Atom may issue up to 2 instructions per cycle.
46287
46288 Return value should be fixed if parallel reassociation is
46289 enabled for other processors. */
46290
46291 static int
46292 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46293 enum machine_mode mode)
46294 {
46295 int res = 1;
46296
46297 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46298 res = 2;
46299 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46300 res = 2;
46301
46302 return res;
46303 }
46304
46305 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46306 place emms and femms instructions. */
46307
46308 static enum machine_mode
46309 ix86_preferred_simd_mode (enum machine_mode mode)
46310 {
46311 if (!TARGET_SSE)
46312 return word_mode;
46313
46314 switch (mode)
46315 {
46316 case QImode:
46317 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46318 case HImode:
46319 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46320 case SImode:
46321 return TARGET_AVX512F ? V16SImode :
46322 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46323 case DImode:
46324 return TARGET_AVX512F ? V8DImode :
46325 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46326
46327 case SFmode:
46328 if (TARGET_AVX512F)
46329 return V16SFmode;
46330 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46331 return V8SFmode;
46332 else
46333 return V4SFmode;
46334
46335 case DFmode:
46336 if (!TARGET_VECTORIZE_DOUBLE)
46337 return word_mode;
46338 else if (TARGET_AVX512F)
46339 return V8DFmode;
46340 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46341 return V4DFmode;
46342 else if (TARGET_SSE2)
46343 return V2DFmode;
46344 /* FALLTHRU */
46345
46346 default:
46347 return word_mode;
46348 }
46349 }
46350
46351 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46352 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46353 256bit and 128bit vectors. */
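/* The returned value is a bitmask of the supported vector sizes in bytes. */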
46354
46355 static unsigned int
46356 ix86_autovectorize_vector_sizes (void)
46357 {
46358 return TARGET_AVX512F ? 64 | 32 | 16 :
46359 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46360 }
46361
46362 \f
46363
46364 /* Return the class of registers which could be used for a pseudo of MODE
46365 and of class RCLASS for spilling instead of memory. Return NO_REGS
46366 if it is not possible or not profitable. */
46367 static reg_class_t
46368 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46369 {
46370 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46371 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46372 && INTEGER_CLASS_P (rclass))
46373 return ALL_SSE_REGS;
46374 return NO_REGS;
46375 }
46376
46377 /* Implement targetm.vectorize.init_cost. */
46378
46379 static void *
46380 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46381 {
46382 unsigned *cost = XNEWVEC (unsigned, 3);
46383 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46384 return cost;
46385 }
46386
46387 /* Implement targetm.vectorize.add_stmt_cost. */
46388
46389 static unsigned
46390 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46391 struct _stmt_vec_info *stmt_info, int misalign,
46392 enum vect_cost_model_location where)
46393 {
46394 unsigned *cost = (unsigned *) data;
46395 unsigned retval = 0;
46396
46397 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46398 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46399
46400 /* Statements in an inner loop relative to the loop being
46401 vectorized are weighted more heavily. The value here is
46402 arbitrary and could potentially be improved with analysis. */
46403 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46404 count *= 50; /* FIXME. */
46405
46406 retval = (unsigned) (count * stmt_cost);
46407 cost[where] += retval;
46408
46409 return retval;
46410 }
46411
46412 /* Implement targetm.vectorize.finish_cost. */
46413
46414 static void
46415 ix86_finish_cost (void *data, unsigned *prologue_cost,
46416 unsigned *body_cost, unsigned *epilogue_cost)
46417 {
46418 unsigned *cost = (unsigned *) data;
46419 *prologue_cost = cost[vect_prologue];
46420 *body_cost = cost[vect_body];
46421 *epilogue_cost = cost[vect_epilogue];
46422 }
46423
46424 /* Implement targetm.vectorize.destroy_cost_data. */
46425
46426 static void
46427 ix86_destroy_cost_data (void *data)
46428 {
46429 free (data);
46430 }
46431
46432 /* Validate target specific memory model bits in VAL. */
46433
46434 static unsigned HOST_WIDE_INT
46435 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46436 {
46437 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46438 bool strong;
46439
46440 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46441 |MEMMODEL_MASK)
46442 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46443 {
46444 warning (OPT_Winvalid_memory_model,
46445 "Unknown architecture specific memory model");
46446 return MEMMODEL_SEQ_CST;
46447 }
46448 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46449 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46450 {
46451 warning (OPT_Winvalid_memory_model,
46452 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46453 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46454 }
46455 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46456 {
46457 warning (OPT_Winvalid_memory_model,
46458 "HLE_RELEASE not used with RELEASE or stronger memory model");
46459 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46460 }
46461 return val;
46462 }
46463
46464 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46465 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46466 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46467 or number of vecsize_mangle variants that should be emitted. */
46468
46469 static int
46470 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46471 struct cgraph_simd_clone *clonei,
46472 tree base_type, int num)
46473 {
46474 int ret = 1;
46475
46476 if (clonei->simdlen
46477 && (clonei->simdlen < 2
46478 || clonei->simdlen > 16
46479 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46480 {
46481 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46482 "unsupported simdlen %d", clonei->simdlen);
46483 return 0;
46484 }
46485
46486 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46487 if (TREE_CODE (ret_type) != VOID_TYPE)
46488 switch (TYPE_MODE (ret_type))
46489 {
46490 case QImode:
46491 case HImode:
46492 case SImode:
46493 case DImode:
46494 case SFmode:
46495 case DFmode:
46496 /* case SCmode: */
46497 /* case DCmode: */
46498 break;
46499 default:
46500 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46501 "unsupported return type %qT for simd\n", ret_type);
46502 return 0;
46503 }
46504
46505 tree t;
46506 int i;
46507
46508 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46509 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46510 switch (TYPE_MODE (TREE_TYPE (t)))
46511 {
46512 case QImode:
46513 case HImode:
46514 case SImode:
46515 case DImode:
46516 case SFmode:
46517 case DFmode:
46518 /* case SCmode: */
46519 /* case DCmode: */
46520 break;
46521 default:
46522 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46523 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46524 return 0;
46525 }
46526
46527 if (clonei->cilk_elemental)
46528 {
46529 /* Parse the processor clause here. If not present, default to 'b'. */
46530 clonei->vecsize_mangle = 'b';
46531 }
46532 else if (!TREE_PUBLIC (node->decl))
46533 {
46534 /* If the function isn't exported, we can pick up just one ISA
46535 for the clones. */
46536 if (TARGET_AVX2)
46537 clonei->vecsize_mangle = 'd';
46538 else if (TARGET_AVX)
46539 clonei->vecsize_mangle = 'c';
46540 else
46541 clonei->vecsize_mangle = 'b';
46542 ret = 1;
46543 }
46544 else
46545 {
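/* The function is exported, so emit all three ISA variants.  As in
   ix86_simd_clone_adjust below, 'b' corresponds to SSE2, 'c' to AVX
   and 'd' to AVX2. */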
46546 clonei->vecsize_mangle = "bcd"[num];
46547 ret = 3;
46548 }
46549 switch (clonei->vecsize_mangle)
46550 {
46551 case 'b':
46552 clonei->vecsize_int = 128;
46553 clonei->vecsize_float = 128;
46554 break;
46555 case 'c':
46556 clonei->vecsize_int = 128;
46557 clonei->vecsize_float = 256;
46558 break;
46559 case 'd':
46560 clonei->vecsize_int = 256;
46561 clonei->vecsize_float = 256;
46562 break;
46563 }
46564 if (clonei->simdlen == 0)
46565 {
46566 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46567 clonei->simdlen = clonei->vecsize_int;
46568 else
46569 clonei->simdlen = clonei->vecsize_float;
46570 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46571 if (clonei->simdlen > 16)
46572 clonei->simdlen = 16;
46573 }
46574 return ret;
46575 }
46576
46577 /* Add target attribute to SIMD clone NODE if needed. */
46578
46579 static void
46580 ix86_simd_clone_adjust (struct cgraph_node *node)
46581 {
46582 const char *str = NULL;
46583 gcc_assert (node->decl == cfun->decl);
46584 switch (node->simdclone->vecsize_mangle)
46585 {
46586 case 'b':
46587 if (!TARGET_SSE2)
46588 str = "sse2";
46589 break;
46590 case 'c':
46591 if (!TARGET_AVX)
46592 str = "avx";
46593 break;
46594 case 'd':
46595 if (!TARGET_AVX2)
46596 str = "avx2";
46597 break;
46598 default:
46599 gcc_unreachable ();
46600 }
46601 if (str == NULL)
46602 return;
46603 push_cfun (NULL);
46604 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46605 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46606 gcc_assert (ok);
46607 pop_cfun ();
46608 ix86_previous_fndecl = NULL_TREE;
46609 ix86_set_current_function (node->decl);
46610 }
46611
46612 /* If SIMD clone NODE can't be used in a vectorized loop
46613 in the current function, return -1, otherwise return the badness of using it
46614 (0 if it is most desirable from vecsize_mangle point of view, 1
46615 slightly less desirable, etc.). */
46616
46617 static int
46618 ix86_simd_clone_usable (struct cgraph_node *node)
46619 {
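/* Lower return values are preferred: e.g. with AVX2 enabled the 'd'
   clone gets badness 0, 'c' gets 1 and 'b' gets 2. */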
46620 switch (node->simdclone->vecsize_mangle)
46621 {
46622 case 'b':
46623 if (!TARGET_SSE2)
46624 return -1;
46625 if (!TARGET_AVX)
46626 return 0;
46627 return TARGET_AVX2 ? 2 : 1;
46628 case 'c':
46629 if (!TARGET_AVX)
46630 return -1;
46631 return TARGET_AVX2 ? 1 : 0;
46632 break;
46633 case 'd':
46634 if (!TARGET_AVX2)
46635 return -1;
46636 return 0;
46637 default:
46638 gcc_unreachable ();
46639 }
46640 }
46641
46642 /* Count the number of memory references in *X, accumulating the
46643 count in MEM_COUNT. This value determines the unrolling factor for
46644 the bdver3 and bdver4 architectures. */
46645
46646 static int
46647 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46648 {
46649 if (*x != NULL_RTX && MEM_P (*x))
46650 {
46651 enum machine_mode mode;
46652 unsigned int n_words;
46653
46654 mode = GET_MODE (*x);
46655 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46656
46657 if (n_words > 4)
46658 (*mem_count)+=2;
46659 else
46660 (*mem_count)+=1;
46661 }
46662 return 0;
46663 }
46664
46665 /* This function adjusts the unroll factor based on
46666 the hardware capabilities. For example, bdver3 has
46667 a loop buffer which makes unrolling of smaller
46668 loops less important. This function decides the
46669 unroll factor using the number of memory references in the loop
46670 body as a heuristic, so that the unrolled body has about 32 of them. */
46671
46672 static unsigned
46673 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46674 {
46675 basic_block *bbs;
46676 rtx insn;
46677 unsigned i;
46678 unsigned mem_count = 0;
46679
46680 if (!TARGET_ADJUST_UNROLL)
46681 return nunroll;
46682
46683 /* Count the number of memory references within the loop body. */
46684 bbs = get_loop_body (loop);
46685 for (i = 0; i < loop->num_nodes; i++)
46686 {
46687 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46688 if (NONDEBUG_INSN_P (insn))
46689 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46690 }
46691 free (bbs);
46692
46693 if (mem_count && mem_count <= 32)
46694 return 32 / mem_count;
46695
46696 return nunroll;
46697 }
46698
46699
46700 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46701
46702 static bool
46703 ix86_float_exceptions_rounding_supported_p (void)
46704 {
46705 /* For x87 floating point with standard excess precision handling,
46706 there is no adddf3 pattern (since x87 floating point only has
46707 XFmode operations) so the default hook implementation gets this
46708 wrong. */
46709 return TARGET_80387 || TARGET_SSE_MATH;
46710 }
46711
46712 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46713
46714 static void
46715 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46716 {
46717 if (!TARGET_80387 && !TARGET_SSE_MATH)
46718 return;
46719 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46720 if (TARGET_80387)
46721 {
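/* The x87 environment saved by fnstenv is 28 bytes; model it as an
   array of 7 unsigned ints (indices 0 through 6). */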
46722 tree fenv_index_type = build_index_type (size_int (6));
46723 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46724 tree fenv_var = create_tmp_var (fenv_type, NULL);
46725 mark_addressable (fenv_var);
46726 tree fenv_ptr = build_pointer_type (fenv_type);
46727 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46728 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46729 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46730 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46731 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46732 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46733 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46734 tree hold_fnclex = build_call_expr (fnclex, 0);
46735 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46736 hold_fnclex);
46737 *clear = build_call_expr (fnclex, 0);
46738 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46739 mark_addressable (sw_var);
46740 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46741 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46742 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46743 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46744 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46745 exceptions_var, exceptions_x87);
46746 *update = build2 (COMPOUND_EXPR, integer_type_node,
46747 fnstsw_call, update_mod);
46748 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46749 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46750 }
46751 if (TARGET_SSE_MATH)
46752 {
46753 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46754 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46755 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46756 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46757 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46758 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46759 mxcsr_orig_var, stmxcsr_hold_call);
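/* Compute the MXCSR value used while the compound operation runs:
   set all exception mask bits (0x1f80) and clear the exception flag
   bits (the low six bits, hence the 0xffffffc0 mask). */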
46760 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46761 mxcsr_orig_var,
46762 build_int_cst (unsigned_type_node, 0x1f80));
46763 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46764 build_int_cst (unsigned_type_node, 0xffffffc0));
46765 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46766 mxcsr_mod_var, hold_mod_val);
46767 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46768 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46769 hold_assign_orig, hold_assign_mod);
46770 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46771 ldmxcsr_hold_call);
46772 if (*hold)
46773 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46774 else
46775 *hold = hold_all;
46776 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46777 if (*clear)
46778 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46779 ldmxcsr_clear_call);
46780 else
46781 *clear = ldmxcsr_clear_call;
46782 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46783 tree exceptions_sse = fold_convert (integer_type_node,
46784 stxmcsr_update_call);
46785 if (*update)
46786 {
46787 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46788 exceptions_var, exceptions_sse);
46789 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46790 exceptions_var, exceptions_mod);
46791 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46792 exceptions_assign);
46793 }
46794 else
46795 *update = build2 (MODIFY_EXPR, integer_type_node,
46796 exceptions_var, exceptions_sse);
46797 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46798 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46799 ldmxcsr_update_call);
46800 }
46801 tree atomic_feraiseexcept
46802 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46803 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46804 1, exceptions_var);
46805 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46806 atomic_feraiseexcept_call);
46807 }
46808
46809 /* Initialize the GCC target structure. */
46810 #undef TARGET_RETURN_IN_MEMORY
46811 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46812
46813 #undef TARGET_LEGITIMIZE_ADDRESS
46814 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46815
46816 #undef TARGET_ATTRIBUTE_TABLE
46817 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46818 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46819 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46820 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46821 # undef TARGET_MERGE_DECL_ATTRIBUTES
46822 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46823 #endif
46824
46825 #undef TARGET_COMP_TYPE_ATTRIBUTES
46826 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46827
46828 #undef TARGET_INIT_BUILTINS
46829 #define TARGET_INIT_BUILTINS ix86_init_builtins
46830 #undef TARGET_BUILTIN_DECL
46831 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46832 #undef TARGET_EXPAND_BUILTIN
46833 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46834
46835 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46836 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46837 ix86_builtin_vectorized_function
46838
46839 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46840 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46841
46842 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46843 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46844
46845 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46846 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46847
46848 #undef TARGET_BUILTIN_RECIPROCAL
46849 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46850
46851 #undef TARGET_ASM_FUNCTION_EPILOGUE
46852 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46853
46854 #undef TARGET_ENCODE_SECTION_INFO
46855 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46856 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46857 #else
46858 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46859 #endif
46860
46861 #undef TARGET_ASM_OPEN_PAREN
46862 #define TARGET_ASM_OPEN_PAREN ""
46863 #undef TARGET_ASM_CLOSE_PAREN
46864 #define TARGET_ASM_CLOSE_PAREN ""
46865
46866 #undef TARGET_ASM_BYTE_OP
46867 #define TARGET_ASM_BYTE_OP ASM_BYTE
46868
46869 #undef TARGET_ASM_ALIGNED_HI_OP
46870 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46871 #undef TARGET_ASM_ALIGNED_SI_OP
46872 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46873 #ifdef ASM_QUAD
46874 #undef TARGET_ASM_ALIGNED_DI_OP
46875 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46876 #endif
46877
46878 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46879 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46880
46881 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46882 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46883
46884 #undef TARGET_ASM_UNALIGNED_HI_OP
46885 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46886 #undef TARGET_ASM_UNALIGNED_SI_OP
46887 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46888 #undef TARGET_ASM_UNALIGNED_DI_OP
46889 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46890
46891 #undef TARGET_PRINT_OPERAND
46892 #define TARGET_PRINT_OPERAND ix86_print_operand
46893 #undef TARGET_PRINT_OPERAND_ADDRESS
46894 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46895 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46896 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46897 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46898 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46899
46900 #undef TARGET_SCHED_INIT_GLOBAL
46901 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46902 #undef TARGET_SCHED_ADJUST_COST
46903 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46904 #undef TARGET_SCHED_ISSUE_RATE
46905 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46906 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46907 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46908 ia32_multipass_dfa_lookahead
46909 #undef TARGET_SCHED_MACRO_FUSION_P
46910 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46911 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46912 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46913
46914 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46915 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46916
46917 #undef TARGET_MEMMODEL_CHECK
46918 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46919
46920 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46921 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46922
46923 #ifdef HAVE_AS_TLS
46924 #undef TARGET_HAVE_TLS
46925 #define TARGET_HAVE_TLS true
46926 #endif
46927 #undef TARGET_CANNOT_FORCE_CONST_MEM
46928 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46929 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46930 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46931
46932 #undef TARGET_DELEGITIMIZE_ADDRESS
46933 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46934
46935 #undef TARGET_MS_BITFIELD_LAYOUT_P
46936 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46937
46938 #if TARGET_MACHO
46939 #undef TARGET_BINDS_LOCAL_P
46940 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
46941 #endif
46942 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46943 #undef TARGET_BINDS_LOCAL_P
46944 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
46945 #endif
46946
46947 #undef TARGET_ASM_OUTPUT_MI_THUNK
46948 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
46949 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
46950 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
46951
46952 #undef TARGET_ASM_FILE_START
46953 #define TARGET_ASM_FILE_START x86_file_start
46954
46955 #undef TARGET_OPTION_OVERRIDE
46956 #define TARGET_OPTION_OVERRIDE ix86_option_override
46957
46958 #undef TARGET_REGISTER_MOVE_COST
46959 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
46960 #undef TARGET_MEMORY_MOVE_COST
46961 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
46962 #undef TARGET_RTX_COSTS
46963 #define TARGET_RTX_COSTS ix86_rtx_costs
46964 #undef TARGET_ADDRESS_COST
46965 #define TARGET_ADDRESS_COST ix86_address_cost
46966
46967 #undef TARGET_FIXED_CONDITION_CODE_REGS
46968 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
46969 #undef TARGET_CC_MODES_COMPATIBLE
46970 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
46971
46972 #undef TARGET_MACHINE_DEPENDENT_REORG
46973 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
46974
46975 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
46976 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
46977
46978 #undef TARGET_BUILD_BUILTIN_VA_LIST
46979 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
46980
46981 #undef TARGET_FOLD_BUILTIN
46982 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
46983
46984 #undef TARGET_COMPARE_VERSION_PRIORITY
46985 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
46986
46987 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
46988 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
46989 ix86_generate_version_dispatcher_body
46990
46991 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
46992 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
46993 ix86_get_function_versions_dispatcher
46994
46995 #undef TARGET_ENUM_VA_LIST_P
46996 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
46997
46998 #undef TARGET_FN_ABI_VA_LIST
46999 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47000
47001 #undef TARGET_CANONICAL_VA_LIST_TYPE
47002 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47003
47004 #undef TARGET_EXPAND_BUILTIN_VA_START
47005 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47006
47007 #undef TARGET_MD_ASM_CLOBBERS
47008 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47009
47010 #undef TARGET_PROMOTE_PROTOTYPES
47011 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47012 #undef TARGET_SETUP_INCOMING_VARARGS
47013 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47014 #undef TARGET_MUST_PASS_IN_STACK
47015 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47016 #undef TARGET_FUNCTION_ARG_ADVANCE
47017 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47018 #undef TARGET_FUNCTION_ARG
47019 #define TARGET_FUNCTION_ARG ix86_function_arg
47020 #undef TARGET_FUNCTION_ARG_BOUNDARY
47021 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47022 #undef TARGET_PASS_BY_REFERENCE
47023 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47024 #undef TARGET_INTERNAL_ARG_POINTER
47025 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47026 #undef TARGET_UPDATE_STACK_BOUNDARY
47027 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47028 #undef TARGET_GET_DRAP_RTX
47029 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47030 #undef TARGET_STRICT_ARGUMENT_NAMING
47031 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47032 #undef TARGET_STATIC_CHAIN
47033 #define TARGET_STATIC_CHAIN ix86_static_chain
47034 #undef TARGET_TRAMPOLINE_INIT
47035 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47036 #undef TARGET_RETURN_POPS_ARGS
47037 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47038
47039 #undef TARGET_LEGITIMATE_COMBINED_INSN
47040 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47041
47042 #undef TARGET_ASAN_SHADOW_OFFSET
47043 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47044
47045 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47046 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47047
47048 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47049 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47050
47051 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47052 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47053
47054 #undef TARGET_C_MODE_FOR_SUFFIX
47055 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47056
47057 #ifdef HAVE_AS_TLS
47058 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47059 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47060 #endif
47061
47062 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47063 #undef TARGET_INSERT_ATTRIBUTES
47064 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47065 #endif
47066
47067 #undef TARGET_MANGLE_TYPE
47068 #define TARGET_MANGLE_TYPE ix86_mangle_type
47069
47070 #if !TARGET_MACHO
47071 #undef TARGET_STACK_PROTECT_FAIL
47072 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47073 #endif
47074
47075 #undef TARGET_FUNCTION_VALUE
47076 #define TARGET_FUNCTION_VALUE ix86_function_value
47077
47078 #undef TARGET_FUNCTION_VALUE_REGNO_P
47079 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47080
47081 #undef TARGET_PROMOTE_FUNCTION_MODE
47082 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47083
47084 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47085 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47086
47087 #undef TARGET_INSTANTIATE_DECLS
47088 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47089
47090 #undef TARGET_SECONDARY_RELOAD
47091 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47092
47093 #undef TARGET_CLASS_MAX_NREGS
47094 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47095
47096 #undef TARGET_PREFERRED_RELOAD_CLASS
47097 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47098 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47099 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47100 #undef TARGET_CLASS_LIKELY_SPILLED_P
47101 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47102
47103 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47104 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47105 ix86_builtin_vectorization_cost
47106 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47107 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47108 ix86_vectorize_vec_perm_const_ok
47109 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47110 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47111 ix86_preferred_simd_mode
47112 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47113 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47114 ix86_autovectorize_vector_sizes
47115 #undef TARGET_VECTORIZE_INIT_COST
47116 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47117 #undef TARGET_VECTORIZE_ADD_STMT_COST
47118 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47119 #undef TARGET_VECTORIZE_FINISH_COST
47120 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47121 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47122 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47123
47124 #undef TARGET_SET_CURRENT_FUNCTION
47125 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47126
47127 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47128 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47129
47130 #undef TARGET_OPTION_SAVE
47131 #define TARGET_OPTION_SAVE ix86_function_specific_save
47132
47133 #undef TARGET_OPTION_RESTORE
47134 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47135
47136 #undef TARGET_OPTION_PRINT
47137 #define TARGET_OPTION_PRINT ix86_function_specific_print
47138
47139 #undef TARGET_OPTION_FUNCTION_VERSIONS
47140 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47141
47142 #undef TARGET_CAN_INLINE_P
47143 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47144
47145 #undef TARGET_EXPAND_TO_RTL_HOOK
47146 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47147
47148 #undef TARGET_LEGITIMATE_ADDRESS_P
47149 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47150
47151 #undef TARGET_LRA_P
47152 #define TARGET_LRA_P hook_bool_void_true
47153
47154 #undef TARGET_REGISTER_PRIORITY
47155 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47156
47157 #undef TARGET_REGISTER_USAGE_LEVELING_P
47158 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47159
47160 #undef TARGET_LEGITIMATE_CONSTANT_P
47161 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47162
47163 #undef TARGET_FRAME_POINTER_REQUIRED
47164 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47165
47166 #undef TARGET_CAN_ELIMINATE
47167 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47168
47169 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47170 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47171
47172 #undef TARGET_ASM_CODE_END
47173 #define TARGET_ASM_CODE_END ix86_code_end
47174
47175 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47176 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47177
47178 #if TARGET_MACHO
47179 #undef TARGET_INIT_LIBFUNCS
47180 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47181 #endif
47182
47183 #undef TARGET_LOOP_UNROLL_ADJUST
47184 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47185
47186 #undef TARGET_SPILL_CLASS
47187 #define TARGET_SPILL_CLASS ix86_spill_class
47188
47189 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47190 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47191 ix86_simd_clone_compute_vecsize_and_simdlen
47192
47193 #undef TARGET_SIMD_CLONE_ADJUST
47194 #define TARGET_SIMD_CLONE_ADJUST \
47195 ix86_simd_clone_adjust
47196
47197 #undef TARGET_SIMD_CLONE_USABLE
47198 #define TARGET_SIMD_CLONE_USABLE \
47199 ix86_simd_clone_usable
47200
47201 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47202 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47203 ix86_float_exceptions_rounding_supported_p
47204
47205 struct gcc_target targetm = TARGET_INITIALIZER;
47206 \f
47207 #include "gt-i386.h"