1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "hashtab.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "machmode.h"
45 #include "input.h"
46 #include "function.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "optabs.h"
50 #include "diagnostic-core.h"
51 #include "toplev.h"
52 #include "basic-block.h"
53 #include "ggc.h"
54 #include "target.h"
55 #include "target-def.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "cgraph.h"
60 #include "hash-table.h"
61 #include "basic-block.h"
62 #include "tree-ssa-alias.h"
63 #include "internal-fn.h"
64 #include "gimple-fold.h"
65 #include "tree-eh.h"
66 #include "gimple-expr.h"
67 #include "is-a.h"
68 #include "gimple.h"
69 #include "gimplify.h"
70 #include "cfgloop.h"
71 #include "dwarf2.h"
72 #include "df.h"
73 #include "tm-constrs.h"
74 #include "params.h"
75 #include "cselib.h"
76 #include "debug.h"
77 #include "sched-int.h"
78 #include "sbitmap.h"
79 #include "fibheap.h"
80 #include "opts.h"
81 #include "diagnostic.h"
82 #include "dumpfile.h"
83 #include "tree-pass.h"
84 #include "wide-int.h"
85 #include "context.h"
86 #include "pass_manager.h"
87 #include "target-globals.h"
88 #include "tree-vectorizer.h"
89 #include "shrink-wrap.h"
90 #include "builtins.h"
91
92 static rtx legitimize_dllimport_symbol (rtx, bool);
93 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
94 static rtx legitimize_pe_coff_symbol (rtx, bool);
95
96 #ifndef CHECK_STACK_LIMIT
97 #define CHECK_STACK_LIMIT (-1)
98 #endif
99
100 /* Return index of given mode in mult and division cost tables. */
101 #define MODE_INDEX(mode) \
102 ((mode) == QImode ? 0 \
103 : (mode) == HImode ? 1 \
104 : (mode) == SImode ? 2 \
105 : (mode) == DImode ? 3 \
106 : 4)
107
108 /* Processor costs (relative to an add) */
109 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
110 #define COSTS_N_BYTES(N) ((N) * 2)
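/* Illustration of the scaling assumption above: if COSTS_N_INSNS (N) is
   indeed (N) * 4 and an add takes 2 bytes, then an add is entered in the
   size table below as COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), keeping
   the byte-based size costs on the same scale as the cycle-based costs in
   the per-processor tables.  */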
111
112 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
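/* A note on reading the stringop tables below (assuming the layout of struct
   stringop_algs in i386.h of this vintage): each initializer gives a fallback
   algorithm used when the block size is unknown, followed by {max_size,
   algorithm, noalign} entries; a max_size of -1 terminates the list, and the
   two-element arrays are indexed by whether code is generated for a 64-bit
   target.  */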
113
114 static stringop_algs ix86_size_memcpy[2] = {
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
116 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
117 static stringop_algs ix86_size_memset[2] = {
118 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
119 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
120
121 const
122 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
123 COSTS_N_BYTES (2), /* cost of an add instruction */
124 COSTS_N_BYTES (3), /* cost of a lea instruction */
125 COSTS_N_BYTES (2), /* variable shift costs */
126 COSTS_N_BYTES (3), /* constant shift costs */
127 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
128 COSTS_N_BYTES (3), /* HI */
129 COSTS_N_BYTES (3), /* SI */
130 COSTS_N_BYTES (3), /* DI */
131 COSTS_N_BYTES (5)}, /* other */
132 0, /* cost of multiply per each bit set */
133 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
134 COSTS_N_BYTES (3), /* HI */
135 COSTS_N_BYTES (3), /* SI */
136 COSTS_N_BYTES (3), /* DI */
137 COSTS_N_BYTES (5)}, /* other */
138 COSTS_N_BYTES (3), /* cost of movsx */
139 COSTS_N_BYTES (3), /* cost of movzx */
140 0, /* "large" insn */
141 2, /* MOVE_RATIO */
142 2, /* cost for loading QImode using movzbl */
143 {2, 2, 2}, /* cost of loading integer registers
144 in QImode, HImode and SImode.
145 Relative to reg-reg move (2). */
146 {2, 2, 2}, /* cost of storing integer registers */
147 2, /* cost of reg,reg fld/fst */
148 {2, 2, 2}, /* cost of loading fp registers
149 in SFmode, DFmode and XFmode */
150 {2, 2, 2}, /* cost of storing fp registers
151 in SFmode, DFmode and XFmode */
152 3, /* cost of moving MMX register */
153 {3, 3}, /* cost of loading MMX registers
154 in SImode and DImode */
155 {3, 3}, /* cost of storing MMX registers
156 in SImode and DImode */
157 3, /* cost of moving SSE register */
158 {3, 3, 3}, /* cost of loading SSE registers
159 in SImode, DImode and TImode */
160 {3, 3, 3}, /* cost of storing SSE registers
161 in SImode, DImode and TImode */
162 3, /* MMX or SSE register to integer */
163 0, /* size of l1 cache */
164 0, /* size of l2 cache */
165 0, /* size of prefetch block */
166 0, /* number of parallel prefetches */
167 2, /* Branch cost */
168 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
169 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
170 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
171 COSTS_N_BYTES (2), /* cost of FABS instruction. */
172 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
173 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
174 ix86_size_memcpy,
175 ix86_size_memset,
176 1, /* scalar_stmt_cost. */
177 1, /* scalar load_cost. */
178 1, /* scalar_store_cost. */
179 1, /* vec_stmt_cost. */
180 1, /* vec_to_scalar_cost. */
181 1, /* scalar_to_vec_cost. */
182 1, /* vec_align_load_cost. */
183 1, /* vec_unalign_load_cost. */
184 1, /* vec_store_cost. */
185 1, /* cond_taken_branch_cost. */
186 1, /* cond_not_taken_branch_cost. */
187 };
188
189 /* Processor costs (relative to an add) */
190 static stringop_algs i386_memcpy[2] = {
191 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
192 DUMMY_STRINGOP_ALGS};
193 static stringop_algs i386_memset[2] = {
194 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
195 DUMMY_STRINGOP_ALGS};
196
197 static const
198 struct processor_costs i386_cost = { /* 386 specific costs */
199 COSTS_N_INSNS (1), /* cost of an add instruction */
200 COSTS_N_INSNS (1), /* cost of a lea instruction */
201 COSTS_N_INSNS (3), /* variable shift costs */
202 COSTS_N_INSNS (2), /* constant shift costs */
203 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
204 COSTS_N_INSNS (6), /* HI */
205 COSTS_N_INSNS (6), /* SI */
206 COSTS_N_INSNS (6), /* DI */
207 COSTS_N_INSNS (6)}, /* other */
208 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
209 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
210 COSTS_N_INSNS (23), /* HI */
211 COSTS_N_INSNS (23), /* SI */
212 COSTS_N_INSNS (23), /* DI */
213 COSTS_N_INSNS (23)}, /* other */
214 COSTS_N_INSNS (3), /* cost of movsx */
215 COSTS_N_INSNS (2), /* cost of movzx */
216 15, /* "large" insn */
217 3, /* MOVE_RATIO */
218 4, /* cost for loading QImode using movzbl */
219 {2, 4, 2}, /* cost of loading integer registers
220 in QImode, HImode and SImode.
221 Relative to reg-reg move (2). */
222 {2, 4, 2}, /* cost of storing integer registers */
223 2, /* cost of reg,reg fld/fst */
224 {8, 8, 8}, /* cost of loading fp registers
225 in SFmode, DFmode and XFmode */
226 {8, 8, 8}, /* cost of storing fp registers
227 in SFmode, DFmode and XFmode */
228 2, /* cost of moving MMX register */
229 {4, 8}, /* cost of loading MMX registers
230 in SImode and DImode */
231 {4, 8}, /* cost of storing MMX registers
232 in SImode and DImode */
233 2, /* cost of moving SSE register */
234 {4, 8, 16}, /* cost of loading SSE registers
235 in SImode, DImode and TImode */
236 {4, 8, 16}, /* cost of storing SSE registers
237 in SImode, DImode and TImode */
238 3, /* MMX or SSE register to integer */
239 0, /* size of l1 cache */
240 0, /* size of l2 cache */
241 0, /* size of prefetch block */
242 0, /* number of parallel prefetches */
243 1, /* Branch cost */
244 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
245 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
246 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
247 COSTS_N_INSNS (22), /* cost of FABS instruction. */
248 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
249 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
250 i386_memcpy,
251 i386_memset,
252 1, /* scalar_stmt_cost. */
253 1, /* scalar load_cost. */
254 1, /* scalar_store_cost. */
255 1, /* vec_stmt_cost. */
256 1, /* vec_to_scalar_cost. */
257 1, /* scalar_to_vec_cost. */
258 1, /* vec_align_load_cost. */
259 2, /* vec_unalign_load_cost. */
260 1, /* vec_store_cost. */
261 3, /* cond_taken_branch_cost. */
262 1, /* cond_not_taken_branch_cost. */
263 };
264
265 static stringop_algs i486_memcpy[2] = {
266 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
267 DUMMY_STRINGOP_ALGS};
268 static stringop_algs i486_memset[2] = {
269 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
270 DUMMY_STRINGOP_ALGS};
271
272 static const
273 struct processor_costs i486_cost = { /* 486 specific costs */
274 COSTS_N_INSNS (1), /* cost of an add instruction */
275 COSTS_N_INSNS (1), /* cost of a lea instruction */
276 COSTS_N_INSNS (3), /* variable shift costs */
277 COSTS_N_INSNS (2), /* constant shift costs */
278 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
279 COSTS_N_INSNS (12), /* HI */
280 COSTS_N_INSNS (12), /* SI */
281 COSTS_N_INSNS (12), /* DI */
282 COSTS_N_INSNS (12)}, /* other */
283 1, /* cost of multiply per each bit set */
284 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
285 COSTS_N_INSNS (40), /* HI */
286 COSTS_N_INSNS (40), /* SI */
287 COSTS_N_INSNS (40), /* DI */
288 COSTS_N_INSNS (40)}, /* other */
289 COSTS_N_INSNS (3), /* cost of movsx */
290 COSTS_N_INSNS (2), /* cost of movzx */
291 15, /* "large" insn */
292 3, /* MOVE_RATIO */
293 4, /* cost for loading QImode using movzbl */
294 {2, 4, 2}, /* cost of loading integer registers
295 in QImode, HImode and SImode.
296 Relative to reg-reg move (2). */
297 {2, 4, 2}, /* cost of storing integer registers */
298 2, /* cost of reg,reg fld/fst */
299 {8, 8, 8}, /* cost of loading fp registers
300 in SFmode, DFmode and XFmode */
301 {8, 8, 8}, /* cost of storing fp registers
302 in SFmode, DFmode and XFmode */
303 2, /* cost of moving MMX register */
304 {4, 8}, /* cost of loading MMX registers
305 in SImode and DImode */
306 {4, 8}, /* cost of storing MMX registers
307 in SImode and DImode */
308 2, /* cost of moving SSE register */
309 {4, 8, 16}, /* cost of loading SSE registers
310 in SImode, DImode and TImode */
311 {4, 8, 16}, /* cost of storing SSE registers
312 in SImode, DImode and TImode */
313 3, /* MMX or SSE register to integer */
314 4, /* size of l1 cache. 486 has 8kB cache
315 shared for code and data, so 4kB is
316 not really precise. */
317 4, /* size of l2 cache */
318 0, /* size of prefetch block */
319 0, /* number of parallel prefetches */
320 1, /* Branch cost */
321 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
322 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
323 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
324 COSTS_N_INSNS (3), /* cost of FABS instruction. */
325 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
326 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
327 i486_memcpy,
328 i486_memset,
329 1, /* scalar_stmt_cost. */
330 1, /* scalar load_cost. */
331 1, /* scalar_store_cost. */
332 1, /* vec_stmt_cost. */
333 1, /* vec_to_scalar_cost. */
334 1, /* scalar_to_vec_cost. */
335 1, /* vec_align_load_cost. */
336 2, /* vec_unalign_load_cost. */
337 1, /* vec_store_cost. */
338 3, /* cond_taken_branch_cost. */
339 1, /* cond_not_taken_branch_cost. */
340 };
341
342 static stringop_algs pentium_memcpy[2] = {
343 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
344 DUMMY_STRINGOP_ALGS};
345 static stringop_algs pentium_memset[2] = {
346 {libcall, {{-1, rep_prefix_4_byte, false}}},
347 DUMMY_STRINGOP_ALGS};
348
349 static const
350 struct processor_costs pentium_cost = {
351 COSTS_N_INSNS (1), /* cost of an add instruction */
352 COSTS_N_INSNS (1), /* cost of a lea instruction */
353 COSTS_N_INSNS (4), /* variable shift costs */
354 COSTS_N_INSNS (1), /* constant shift costs */
355 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
356 COSTS_N_INSNS (11), /* HI */
357 COSTS_N_INSNS (11), /* SI */
358 COSTS_N_INSNS (11), /* DI */
359 COSTS_N_INSNS (11)}, /* other */
360 0, /* cost of multiply per each bit set */
361 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
362 COSTS_N_INSNS (25), /* HI */
363 COSTS_N_INSNS (25), /* SI */
364 COSTS_N_INSNS (25), /* DI */
365 COSTS_N_INSNS (25)}, /* other */
366 COSTS_N_INSNS (3), /* cost of movsx */
367 COSTS_N_INSNS (2), /* cost of movzx */
368 8, /* "large" insn */
369 6, /* MOVE_RATIO */
370 6, /* cost for loading QImode using movzbl */
371 {2, 4, 2}, /* cost of loading integer registers
372 in QImode, HImode and SImode.
373 Relative to reg-reg move (2). */
374 {2, 4, 2}, /* cost of storing integer registers */
375 2, /* cost of reg,reg fld/fst */
376 {2, 2, 6}, /* cost of loading fp registers
377 in SFmode, DFmode and XFmode */
378 {4, 4, 6}, /* cost of storing fp registers
379 in SFmode, DFmode and XFmode */
380 8, /* cost of moving MMX register */
381 {8, 8}, /* cost of loading MMX registers
382 in SImode and DImode */
383 {8, 8}, /* cost of storing MMX registers
384 in SImode and DImode */
385 2, /* cost of moving SSE register */
386 {4, 8, 16}, /* cost of loading SSE registers
387 in SImode, DImode and TImode */
388 {4, 8, 16}, /* cost of storing SSE registers
389 in SImode, DImode and TImode */
390 3, /* MMX or SSE register to integer */
391 8, /* size of l1 cache. */
392 8, /* size of l2 cache */
393 0, /* size of prefetch block */
394 0, /* number of parallel prefetches */
395 2, /* Branch cost */
396 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
397 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
398 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
399 COSTS_N_INSNS (1), /* cost of FABS instruction. */
400 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
401 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
402 pentium_memcpy,
403 pentium_memset,
404 1, /* scalar_stmt_cost. */
405 1, /* scalar load_cost. */
406 1, /* scalar_store_cost. */
407 1, /* vec_stmt_cost. */
408 1, /* vec_to_scalar_cost. */
409 1, /* scalar_to_vec_cost. */
410 1, /* vec_align_load_cost. */
411 2, /* vec_unalign_load_cost. */
412 1, /* vec_store_cost. */
413 3, /* cond_taken_branch_cost. */
414 1, /* cond_not_taken_branch_cost. */
415 };
416
417 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
418 (we ensure the alignment).  For small blocks an inline loop is still a
419 noticeable win; for bigger blocks either rep movsl or rep movsb is the
420 way to go.  Rep movsb apparently has a more expensive startup time in the
421 CPU, but after 4K the difference is down in the noise.  */
422 static stringop_algs pentiumpro_memcpy[2] = {
423 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
424 {8192, rep_prefix_4_byte, false},
425 {-1, rep_prefix_1_byte, false}}},
426 DUMMY_STRINGOP_ALGS};
427 static stringop_algs pentiumpro_memset[2] = {
428 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
429 {8192, rep_prefix_4_byte, false},
430 {-1, libcall, false}}},
431 DUMMY_STRINGOP_ALGS};
432 static const
433 struct processor_costs pentiumpro_cost = {
434 COSTS_N_INSNS (1), /* cost of an add instruction */
435 COSTS_N_INSNS (1), /* cost of a lea instruction */
436 COSTS_N_INSNS (1), /* variable shift costs */
437 COSTS_N_INSNS (1), /* constant shift costs */
438 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
439 COSTS_N_INSNS (4), /* HI */
440 COSTS_N_INSNS (4), /* SI */
441 COSTS_N_INSNS (4), /* DI */
442 COSTS_N_INSNS (4)}, /* other */
443 0, /* cost of multiply per each bit set */
444 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
445 COSTS_N_INSNS (17), /* HI */
446 COSTS_N_INSNS (17), /* SI */
447 COSTS_N_INSNS (17), /* DI */
448 COSTS_N_INSNS (17)}, /* other */
449 COSTS_N_INSNS (1), /* cost of movsx */
450 COSTS_N_INSNS (1), /* cost of movzx */
451 8, /* "large" insn */
452 6, /* MOVE_RATIO */
453 2, /* cost for loading QImode using movzbl */
454 {4, 4, 4}, /* cost of loading integer registers
455 in QImode, HImode and SImode.
456 Relative to reg-reg move (2). */
457 {2, 2, 2}, /* cost of storing integer registers */
458 2, /* cost of reg,reg fld/fst */
459 {2, 2, 6}, /* cost of loading fp registers
460 in SFmode, DFmode and XFmode */
461 {4, 4, 6}, /* cost of storing fp registers
462 in SFmode, DFmode and XFmode */
463 2, /* cost of moving MMX register */
464 {2, 2}, /* cost of loading MMX registers
465 in SImode and DImode */
466 {2, 2}, /* cost of storing MMX registers
467 in SImode and DImode */
468 2, /* cost of moving SSE register */
469 {2, 2, 8}, /* cost of loading SSE registers
470 in SImode, DImode and TImode */
471 {2, 2, 8}, /* cost of storing SSE registers
472 in SImode, DImode and TImode */
473 3, /* MMX or SSE register to integer */
474 8, /* size of l1 cache. */
475 256, /* size of l2 cache */
476 32, /* size of prefetch block */
477 6, /* number of parallel prefetches */
478 2, /* Branch cost */
479 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
480 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
481 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
482 COSTS_N_INSNS (2), /* cost of FABS instruction. */
483 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
484 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
485 pentiumpro_memcpy,
486 pentiumpro_memset,
487 1, /* scalar_stmt_cost. */
488 1, /* scalar load_cost. */
489 1, /* scalar_store_cost. */
490 1, /* vec_stmt_cost. */
491 1, /* vec_to_scalar_cost. */
492 1, /* scalar_to_vec_cost. */
493 1, /* vec_align_load_cost. */
494 2, /* vec_unalign_load_cost. */
495 1, /* vec_store_cost. */
496 3, /* cond_taken_branch_cost. */
497 1, /* cond_not_taken_branch_cost. */
498 };
499
500 static stringop_algs geode_memcpy[2] = {
501 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
502 DUMMY_STRINGOP_ALGS};
503 static stringop_algs geode_memset[2] = {
504 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
505 DUMMY_STRINGOP_ALGS};
506 static const
507 struct processor_costs geode_cost = {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (1), /* cost of a lea instruction */
510 COSTS_N_INSNS (2), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (4), /* HI */
514 COSTS_N_INSNS (7), /* SI */
515 COSTS_N_INSNS (7), /* DI */
516 COSTS_N_INSNS (7)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (23), /* HI */
520 COSTS_N_INSNS (39), /* SI */
521 COSTS_N_INSNS (39), /* DI */
522 COSTS_N_INSNS (39)}, /* other */
523 COSTS_N_INSNS (1), /* cost of movsx */
524 COSTS_N_INSNS (1), /* cost of movzx */
525 8, /* "large" insn */
526 4, /* MOVE_RATIO */
527 1, /* cost for loading QImode using movzbl */
528 {1, 1, 1}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {1, 1, 1}, /* cost of storing integer registers */
532 1, /* cost of reg,reg fld/fst */
533 {1, 1, 1}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 6, 6}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537
538 1, /* cost of moving MMX register */
539 {1, 1}, /* cost of loading MMX registers
540 in SImode and DImode */
541 {1, 1}, /* cost of storing MMX registers
542 in SImode and DImode */
543 1, /* cost of moving SSE register */
544 {1, 1, 1}, /* cost of loading SSE registers
545 in SImode, DImode and TImode */
546 {1, 1, 1}, /* cost of storing SSE registers
547 in SImode, DImode and TImode */
548 1, /* MMX or SSE register to integer */
549 64, /* size of l1 cache. */
550 128, /* size of l2 cache. */
551 32, /* size of prefetch block */
552 1, /* number of parallel prefetches */
553 1, /* Branch cost */
554 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
555 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
556 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
557 COSTS_N_INSNS (1), /* cost of FABS instruction. */
558 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
559 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
560 geode_memcpy,
561 geode_memset,
562 1, /* scalar_stmt_cost. */
563 1, /* scalar load_cost. */
564 1, /* scalar_store_cost. */
565 1, /* vec_stmt_cost. */
566 1, /* vec_to_scalar_cost. */
567 1, /* scalar_to_vec_cost. */
568 1, /* vec_align_load_cost. */
569 2, /* vec_unalign_load_cost. */
570 1, /* vec_store_cost. */
571 3, /* cond_taken_branch_cost. */
572 1, /* cond_not_taken_branch_cost. */
573 };
574
575 static stringop_algs k6_memcpy[2] = {
576 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
577 DUMMY_STRINGOP_ALGS};
578 static stringop_algs k6_memset[2] = {
579 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
580 DUMMY_STRINGOP_ALGS};
581 static const
582 struct processor_costs k6_cost = {
583 COSTS_N_INSNS (1), /* cost of an add instruction */
584 COSTS_N_INSNS (2), /* cost of a lea instruction */
585 COSTS_N_INSNS (1), /* variable shift costs */
586 COSTS_N_INSNS (1), /* constant shift costs */
587 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
588 COSTS_N_INSNS (3), /* HI */
589 COSTS_N_INSNS (3), /* SI */
590 COSTS_N_INSNS (3), /* DI */
591 COSTS_N_INSNS (3)}, /* other */
592 0, /* cost of multiply per each bit set */
593 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
594 COSTS_N_INSNS (18), /* HI */
595 COSTS_N_INSNS (18), /* SI */
596 COSTS_N_INSNS (18), /* DI */
597 COSTS_N_INSNS (18)}, /* other */
598 COSTS_N_INSNS (2), /* cost of movsx */
599 COSTS_N_INSNS (2), /* cost of movzx */
600 8, /* "large" insn */
601 4, /* MOVE_RATIO */
602 3, /* cost for loading QImode using movzbl */
603 {4, 5, 4}, /* cost of loading integer registers
604 in QImode, HImode and SImode.
605 Relative to reg-reg move (2). */
606 {2, 3, 2}, /* cost of storing integer registers */
607 4, /* cost of reg,reg fld/fst */
608 {6, 6, 6}, /* cost of loading fp registers
609 in SFmode, DFmode and XFmode */
610 {4, 4, 4}, /* cost of storing fp registers
611 in SFmode, DFmode and XFmode */
612 2, /* cost of moving MMX register */
613 {2, 2}, /* cost of loading MMX registers
614 in SImode and DImode */
615 {2, 2}, /* cost of storing MMX registers
616 in SImode and DImode */
617 2, /* cost of moving SSE register */
618 {2, 2, 8}, /* cost of loading SSE registers
619 in SImode, DImode and TImode */
620 {2, 2, 8}, /* cost of storing SSE registers
621 in SImode, DImode and TImode */
622 6, /* MMX or SSE register to integer */
623 32, /* size of l1 cache. */
624 32, /* size of l2 cache. Some models
625 have integrated l2 cache, but
626 optimizing for k6 is not important
627 enough to worry about that. */
628 32, /* size of prefetch block */
629 1, /* number of parallel prefetches */
630 1, /* Branch cost */
631 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
632 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
633 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
634 COSTS_N_INSNS (2), /* cost of FABS instruction. */
635 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
636 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
637 k6_memcpy,
638 k6_memset,
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
650 };
651
652 /* For some reason, Athlon deals better with the REP prefix (relative to
653 loops) than K8 does.  Alignment becomes important after 8 bytes for memcpy
654 and 128 bytes for memset.  */
655 static stringop_algs athlon_memcpy[2] = {
656 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
657 DUMMY_STRINGOP_ALGS};
658 static stringop_algs athlon_memset[2] = {
659 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
660 DUMMY_STRINGOP_ALGS};
661 static const
662 struct processor_costs athlon_cost = {
663 COSTS_N_INSNS (1), /* cost of an add instruction */
664 COSTS_N_INSNS (2), /* cost of a lea instruction */
665 COSTS_N_INSNS (1), /* variable shift costs */
666 COSTS_N_INSNS (1), /* constant shift costs */
667 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
668 COSTS_N_INSNS (5), /* HI */
669 COSTS_N_INSNS (5), /* SI */
670 COSTS_N_INSNS (5), /* DI */
671 COSTS_N_INSNS (5)}, /* other */
672 0, /* cost of multiply per each bit set */
673 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
674 COSTS_N_INSNS (26), /* HI */
675 COSTS_N_INSNS (42), /* SI */
676 COSTS_N_INSNS (74), /* DI */
677 COSTS_N_INSNS (74)}, /* other */
678 COSTS_N_INSNS (1), /* cost of movsx */
679 COSTS_N_INSNS (1), /* cost of movzx */
680 8, /* "large" insn */
681 9, /* MOVE_RATIO */
682 4, /* cost for loading QImode using movzbl */
683 {3, 4, 3}, /* cost of loading integer registers
684 in QImode, HImode and SImode.
685 Relative to reg-reg move (2). */
686 {3, 4, 3}, /* cost of storing integer registers */
687 4, /* cost of reg,reg fld/fst */
688 {4, 4, 12}, /* cost of loading fp registers
689 in SFmode, DFmode and XFmode */
690 {6, 6, 8}, /* cost of storing fp registers
691 in SFmode, DFmode and XFmode */
692 2, /* cost of moving MMX register */
693 {4, 4}, /* cost of loading MMX registers
694 in SImode and DImode */
695 {4, 4}, /* cost of storing MMX registers
696 in SImode and DImode */
697 2, /* cost of moving SSE register */
698 {4, 4, 6}, /* cost of loading SSE registers
699 in SImode, DImode and TImode */
700 {4, 4, 5}, /* cost of storing SSE registers
701 in SImode, DImode and TImode */
702 5, /* MMX or SSE register to integer */
703 64, /* size of l1 cache. */
704 256, /* size of l2 cache. */
705 64, /* size of prefetch block */
706 6, /* number of parallel prefetches */
707 5, /* Branch cost */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 athlon_memcpy,
715 athlon_memset,
716 1, /* scalar_stmt_cost. */
717 1, /* scalar load_cost. */
718 1, /* scalar_store_cost. */
719 1, /* vec_stmt_cost. */
720 1, /* vec_to_scalar_cost. */
721 1, /* scalar_to_vec_cost. */
722 1, /* vec_align_load_cost. */
723 2, /* vec_unalign_load_cost. */
724 1, /* vec_store_cost. */
725 3, /* cond_taken_branch_cost. */
726 1, /* cond_not_taken_branch_cost. */
727 };
728
729 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
730 small blocks it is better to use a loop.  For large blocks, a libcall can
731 do nontemporal accesses and beat inline code considerably.  */
732 static stringop_algs k8_memcpy[2] = {
733 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
734 {-1, rep_prefix_4_byte, false}}},
735 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
736 {-1, libcall, false}}}};
737 static stringop_algs k8_memset[2] = {
738 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
739 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
740 {libcall, {{48, unrolled_loop, false},
741 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
742 static const
743 struct processor_costs k8_cost = {
744 COSTS_N_INSNS (1), /* cost of an add instruction */
745 COSTS_N_INSNS (2), /* cost of a lea instruction */
746 COSTS_N_INSNS (1), /* variable shift costs */
747 COSTS_N_INSNS (1), /* constant shift costs */
748 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
749 COSTS_N_INSNS (4), /* HI */
750 COSTS_N_INSNS (3), /* SI */
751 COSTS_N_INSNS (4), /* DI */
752 COSTS_N_INSNS (5)}, /* other */
753 0, /* cost of multiply per each bit set */
754 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
755 COSTS_N_INSNS (26), /* HI */
756 COSTS_N_INSNS (42), /* SI */
757 COSTS_N_INSNS (74), /* DI */
758 COSTS_N_INSNS (74)}, /* other */
759 COSTS_N_INSNS (1), /* cost of movsx */
760 COSTS_N_INSNS (1), /* cost of movzx */
761 8, /* "large" insn */
762 9, /* MOVE_RATIO */
763 4, /* cost for loading QImode using movzbl */
764 {3, 4, 3}, /* cost of loading integer registers
765 in QImode, HImode and SImode.
766 Relative to reg-reg move (2). */
767 {3, 4, 3}, /* cost of storing integer registers */
768 4, /* cost of reg,reg fld/fst */
769 {4, 4, 12}, /* cost of loading fp registers
770 in SFmode, DFmode and XFmode */
771 {6, 6, 8}, /* cost of storing fp registers
772 in SFmode, DFmode and XFmode */
773 2, /* cost of moving MMX register */
774 {3, 3}, /* cost of loading MMX registers
775 in SImode and DImode */
776 {4, 4}, /* cost of storing MMX registers
777 in SImode and DImode */
778 2, /* cost of moving SSE register */
779 {4, 3, 6}, /* cost of loading SSE registers
780 in SImode, DImode and TImode */
781 {4, 4, 5}, /* cost of storing SSE registers
782 in SImode, DImode and TImode */
783 5, /* MMX or SSE register to integer */
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued.  We set the number of simultaneous prefetches
789 to a large constant to reflect this (it is probably not a good idea to leave
790 the number of prefetches completely unlimited, as their execution also
791 takes some time).  */
792 100, /* number of parallel prefetches */
793 3, /* Branch cost */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800
801 k8_memcpy,
802 k8_memset,
803 4, /* scalar_stmt_cost. */
804 2, /* scalar load_cost. */
805 2, /* scalar_store_cost. */
806 5, /* vec_stmt_cost. */
807 0, /* vec_to_scalar_cost. */
808 2, /* scalar_to_vec_cost. */
809 2, /* vec_align_load_cost. */
810 3, /* vec_unalign_load_cost. */
811 3, /* vec_store_cost. */
812 3, /* cond_taken_branch_cost. */
813 2, /* cond_not_taken_branch_cost. */
814 };
815
816 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
817 for very small blocks it is better to use a loop.  For large blocks, a
818 libcall can do nontemporal accesses and beat inline code considerably.  */
819 static stringop_algs amdfam10_memcpy[2] = {
820 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
821 {-1, rep_prefix_4_byte, false}}},
822 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
823 {-1, libcall, false}}}};
824 static stringop_algs amdfam10_memset[2] = {
825 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
826 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
827 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
828 {-1, libcall, false}}}};
829 struct processor_costs amdfam10_cost = {
830 COSTS_N_INSNS (1), /* cost of an add instruction */
831 COSTS_N_INSNS (2), /* cost of a lea instruction */
832 COSTS_N_INSNS (1), /* variable shift costs */
833 COSTS_N_INSNS (1), /* constant shift costs */
834 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
835 COSTS_N_INSNS (4), /* HI */
836 COSTS_N_INSNS (3), /* SI */
837 COSTS_N_INSNS (4), /* DI */
838 COSTS_N_INSNS (5)}, /* other */
839 0, /* cost of multiply per each bit set */
840 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
841 COSTS_N_INSNS (35), /* HI */
842 COSTS_N_INSNS (51), /* SI */
843 COSTS_N_INSNS (83), /* DI */
844 COSTS_N_INSNS (83)}, /* other */
845 COSTS_N_INSNS (1), /* cost of movsx */
846 COSTS_N_INSNS (1), /* cost of movzx */
847 8, /* "large" insn */
848 9, /* MOVE_RATIO */
849 4, /* cost for loading QImode using movzbl */
850 {3, 4, 3}, /* cost of loading integer registers
851 in QImode, HImode and SImode.
852 Relative to reg-reg move (2). */
853 {3, 4, 3}, /* cost of storing integer registers */
854 4, /* cost of reg,reg fld/fst */
855 {4, 4, 12}, /* cost of loading fp registers
856 in SFmode, DFmode and XFmode */
857 {6, 6, 8}, /* cost of storing fp registers
858 in SFmode, DFmode and XFmode */
859 2, /* cost of moving MMX register */
860 {3, 3}, /* cost of loading MMX registers
861 in SImode and DImode */
862 {4, 4}, /* cost of storing MMX registers
863 in SImode and DImode */
864 2, /* cost of moving SSE register */
865 {4, 4, 3}, /* cost of loading SSE registers
866 in SImode, DImode and TImode */
867 {4, 4, 5}, /* cost of storing SSE registers
868 in SImode, DImode and TImode */
869 3, /* MMX or SSE register to integer */
870 /* On K8:
871 MOVD reg64, xmmreg Double FSTORE 4
872 MOVD reg32, xmmreg Double FSTORE 4
873 On AMDFAM10:
874 MOVD reg64, xmmreg Double FADD 3
875 1/1 1/1
876 MOVD reg32, xmmreg Double FADD 3
877 1/1 1/1 */
878 64, /* size of l1 cache. */
879 512, /* size of l2 cache. */
880 64, /* size of prefetch block */
881 /* New AMD processors never drop prefetches; if they cannot be performed
882 immediately, they are queued.  We set the number of simultaneous prefetches
883 to a large constant to reflect this (it is probably not a good idea to leave
884 the number of prefetches completely unlimited, as their execution also
885 takes some time).  */
886 100, /* number of parallel prefetches */
887 2, /* Branch cost */
888 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
889 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
890 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
891 COSTS_N_INSNS (2), /* cost of FABS instruction. */
892 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
893 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
894
895 amdfam10_memcpy,
896 amdfam10_memset,
897 4, /* scalar_stmt_cost. */
898 2, /* scalar load_cost. */
899 2, /* scalar_store_cost. */
900 6, /* vec_stmt_cost. */
901 0, /* vec_to_scalar_cost. */
902 2, /* scalar_to_vec_cost. */
903 2, /* vec_align_load_cost. */
904 2, /* vec_unalign_load_cost. */
905 2, /* vec_store_cost. */
906 2, /* cond_taken_branch_cost. */
907 1, /* cond_not_taken_branch_cost. */
908 };
909
910 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
911 very small blocks it is better to use a loop.  For large blocks, a libcall
912 can do nontemporal accesses and beat inline code considerably.  */
913 static stringop_algs bdver1_memcpy[2] = {
914 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
915 {-1, rep_prefix_4_byte, false}}},
916 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
917 {-1, libcall, false}}}};
918 static stringop_algs bdver1_memset[2] = {
919 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
920 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
921 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
922 {-1, libcall, false}}}};
923
924 const struct processor_costs bdver1_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1), /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (4), /* SI */
932 COSTS_N_INSNS (6), /* DI */
933 COSTS_N_INSNS (6)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (35), /* HI */
937 COSTS_N_INSNS (51), /* SI */
938 COSTS_N_INSNS (83), /* DI */
939 COSTS_N_INSNS (83)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 9, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {5, 5, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 2, /* cost of reg,reg fld/fst */
950 {5, 5, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {4, 4, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {4, 4}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {4, 4}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {4, 4, 4}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {4, 4, 4}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 2, /* MMX or SSE register to integer */
965 /* On K8:
966 MOVD reg64, xmmreg Double FSTORE 4
967 MOVD reg32, xmmreg Double FSTORE 4
968 On AMDFAM10:
969 MOVD reg64, xmmreg Double FADD 3
970 1/1 1/1
971 MOVD reg32, xmmreg Double FADD 3
972 1/1 1/1 */
973 16, /* size of l1 cache. */
974 2048, /* size of l2 cache. */
975 64, /* size of prefetch block */
976 /* New AMD processors never drop prefetches; if they cannot be performed
977 immediately, they are queued.  We set the number of simultaneous prefetches
978 to a large constant to reflect this (it is probably not a good idea to leave
979 the number of prefetches completely unlimited, as their execution also
980 takes some time).  */
981 100, /* number of parallel prefetches */
982 2, /* Branch cost */
983 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
984 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
985 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
986 COSTS_N_INSNS (2), /* cost of FABS instruction. */
987 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
988 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
989
990 bdver1_memcpy,
991 bdver1_memset,
992 6, /* scalar_stmt_cost. */
993 4, /* scalar load_cost. */
994 4, /* scalar_store_cost. */
995 6, /* vec_stmt_cost. */
996 0, /* vec_to_scalar_cost. */
997 2, /* scalar_to_vec_cost. */
998 4, /* vec_align_load_cost. */
999 4, /* vec_unalign_load_cost. */
1000 4, /* vec_store_cost. */
1001 2, /* cond_taken_branch_cost. */
1002 1, /* cond_not_taken_branch_cost. */
1003 };
1004
1005 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1006 very small blocks it is better to use a loop.  For large blocks, a libcall
1007 can do nontemporal accesses and beat inline code considerably.  */
1008
1009 static stringop_algs bdver2_memcpy[2] = {
1010 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1011 {-1, rep_prefix_4_byte, false}}},
1012 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1013 {-1, libcall, false}}}};
1014 static stringop_algs bdver2_memset[2] = {
1015 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1016 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1017 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1018 {-1, libcall, false}}}};
1019
1020 const struct processor_costs bdver2_cost = {
1021 COSTS_N_INSNS (1), /* cost of an add instruction */
1022 COSTS_N_INSNS (1), /* cost of a lea instruction */
1023 COSTS_N_INSNS (1), /* variable shift costs */
1024 COSTS_N_INSNS (1), /* constant shift costs */
1025 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1026 COSTS_N_INSNS (4), /* HI */
1027 COSTS_N_INSNS (4), /* SI */
1028 COSTS_N_INSNS (6), /* DI */
1029 COSTS_N_INSNS (6)}, /* other */
1030 0, /* cost of multiply per each bit set */
1031 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1032 COSTS_N_INSNS (35), /* HI */
1033 COSTS_N_INSNS (51), /* SI */
1034 COSTS_N_INSNS (83), /* DI */
1035 COSTS_N_INSNS (83)}, /* other */
1036 COSTS_N_INSNS (1), /* cost of movsx */
1037 COSTS_N_INSNS (1), /* cost of movzx */
1038 8, /* "large" insn */
1039 9, /* MOVE_RATIO */
1040 4, /* cost for loading QImode using movzbl */
1041 {5, 5, 4}, /* cost of loading integer registers
1042 in QImode, HImode and SImode.
1043 Relative to reg-reg move (2). */
1044 {4, 4, 4}, /* cost of storing integer registers */
1045 2, /* cost of reg,reg fld/fst */
1046 {5, 5, 12}, /* cost of loading fp registers
1047 in SFmode, DFmode and XFmode */
1048 {4, 4, 8}, /* cost of storing fp registers
1049 in SFmode, DFmode and XFmode */
1050 2, /* cost of moving MMX register */
1051 {4, 4}, /* cost of loading MMX registers
1052 in SImode and DImode */
1053 {4, 4}, /* cost of storing MMX registers
1054 in SImode and DImode */
1055 2, /* cost of moving SSE register */
1056 {4, 4, 4}, /* cost of loading SSE registers
1057 in SImode, DImode and TImode */
1058 {4, 4, 4}, /* cost of storing SSE registers
1059 in SImode, DImode and TImode */
1060 2, /* MMX or SSE register to integer */
1061 /* On K8:
1062 MOVD reg64, xmmreg Double FSTORE 4
1063 MOVD reg32, xmmreg Double FSTORE 4
1064 On AMDFAM10:
1065 MOVD reg64, xmmreg Double FADD 3
1066 1/1 1/1
1067 MOVD reg32, xmmreg Double FADD 3
1068 1/1 1/1 */
1069 16, /* size of l1 cache. */
1070 2048, /* size of l2 cache. */
1071 64, /* size of prefetch block */
1072 /* New AMD processors never drop prefetches; if they cannot be performed
1073 immediately, they are queued.  We set the number of simultaneous prefetches
1074 to a large constant to reflect this (it is probably not a good idea to leave
1075 the number of prefetches completely unlimited, as their execution also
1076 takes some time).  */
1077 100, /* number of parallel prefetches */
1078 2, /* Branch cost */
1079 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1080 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1081 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1082 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1083 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1084 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1085
1086 bdver2_memcpy,
1087 bdver2_memset,
1088 6, /* scalar_stmt_cost. */
1089 4, /* scalar load_cost. */
1090 4, /* scalar_store_cost. */
1091 6, /* vec_stmt_cost. */
1092 0, /* vec_to_scalar_cost. */
1093 2, /* scalar_to_vec_cost. */
1094 4, /* vec_align_load_cost. */
1095 4, /* vec_unalign_load_cost. */
1096 4, /* vec_store_cost. */
1097 2, /* cond_taken_branch_cost. */
1098 1, /* cond_not_taken_branch_cost. */
1099 };
1100
1101
1102 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1103 very small blocks it is better to use a loop.  For large blocks, a libcall
1104 can do nontemporal accesses and beat inline code considerably.  */
1105 static stringop_algs bdver3_memcpy[2] = {
1106 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1107 {-1, rep_prefix_4_byte, false}}},
1108 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1109 {-1, libcall, false}}}};
1110 static stringop_algs bdver3_memset[2] = {
1111 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1112 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1113 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1114 {-1, libcall, false}}}};
1115 struct processor_costs bdver3_cost = {
1116 COSTS_N_INSNS (1), /* cost of an add instruction */
1117 COSTS_N_INSNS (1), /* cost of a lea instruction */
1118 COSTS_N_INSNS (1), /* variable shift costs */
1119 COSTS_N_INSNS (1), /* constant shift costs */
1120 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1121 COSTS_N_INSNS (4), /* HI */
1122 COSTS_N_INSNS (4), /* SI */
1123 COSTS_N_INSNS (6), /* DI */
1124 COSTS_N_INSNS (6)}, /* other */
1125 0, /* cost of multiply per each bit set */
1126 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1127 COSTS_N_INSNS (35), /* HI */
1128 COSTS_N_INSNS (51), /* SI */
1129 COSTS_N_INSNS (83), /* DI */
1130 COSTS_N_INSNS (83)}, /* other */
1131 COSTS_N_INSNS (1), /* cost of movsx */
1132 COSTS_N_INSNS (1), /* cost of movzx */
1133 8, /* "large" insn */
1134 9, /* MOVE_RATIO */
1135 4, /* cost for loading QImode using movzbl */
1136 {5, 5, 4}, /* cost of loading integer registers
1137 in QImode, HImode and SImode.
1138 Relative to reg-reg move (2). */
1139 {4, 4, 4}, /* cost of storing integer registers */
1140 2, /* cost of reg,reg fld/fst */
1141 {5, 5, 12}, /* cost of loading fp registers
1142 in SFmode, DFmode and XFmode */
1143 {4, 4, 8}, /* cost of storing fp registers
1144 in SFmode, DFmode and XFmode */
1145 2, /* cost of moving MMX register */
1146 {4, 4}, /* cost of loading MMX registers
1147 in SImode and DImode */
1148 {4, 4}, /* cost of storing MMX registers
1149 in SImode and DImode */
1150 2, /* cost of moving SSE register */
1151 {4, 4, 4}, /* cost of loading SSE registers
1152 in SImode, DImode and TImode */
1153 {4, 4, 4}, /* cost of storing SSE registers
1154 in SImode, DImode and TImode */
1155 2, /* MMX or SSE register to integer */
1156 16, /* size of l1 cache. */
1157 2048, /* size of l2 cache. */
1158 64, /* size of prefetch block */
1159 /* New AMD processors never drop prefetches; if they cannot be performed
1160 immediately, they are queued.  We set the number of simultaneous prefetches
1161 to a large constant to reflect this (it is probably not a good idea to leave
1162 the number of prefetches completely unlimited, as their execution also
1163 takes some time).  */
1164 100, /* number of parallel prefetches */
1165 2, /* Branch cost */
1166 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1167 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1168 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1169 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1170 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1171 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1172
1173 bdver3_memcpy,
1174 bdver3_memset,
1175 6, /* scalar_stmt_cost. */
1176 4, /* scalar load_cost. */
1177 4, /* scalar_store_cost. */
1178 6, /* vec_stmt_cost. */
1179 0, /* vec_to_scalar_cost. */
1180 2, /* scalar_to_vec_cost. */
1181 4, /* vec_align_load_cost. */
1182 4, /* vec_unalign_load_cost. */
1183 4, /* vec_store_cost. */
1184 2, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
1186 };
1187
1188 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1189 very small blocks it is better to use a loop.  For large blocks, a libcall
1190 can do nontemporal accesses and beat inline code considerably.  */
1191 static stringop_algs bdver4_memcpy[2] = {
1192 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1193 {-1, rep_prefix_4_byte, false}}},
1194 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1195 {-1, libcall, false}}}};
1196 static stringop_algs bdver4_memset[2] = {
1197 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1198 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1199 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1200 {-1, libcall, false}}}};
1201 struct processor_costs bdver4_cost = {
1202 COSTS_N_INSNS (1), /* cost of an add instruction */
1203 COSTS_N_INSNS (1), /* cost of a lea instruction */
1204 COSTS_N_INSNS (1), /* variable shift costs */
1205 COSTS_N_INSNS (1), /* constant shift costs */
1206 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1207 COSTS_N_INSNS (4), /* HI */
1208 COSTS_N_INSNS (4), /* SI */
1209 COSTS_N_INSNS (6), /* DI */
1210 COSTS_N_INSNS (6)}, /* other */
1211 0, /* cost of multiply per each bit set */
1212 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1213 COSTS_N_INSNS (35), /* HI */
1214 COSTS_N_INSNS (51), /* SI */
1215 COSTS_N_INSNS (83), /* DI */
1216 COSTS_N_INSNS (83)}, /* other */
1217 COSTS_N_INSNS (1), /* cost of movsx */
1218 COSTS_N_INSNS (1), /* cost of movzx */
1219 8, /* "large" insn */
1220 9, /* MOVE_RATIO */
1221 4, /* cost for loading QImode using movzbl */
1222 {5, 5, 4}, /* cost of loading integer registers
1223 in QImode, HImode and SImode.
1224 Relative to reg-reg move (2). */
1225 {4, 4, 4}, /* cost of storing integer registers */
1226 2, /* cost of reg,reg fld/fst */
1227 {5, 5, 12}, /* cost of loading fp registers
1228 in SFmode, DFmode and XFmode */
1229 {4, 4, 8}, /* cost of storing fp registers
1230 in SFmode, DFmode and XFmode */
1231 2, /* cost of moving MMX register */
1232 {4, 4}, /* cost of loading MMX registers
1233 in SImode and DImode */
1234 {4, 4}, /* cost of storing MMX registers
1235 in SImode and DImode */
1236 2, /* cost of moving SSE register */
1237 {4, 4, 4}, /* cost of loading SSE registers
1238 in SImode, DImode and TImode */
1239 {4, 4, 4}, /* cost of storing SSE registers
1240 in SImode, DImode and TImode */
1241 2, /* MMX or SSE register to integer */
1242 16, /* size of l1 cache. */
1243 2048, /* size of l2 cache. */
1244 64, /* size of prefetch block */
1245 /* New AMD processors never drop prefetches; if they cannot be performed
1246 immediately, they are queued.  We set the number of simultaneous prefetches
1247 to a large constant to reflect this (it is probably not a good idea to leave
1248 the number of prefetches completely unlimited, as their execution also
1249 takes some time).  */
1250 100, /* number of parallel prefetches */
1251 2, /* Branch cost */
1252 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1253 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1254 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1255 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1256 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1257 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1258
1259 bdver4_memcpy,
1260 bdver4_memset,
1261 6, /* scalar_stmt_cost. */
1262 4, /* scalar load_cost. */
1263 4, /* scalar_store_cost. */
1264 6, /* vec_stmt_cost. */
1265 0, /* vec_to_scalar_cost. */
1266 2, /* scalar_to_vec_cost. */
1267 4, /* vec_align_load_cost. */
1268 4, /* vec_unalign_load_cost. */
1269 4, /* vec_store_cost. */
1270 2, /* cond_taken_branch_cost. */
1271 1, /* cond_not_taken_branch_cost. */
1272 };
1273
1274 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1275 very small blocks it is better to use a loop.  For large blocks, a libcall
1276 can do nontemporal accesses and beat inline code considerably.  */
1277 static stringop_algs btver1_memcpy[2] = {
1278 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1279 {-1, rep_prefix_4_byte, false}}},
1280 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1281 {-1, libcall, false}}}};
1282 static stringop_algs btver1_memset[2] = {
1283 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1284 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1285 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1286 {-1, libcall, false}}}};
1287 const struct processor_costs btver1_cost = {
1288 COSTS_N_INSNS (1), /* cost of an add instruction */
1289 COSTS_N_INSNS (2), /* cost of a lea instruction */
1290 COSTS_N_INSNS (1), /* variable shift costs */
1291 COSTS_N_INSNS (1), /* constant shift costs */
1292 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1293 COSTS_N_INSNS (4), /* HI */
1294 COSTS_N_INSNS (3), /* SI */
1295 COSTS_N_INSNS (4), /* DI */
1296 COSTS_N_INSNS (5)}, /* other */
1297 0, /* cost of multiply per each bit set */
1298 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1299 COSTS_N_INSNS (35), /* HI */
1300 COSTS_N_INSNS (51), /* SI */
1301 COSTS_N_INSNS (83), /* DI */
1302 COSTS_N_INSNS (83)}, /* other */
1303 COSTS_N_INSNS (1), /* cost of movsx */
1304 COSTS_N_INSNS (1), /* cost of movzx */
1305 8, /* "large" insn */
1306 9, /* MOVE_RATIO */
1307 4, /* cost for loading QImode using movzbl */
1308 {3, 4, 3}, /* cost of loading integer registers
1309 in QImode, HImode and SImode.
1310 Relative to reg-reg move (2). */
1311 {3, 4, 3}, /* cost of storing integer registers */
1312 4, /* cost of reg,reg fld/fst */
1313 {4, 4, 12}, /* cost of loading fp registers
1314 in SFmode, DFmode and XFmode */
1315 {6, 6, 8}, /* cost of storing fp registers
1316 in SFmode, DFmode and XFmode */
1317 2, /* cost of moving MMX register */
1318 {3, 3}, /* cost of loading MMX registers
1319 in SImode and DImode */
1320 {4, 4}, /* cost of storing MMX registers
1321 in SImode and DImode */
1322 2, /* cost of moving SSE register */
1323 {4, 4, 3}, /* cost of loading SSE registers
1324 in SImode, DImode and TImode */
1325 {4, 4, 5}, /* cost of storing SSE registers
1326 in SImode, DImode and TImode */
1327 3, /* MMX or SSE register to integer */
1328 /* On K8:
1329 MOVD reg64, xmmreg Double FSTORE 4
1330 MOVD reg32, xmmreg Double FSTORE 4
1331 On AMDFAM10:
1332 MOVD reg64, xmmreg Double FADD 3
1333 1/1 1/1
1334 MOVD reg32, xmmreg Double FADD 3
1335 1/1 1/1 */
1336 32, /* size of l1 cache. */
1337 512, /* size of l2 cache. */
1338 64, /* size of prefetch block */
1339 100, /* number of parallel prefetches */
1340 2, /* Branch cost */
1341 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1342 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1343 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1344 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1345 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1346 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1347
1348 btver1_memcpy,
1349 btver1_memset,
1350 4, /* scalar_stmt_cost. */
1351 2, /* scalar load_cost. */
1352 2, /* scalar_store_cost. */
1353 6, /* vec_stmt_cost. */
1354 0, /* vec_to_scalar_cost. */
1355 2, /* scalar_to_vec_cost. */
1356 2, /* vec_align_load_cost. */
1357 2, /* vec_unalign_load_cost. */
1358 2, /* vec_store_cost. */
1359 2, /* cond_taken_branch_cost. */
1360 1, /* cond_not_taken_branch_cost. */
1361 };
1362
1363 static stringop_algs btver2_memcpy[2] = {
1364 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1365 {-1, rep_prefix_4_byte, false}}},
1366 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1367 {-1, libcall, false}}}};
1368 static stringop_algs btver2_memset[2] = {
1369 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1370 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1371 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1372 {-1, libcall, false}}}};
1373 const struct processor_costs btver2_cost = {
1374 COSTS_N_INSNS (1), /* cost of an add instruction */
1375 COSTS_N_INSNS (2), /* cost of a lea instruction */
1376 COSTS_N_INSNS (1), /* variable shift costs */
1377 COSTS_N_INSNS (1), /* constant shift costs */
1378 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1379 COSTS_N_INSNS (4), /* HI */
1380 COSTS_N_INSNS (3), /* SI */
1381 COSTS_N_INSNS (4), /* DI */
1382 COSTS_N_INSNS (5)}, /* other */
1383 0, /* cost of multiply per each bit set */
1384 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1385 COSTS_N_INSNS (35), /* HI */
1386 COSTS_N_INSNS (51), /* SI */
1387 COSTS_N_INSNS (83), /* DI */
1388 COSTS_N_INSNS (83)}, /* other */
1389 COSTS_N_INSNS (1), /* cost of movsx */
1390 COSTS_N_INSNS (1), /* cost of movzx */
1391 8, /* "large" insn */
1392 9, /* MOVE_RATIO */
1393 4, /* cost for loading QImode using movzbl */
1394 {3, 4, 3}, /* cost of loading integer registers
1395 in QImode, HImode and SImode.
1396 Relative to reg-reg move (2). */
1397 {3, 4, 3}, /* cost of storing integer registers */
1398 4, /* cost of reg,reg fld/fst */
1399 {4, 4, 12}, /* cost of loading fp registers
1400 in SFmode, DFmode and XFmode */
1401 {6, 6, 8}, /* cost of storing fp registers
1402 in SFmode, DFmode and XFmode */
1403 2, /* cost of moving MMX register */
1404 {3, 3}, /* cost of loading MMX registers
1405 in SImode and DImode */
1406 {4, 4}, /* cost of storing MMX registers
1407 in SImode and DImode */
1408 2, /* cost of moving SSE register */
1409 {4, 4, 3}, /* cost of loading SSE registers
1410 in SImode, DImode and TImode */
1411 {4, 4, 5}, /* cost of storing SSE registers
1412 in SImode, DImode and TImode */
1413 3, /* MMX or SSE register to integer */
1414 /* On K8:
1415 MOVD reg64, xmmreg Double FSTORE 4
1416 MOVD reg32, xmmreg Double FSTORE 4
1417 On AMDFAM10:
1418 MOVD reg64, xmmreg Double FADD 3
1419 1/1 1/1
1420 MOVD reg32, xmmreg Double FADD 3
1421 1/1 1/1 */
1422 32, /* size of l1 cache. */
1423 2048, /* size of l2 cache. */
1424 64, /* size of prefetch block */
1425 100, /* number of parallel prefetches */
1426 2, /* Branch cost */
1427 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1428 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1429 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1430 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1431 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1432 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1433 btver2_memcpy,
1434 btver2_memset,
1435 4, /* scalar_stmt_cost. */
1436 2, /* scalar_load_cost. */
1437 2, /* scalar_store_cost. */
1438 6, /* vec_stmt_cost. */
1439 0, /* vec_to_scalar_cost. */
1440 2, /* scalar_to_vec_cost. */
1441 2, /* vec_align_load_cost. */
1442 2, /* vec_unalign_load_cost. */
1443 2, /* vec_store_cost. */
1444 2, /* cond_taken_branch_cost. */
1445 1, /* cond_not_taken_branch_cost. */
1446 };
1447
1448 static stringop_algs pentium4_memcpy[2] = {
1449 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1450 DUMMY_STRINGOP_ALGS};
1451 static stringop_algs pentium4_memset[2] = {
1452 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1453 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1454 DUMMY_STRINGOP_ALGS};
1455
1456 static const
1457 struct processor_costs pentium4_cost = {
1458 COSTS_N_INSNS (1), /* cost of an add instruction */
1459 COSTS_N_INSNS (3), /* cost of a lea instruction */
1460 COSTS_N_INSNS (4), /* variable shift costs */
1461 COSTS_N_INSNS (4), /* constant shift costs */
1462 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1463 COSTS_N_INSNS (15), /* HI */
1464 COSTS_N_INSNS (15), /* SI */
1465 COSTS_N_INSNS (15), /* DI */
1466 COSTS_N_INSNS (15)}, /* other */
1467 0, /* cost of multiply per each bit set */
1468 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1469 COSTS_N_INSNS (56), /* HI */
1470 COSTS_N_INSNS (56), /* SI */
1471 COSTS_N_INSNS (56), /* DI */
1472 COSTS_N_INSNS (56)}, /* other */
1473 COSTS_N_INSNS (1), /* cost of movsx */
1474 COSTS_N_INSNS (1), /* cost of movzx */
1475 16, /* "large" insn */
1476 6, /* MOVE_RATIO */
1477 2, /* cost for loading QImode using movzbl */
1478 {4, 5, 4}, /* cost of loading integer registers
1479 in QImode, HImode and SImode.
1480 Relative to reg-reg move (2). */
1481 {2, 3, 2}, /* cost of storing integer registers */
1482 2, /* cost of reg,reg fld/fst */
1483 {2, 2, 6}, /* cost of loading fp registers
1484 in SFmode, DFmode and XFmode */
1485 {4, 4, 6}, /* cost of storing fp registers
1486 in SFmode, DFmode and XFmode */
1487 2, /* cost of moving MMX register */
1488 {2, 2}, /* cost of loading MMX registers
1489 in SImode and DImode */
1490 {2, 2}, /* cost of storing MMX registers
1491 in SImode and DImode */
1492 12, /* cost of moving SSE register */
1493 {12, 12, 12}, /* cost of loading SSE registers
1494 in SImode, DImode and TImode */
1495 {2, 2, 8}, /* cost of storing SSE registers
1496 in SImode, DImode and TImode */
1497 10, /* MMX or SSE register to integer */
1498 8, /* size of l1 cache. */
1499 256, /* size of l2 cache. */
1500 64, /* size of prefetch block */
1501 6, /* number of parallel prefetches */
1502 2, /* Branch cost */
1503 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1504 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1505 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1506 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1507 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1508 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1509 pentium4_memcpy,
1510 pentium4_memset,
1511 1, /* scalar_stmt_cost. */
1512 1, /* scalar_load_cost. */
1513 1, /* scalar_store_cost. */
1514 1, /* vec_stmt_cost. */
1515 1, /* vec_to_scalar_cost. */
1516 1, /* scalar_to_vec_cost. */
1517 1, /* vec_align_load_cost. */
1518 2, /* vec_unalign_load_cost. */
1519 1, /* vec_store_cost. */
1520 3, /* cond_taken_branch_cost. */
1521 1, /* cond_not_taken_branch_cost. */
1522 };
1523
1524 static stringop_algs nocona_memcpy[2] = {
1525 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1526 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1527 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1528
1529 static stringop_algs nocona_memset[2] = {
1530 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1531 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1532 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1533 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1534
1535 static const
1536 struct processor_costs nocona_cost = {
1537 COSTS_N_INSNS (1), /* cost of an add instruction */
1538 COSTS_N_INSNS (1), /* cost of a lea instruction */
1539 COSTS_N_INSNS (1), /* variable shift costs */
1540 COSTS_N_INSNS (1), /* constant shift costs */
1541 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1542 COSTS_N_INSNS (10), /* HI */
1543 COSTS_N_INSNS (10), /* SI */
1544 COSTS_N_INSNS (10), /* DI */
1545 COSTS_N_INSNS (10)}, /* other */
1546 0, /* cost of multiply per each bit set */
1547 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1548 COSTS_N_INSNS (66), /* HI */
1549 COSTS_N_INSNS (66), /* SI */
1550 COSTS_N_INSNS (66), /* DI */
1551 COSTS_N_INSNS (66)}, /* other */
1552 COSTS_N_INSNS (1), /* cost of movsx */
1553 COSTS_N_INSNS (1), /* cost of movzx */
1554 16, /* "large" insn */
1555 17, /* MOVE_RATIO */
1556 4, /* cost for loading QImode using movzbl */
1557 {4, 4, 4}, /* cost of loading integer registers
1558 in QImode, HImode and SImode.
1559 Relative to reg-reg move (2). */
1560 {4, 4, 4}, /* cost of storing integer registers */
1561 3, /* cost of reg,reg fld/fst */
1562 {12, 12, 12}, /* cost of loading fp registers
1563 in SFmode, DFmode and XFmode */
1564 {4, 4, 4}, /* cost of storing fp registers
1565 in SFmode, DFmode and XFmode */
1566 6, /* cost of moving MMX register */
1567 {12, 12}, /* cost of loading MMX registers
1568 in SImode and DImode */
1569 {12, 12}, /* cost of storing MMX registers
1570 in SImode and DImode */
1571 6, /* cost of moving SSE register */
1572 {12, 12, 12}, /* cost of loading SSE registers
1573 in SImode, DImode and TImode */
1574 {12, 12, 12}, /* cost of storing SSE registers
1575 in SImode, DImode and TImode */
1576 8, /* MMX or SSE register to integer */
1577 8, /* size of l1 cache. */
1578 1024, /* size of l2 cache. */
1579 64, /* size of prefetch block */
1580 8, /* number of parallel prefetches */
1581 1, /* Branch cost */
1582 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1583 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1584 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1585 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1586 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1587 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1588 nocona_memcpy,
1589 nocona_memset,
1590 1, /* scalar_stmt_cost. */
1591 1, /* scalar_load_cost. */
1592 1, /* scalar_store_cost. */
1593 1, /* vec_stmt_cost. */
1594 1, /* vec_to_scalar_cost. */
1595 1, /* scalar_to_vec_cost. */
1596 1, /* vec_align_load_cost. */
1597 2, /* vec_unalign_load_cost. */
1598 1, /* vec_store_cost. */
1599 3, /* cond_taken_branch_cost. */
1600 1, /* cond_not_taken_branch_cost. */
1601 };
1602
1603 static stringop_algs atom_memcpy[2] = {
1604 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1605 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1606 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1607 static stringop_algs atom_memset[2] = {
1608 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1609 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1610 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1611 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1612 static const
1613 struct processor_costs atom_cost = {
1614 COSTS_N_INSNS (1), /* cost of an add instruction */
1615 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1616 COSTS_N_INSNS (1), /* variable shift costs */
1617 COSTS_N_INSNS (1), /* constant shift costs */
1618 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1619 COSTS_N_INSNS (4), /* HI */
1620 COSTS_N_INSNS (3), /* SI */
1621 COSTS_N_INSNS (4), /* DI */
1622 COSTS_N_INSNS (2)}, /* other */
1623 0, /* cost of multiply per each bit set */
1624 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1625 COSTS_N_INSNS (26), /* HI */
1626 COSTS_N_INSNS (42), /* SI */
1627 COSTS_N_INSNS (74), /* DI */
1628 COSTS_N_INSNS (74)}, /* other */
1629 COSTS_N_INSNS (1), /* cost of movsx */
1630 COSTS_N_INSNS (1), /* cost of movzx */
1631 8, /* "large" insn */
1632 17, /* MOVE_RATIO */
1633 4, /* cost for loading QImode using movzbl */
1634 {4, 4, 4}, /* cost of loading integer registers
1635 in QImode, HImode and SImode.
1636 Relative to reg-reg move (2). */
1637 {4, 4, 4}, /* cost of storing integer registers */
1638 4, /* cost of reg,reg fld/fst */
1639 {12, 12, 12}, /* cost of loading fp registers
1640 in SFmode, DFmode and XFmode */
1641 {6, 6, 8}, /* cost of storing fp registers
1642 in SFmode, DFmode and XFmode */
1643 2, /* cost of moving MMX register */
1644 {8, 8}, /* cost of loading MMX registers
1645 in SImode and DImode */
1646 {8, 8}, /* cost of storing MMX registers
1647 in SImode and DImode */
1648 2, /* cost of moving SSE register */
1649 {8, 8, 8}, /* cost of loading SSE registers
1650 in SImode, DImode and TImode */
1651 {8, 8, 8}, /* cost of storing SSE registers
1652 in SImode, DImode and TImode */
1653 5, /* MMX or SSE register to integer */
1654 32, /* size of l1 cache. */
1655 256, /* size of l2 cache. */
1656 64, /* size of prefetch block */
1657 6, /* number of parallel prefetches */
1658 3, /* Branch cost */
1659 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1660 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1661 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1662 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1663 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1664 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1665 atom_memcpy,
1666 atom_memset,
1667 1, /* scalar_stmt_cost. */
1668 1, /* scalar_load_cost. */
1669 1, /* scalar_store_cost. */
1670 1, /* vec_stmt_cost. */
1671 1, /* vec_to_scalar_cost. */
1672 1, /* scalar_to_vec_cost. */
1673 1, /* vec_align_load_cost. */
1674 2, /* vec_unalign_load_cost. */
1675 1, /* vec_store_cost. */
1676 3, /* cond_taken_branch_cost. */
1677 1, /* cond_not_taken_branch_cost. */
1678 };
1679
1680 static stringop_algs slm_memcpy[2] = {
1681 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1682 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1683 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1684 static stringop_algs slm_memset[2] = {
1685 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1686 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1687 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1688 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1689 static const
1690 struct processor_costs slm_cost = {
1691 COSTS_N_INSNS (1), /* cost of an add instruction */
1692 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1693 COSTS_N_INSNS (1), /* variable shift costs */
1694 COSTS_N_INSNS (1), /* constant shift costs */
1695 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1696 COSTS_N_INSNS (3), /* HI */
1697 COSTS_N_INSNS (3), /* SI */
1698 COSTS_N_INSNS (4), /* DI */
1699 COSTS_N_INSNS (2)}, /* other */
1700 0, /* cost of multiply per each bit set */
1701 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1702 COSTS_N_INSNS (26), /* HI */
1703 COSTS_N_INSNS (42), /* SI */
1704 COSTS_N_INSNS (74), /* DI */
1705 COSTS_N_INSNS (74)}, /* other */
1706 COSTS_N_INSNS (1), /* cost of movsx */
1707 COSTS_N_INSNS (1), /* cost of movzx */
1708 8, /* "large" insn */
1709 17, /* MOVE_RATIO */
1710 4, /* cost for loading QImode using movzbl */
1711 {4, 4, 4}, /* cost of loading integer registers
1712 in QImode, HImode and SImode.
1713 Relative to reg-reg move (2). */
1714 {4, 4, 4}, /* cost of storing integer registers */
1715 4, /* cost of reg,reg fld/fst */
1716 {12, 12, 12}, /* cost of loading fp registers
1717 in SFmode, DFmode and XFmode */
1718 {6, 6, 8}, /* cost of storing fp registers
1719 in SFmode, DFmode and XFmode */
1720 2, /* cost of moving MMX register */
1721 {8, 8}, /* cost of loading MMX registers
1722 in SImode and DImode */
1723 {8, 8}, /* cost of storing MMX registers
1724 in SImode and DImode */
1725 2, /* cost of moving SSE register */
1726 {8, 8, 8}, /* cost of loading SSE registers
1727 in SImode, DImode and TImode */
1728 {8, 8, 8}, /* cost of storing SSE registers
1729 in SImode, DImode and TImode */
1730 5, /* MMX or SSE register to integer */
1731 32, /* size of l1 cache. */
1732 256, /* size of l2 cache. */
1733 64, /* size of prefetch block */
1734 6, /* number of parallel prefetches */
1735 3, /* Branch cost */
1736 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1737 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1738 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1739 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1740 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1741 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1742 slm_memcpy,
1743 slm_memset,
1744 1, /* scalar_stmt_cost. */
1745 1, /* scalar_load_cost. */
1746 1, /* scalar_store_cost. */
1747 1, /* vec_stmt_cost. */
1748 4, /* vec_to_scalar_cost. */
1749 1, /* scalar_to_vec_cost. */
1750 1, /* vec_align_load_cost. */
1751 2, /* vec_unalign_load_cost. */
1752 1, /* vec_store_cost. */
1753 3, /* cond_taken_branch_cost. */
1754 1, /* cond_not_taken_branch_cost. */
1755 };
1756
1757 static stringop_algs intel_memcpy[2] = {
1758 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1759 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1760 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1761 static stringop_algs intel_memset[2] = {
1762 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1763 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1764 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1765 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1766 static const
1767 struct processor_costs intel_cost = {
1768 COSTS_N_INSNS (1), /* cost of an add instruction */
1769 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1770 COSTS_N_INSNS (1), /* variable shift costs */
1771 COSTS_N_INSNS (1), /* constant shift costs */
1772 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1773 COSTS_N_INSNS (3), /* HI */
1774 COSTS_N_INSNS (3), /* SI */
1775 COSTS_N_INSNS (4), /* DI */
1776 COSTS_N_INSNS (2)}, /* other */
1777 0, /* cost of multiply per each bit set */
1778 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1779 COSTS_N_INSNS (26), /* HI */
1780 COSTS_N_INSNS (42), /* SI */
1781 COSTS_N_INSNS (74), /* DI */
1782 COSTS_N_INSNS (74)}, /* other */
1783 COSTS_N_INSNS (1), /* cost of movsx */
1784 COSTS_N_INSNS (1), /* cost of movzx */
1785 8, /* "large" insn */
1786 17, /* MOVE_RATIO */
1787 4, /* cost for loading QImode using movzbl */
1788 {4, 4, 4}, /* cost of loading integer registers
1789 in QImode, HImode and SImode.
1790 Relative to reg-reg move (2). */
1791 {4, 4, 4}, /* cost of storing integer registers */
1792 4, /* cost of reg,reg fld/fst */
1793 {12, 12, 12}, /* cost of loading fp registers
1794 in SFmode, DFmode and XFmode */
1795 {6, 6, 8}, /* cost of storing fp registers
1796 in SFmode, DFmode and XFmode */
1797 2, /* cost of moving MMX register */
1798 {8, 8}, /* cost of loading MMX registers
1799 in SImode and DImode */
1800 {8, 8}, /* cost of storing MMX registers
1801 in SImode and DImode */
1802 2, /* cost of moving SSE register */
1803 {8, 8, 8}, /* cost of loading SSE registers
1804 in SImode, DImode and TImode */
1805 {8, 8, 8}, /* cost of storing SSE registers
1806 in SImode, DImode and TImode */
1807 5, /* MMX or SSE register to integer */
1808 32, /* size of l1 cache. */
1809 256, /* size of l2 cache. */
1810 64, /* size of prefetch block */
1811 6, /* number of parallel prefetches */
1812 3, /* Branch cost */
1813 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1814 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1815 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1816 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1817 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1818 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1819 intel_memcpy,
1820 intel_memset,
1821 1, /* scalar_stmt_cost. */
1822 1, /* scalar_load_cost. */
1823 1, /* scalar_store_cost. */
1824 1, /* vec_stmt_cost. */
1825 4, /* vec_to_scalar_cost. */
1826 1, /* scalar_to_vec_cost. */
1827 1, /* vec_align_load_cost. */
1828 2, /* vec_unalign_load_cost. */
1829 1, /* vec_store_cost. */
1830 3, /* cond_taken_branch_cost. */
1831 1, /* cond_not_taken_branch_cost. */
1832 };
1833
1834 /* Generic should produce code tuned for Core i7 (and newer chips)
1835 and btver1 (and newer chips). */
1836
1837 static stringop_algs generic_memcpy[2] = {
1838 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1839 {-1, libcall, false}}},
1840 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1841 {-1, libcall, false}}}};
1842 static stringop_algs generic_memset[2] = {
1843 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1844 {-1, libcall, false}}},
1845 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1846 {-1, libcall, false}}}};
1847 static const
1848 struct processor_costs generic_cost = {
1849 COSTS_N_INSNS (1), /* cost of an add instruction */
1850 /* On all chips taken into consideration, lea is 2 cycles or more. With
1851 that cost, however, our current implementation of synth_mult ends up
1852 using unnecessary temporary registers, causing regressions on several
1853 SPECfp benchmarks. */
1854 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1855 COSTS_N_INSNS (1), /* variable shift costs */
1856 COSTS_N_INSNS (1), /* constant shift costs */
1857 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1858 COSTS_N_INSNS (4), /* HI */
1859 COSTS_N_INSNS (3), /* SI */
1860 COSTS_N_INSNS (4), /* DI */
1861 COSTS_N_INSNS (2)}, /* other */
1862 0, /* cost of multiply per each bit set */
1863 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1864 COSTS_N_INSNS (26), /* HI */
1865 COSTS_N_INSNS (42), /* SI */
1866 COSTS_N_INSNS (74), /* DI */
1867 COSTS_N_INSNS (74)}, /* other */
1868 COSTS_N_INSNS (1), /* cost of movsx */
1869 COSTS_N_INSNS (1), /* cost of movzx */
1870 8, /* "large" insn */
1871 17, /* MOVE_RATIO */
1872 4, /* cost for loading QImode using movzbl */
1873 {4, 4, 4}, /* cost of loading integer registers
1874 in QImode, HImode and SImode.
1875 Relative to reg-reg move (2). */
1876 {4, 4, 4}, /* cost of storing integer registers */
1877 4, /* cost of reg,reg fld/fst */
1878 {12, 12, 12}, /* cost of loading fp registers
1879 in SFmode, DFmode and XFmode */
1880 {6, 6, 8}, /* cost of storing fp registers
1881 in SFmode, DFmode and XFmode */
1882 2, /* cost of moving MMX register */
1883 {8, 8}, /* cost of loading MMX registers
1884 in SImode and DImode */
1885 {8, 8}, /* cost of storing MMX registers
1886 in SImode and DImode */
1887 2, /* cost of moving SSE register */
1888 {8, 8, 8}, /* cost of loading SSE registers
1889 in SImode, DImode and TImode */
1890 {8, 8, 8}, /* cost of storing SSE registers
1891 in SImode, DImode and TImode */
1892 5, /* MMX or SSE register to integer */
1893 32, /* size of l1 cache. */
1894 512, /* size of l2 cache. */
1895 64, /* size of prefetch block */
1896 6, /* number of parallel prefetches */
1897 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1898 value is increased to the perhaps more appropriate value of 5. */
1899 3, /* Branch cost */
1900 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1901 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1902 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1903 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1904 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1905 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1906 generic_memcpy,
1907 generic_memset,
1908 1, /* scalar_stmt_cost. */
1909 1, /* scalar_load_cost. */
1910 1, /* scalar_store_cost. */
1911 1, /* vec_stmt_cost. */
1912 1, /* vec_to_scalar_cost. */
1913 1, /* scalar_to_vec_cost. */
1914 1, /* vec_align_load_cost. */
1915 2, /* vec_unalign_load_cost. */
1916 1, /* vec_store_cost. */
1917 3, /* cond_taken_branch_cost. */
1918 1, /* cond_not_taken_branch_cost. */
1919 };
1920
1921 /* core_cost should produce code tuned for the Core family of CPUs. */
1922 static stringop_algs core_memcpy[2] = {
1923 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1924 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1925 {-1, libcall, false}}}};
1926 static stringop_algs core_memset[2] = {
1927 {libcall, {{6, loop_1_byte, true},
1928 {24, loop, true},
1929 {8192, rep_prefix_4_byte, true},
1930 {-1, libcall, false}}},
1931 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1932 {-1, libcall, false}}}};
1933
1934 static const
1935 struct processor_costs core_cost = {
1936 COSTS_N_INSNS (1), /* cost of an add instruction */
1937 /* On all chips taken into consideration, lea is 2 cycles or more. With
1938 that cost, however, our current implementation of synth_mult ends up
1939 using unnecessary temporary registers, causing regressions on several
1940 SPECfp benchmarks. */
1941 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1942 COSTS_N_INSNS (1), /* variable shift costs */
1943 COSTS_N_INSNS (1), /* constant shift costs */
1944 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1945 COSTS_N_INSNS (4), /* HI */
1946 COSTS_N_INSNS (3), /* SI */
1947 COSTS_N_INSNS (4), /* DI */
1948 COSTS_N_INSNS (2)}, /* other */
1949 0, /* cost of multiply per each bit set */
1950 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1951 COSTS_N_INSNS (26), /* HI */
1952 COSTS_N_INSNS (42), /* SI */
1953 COSTS_N_INSNS (74), /* DI */
1954 COSTS_N_INSNS (74)}, /* other */
1955 COSTS_N_INSNS (1), /* cost of movsx */
1956 COSTS_N_INSNS (1), /* cost of movzx */
1957 8, /* "large" insn */
1958 17, /* MOVE_RATIO */
1959 4, /* cost for loading QImode using movzbl */
1960 {4, 4, 4}, /* cost of loading integer registers
1961 in QImode, HImode and SImode.
1962 Relative to reg-reg move (2). */
1963 {4, 4, 4}, /* cost of storing integer registers */
1964 4, /* cost of reg,reg fld/fst */
1965 {12, 12, 12}, /* cost of loading fp registers
1966 in SFmode, DFmode and XFmode */
1967 {6, 6, 8}, /* cost of storing fp registers
1968 in SFmode, DFmode and XFmode */
1969 2, /* cost of moving MMX register */
1970 {8, 8}, /* cost of loading MMX registers
1971 in SImode and DImode */
1972 {8, 8}, /* cost of storing MMX registers
1973 in SImode and DImode */
1974 2, /* cost of moving SSE register */
1975 {8, 8, 8}, /* cost of loading SSE registers
1976 in SImode, DImode and TImode */
1977 {8, 8, 8}, /* cost of storing SSE registers
1978 in SImode, DImode and TImode */
1979 5, /* MMX or SSE register to integer */
1980 64, /* size of l1 cache. */
1981 512, /* size of l2 cache. */
1982 64, /* size of prefetch block */
1983 6, /* number of parallel prefetches */
1984 /* FIXME: perhaps a more appropriate value is 5. */
1985 3, /* Branch cost */
1986 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1987 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1988 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1989 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1990 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1991 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1992 core_memcpy,
1993 core_memset,
1994 1, /* scalar_stmt_cost. */
1995 1, /* scalar_load_cost. */
1996 1, /* scalar_store_cost. */
1997 1, /* vec_stmt_cost. */
1998 1, /* vec_to_scalar_cost. */
1999 1, /* scalar_to_vec_cost. */
2000 1, /* vec_align_load_cost. */
2001 2, /* vec_unalign_load_cost. */
2002 1, /* vec_store_cost. */
2003 3, /* cond_taken_branch_cost. */
2004 1, /* cond_not_taken_branch_cost. */
2005 };
2006
2007
2008 /* Set by -mtune. */
2009 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2010
2011 /* Set by -mtune or -Os. */
2012 const struct processor_costs *ix86_cost = &pentium_cost;
2013
2014 /* Processor feature/optimization bitmasks. */
2015 #define m_386 (1<<PROCESSOR_I386)
2016 #define m_486 (1<<PROCESSOR_I486)
2017 #define m_PENT (1<<PROCESSOR_PENTIUM)
2018 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2019 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2020 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2021 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2022 #define m_CORE2 (1<<PROCESSOR_CORE2)
2023 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2024 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2025 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2026 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2027 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2028 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2029 #define m_INTEL (1<<PROCESSOR_INTEL)
2030
2031 #define m_GEODE (1<<PROCESSOR_GEODE)
2032 #define m_K6 (1<<PROCESSOR_K6)
2033 #define m_K6_GEODE (m_K6 | m_GEODE)
2034 #define m_K8 (1<<PROCESSOR_K8)
2035 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2036 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2037 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2038 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2039 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2040 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2041 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2042 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2043 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2044 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2045 #define m_BTVER (m_BTVER1 | m_BTVER2)
2046 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2047
2048 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2049
2050 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2051 #undef DEF_TUNE
2052 #define DEF_TUNE(tune, name, selector) name,
2053 #include "x86-tune.def"
2054 #undef DEF_TUNE
2055 };
2056
2057 /* Feature tests against the various tunings. */
2058 unsigned char ix86_tune_features[X86_TUNE_LAST];
2059
2060 /* Feature tests against the various tunings used to create ix86_tune_features
2061 based on the processor mask. */
2062 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2063 #undef DEF_TUNE
2064 #define DEF_TUNE(tune, name, selector) selector,
2065 #include "x86-tune.def"
2066 #undef DEF_TUNE
2067 };
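/* A minimal sketch (with hypothetical entries) of how the two DEF_TUNE
   expansions above stay in sync: if x86-tune.def contained

     DEF_TUNE (X86_TUNE_EXAMPLE_A, "example_a", m_CORE_ALL)
     DEF_TUNE (X86_TUNE_EXAMPLE_B, "example_b", ~m_386)

   then ix86_tune_feature_names would expand to { "example_a", "example_b" }
   and initial_ix86_tune_features to { m_CORE_ALL, ~m_386 }, so the same
   index describes the same tuning knob in both arrays.  */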
2068
2069 /* Feature tests against the various architecture variations. */
2070 unsigned char ix86_arch_features[X86_ARCH_LAST];
2071
2072 /* Feature tests against the various architecture variations, used to create
2073 ix86_arch_features based on the processor mask. */
2074 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2075 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2076 ~(m_386 | m_486 | m_PENT | m_K6),
2077
2078 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2079 ~m_386,
2080
2081 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2082 ~(m_386 | m_486),
2083
2084 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2085 ~m_386,
2086
2087 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2088 ~m_386,
2089 };
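/* A minimal sketch of how the masks above are consumed (the real loop lives
   further down, in ix86_option_override_internal): each entry is tested
   against the bit of the selected -march processor, roughly

     unsigned int ix86_arch_mask = 1u << ix86_arch;
     for (i = 0; i < X86_ARCH_LAST; i++)
       ix86_arch_features[i]
         = !!(initial_ix86_arch_features[i] & ix86_arch_mask);

   which mirrors what set_ix86_tune_features below does for the tuning
   masks.  */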
2090
2091 /* In case the average insn count for a single function invocation is
2092 lower than this constant, emit fast (but longer) prologue and
2093 epilogue code. */
2094 #define FAST_PROLOGUE_INSN_COUNT 20
2095
2096 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2097 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2098 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2099 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2100
2101 /* Array of the smallest class containing reg number REGNO, indexed by
2102 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2103
2104 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2105 {
2106 /* ax, dx, cx, bx */
2107 AREG, DREG, CREG, BREG,
2108 /* si, di, bp, sp */
2109 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2110 /* FP registers */
2111 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2112 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2113 /* arg pointer */
2114 NON_Q_REGS,
2115 /* flags, fpsr, fpcr, frame */
2116 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2117 /* SSE registers */
2118 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2119 SSE_REGS, SSE_REGS,
2120 /* MMX registers */
2121 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2122 MMX_REGS, MMX_REGS,
2123 /* REX registers */
2124 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2125 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2126 /* SSE REX registers */
2127 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2128 SSE_REGS, SSE_REGS,
2129 /* AVX-512 SSE registers */
2130 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2131 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2132 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2133 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2134 /* Mask registers. */
2135 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2136 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2137 };
2138
2139 /* The "default" register map used in 32bit mode. */
2140
2141 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2142 {
2143 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2144 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2145 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2146 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2147 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2148 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2149 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2150 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23 */
2151 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31 */
2152 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2153 };
2154
2155 /* The "default" register map used in 64bit mode. */
2156
2157 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2158 {
2159 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2160 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2161 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2162 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2163 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2164 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2165 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2166 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2167 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2168 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2169 };
2170
2171 /* Define the register numbers to be used in Dwarf debugging information.
2172 The SVR4 reference port C compiler uses the following register numbers
2173 in its Dwarf output code:
2174 0 for %eax (gcc regno = 0)
2175 1 for %ecx (gcc regno = 2)
2176 2 for %edx (gcc regno = 1)
2177 3 for %ebx (gcc regno = 3)
2178 4 for %esp (gcc regno = 7)
2179 5 for %ebp (gcc regno = 6)
2180 6 for %esi (gcc regno = 4)
2181 7 for %edi (gcc regno = 5)
2182 The following three DWARF register numbers are never generated by
2183 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2184 believes these numbers have these meanings.
2185 8 for %eip (no gcc equivalent)
2186 9 for %eflags (gcc regno = 17)
2187 10 for %trapno (no gcc equivalent)
2188 It is not at all clear how we should number the FP stack registers
2189 for the x86 architecture. If the version of SDB on x86/svr4 were
2190 a bit less brain dead with respect to floating-point then we would
2191 have a precedent to follow with respect to DWARF register numbers
2192 for x86 FP registers, but the SDB on x86/svr4 is so completely
2193 broken with respect to FP registers that it is hardly worth thinking
2194 of it as something to strive for compatibility with.
2195 The version of x86/svr4 SDB I have at the moment does (partially)
2196 seem to believe that DWARF register number 11 is associated with
2197 the x86 register %st(0), but that's about all. Higher DWARF
2198 register numbers don't seem to be associated with anything in
2199 particular, and even for DWARF regno 11, SDB only seems to under-
2200 stand that it should say that a variable lives in %st(0) (when
2201 asked via an `=' command) if we said it was in DWARF regno 11,
2202 but SDB still prints garbage when asked for the value of the
2203 variable in question (via a `/' command).
2204 (Also note that the labels SDB prints for various FP stack regs
2205 when doing an `x' command are all wrong.)
2206 Note that these problems generally don't affect the native SVR4
2207 C compiler because it doesn't allow the use of -O with -g and
2208 because when it is *not* optimizing, it allocates a memory
2209 location for each floating-point variable, and the memory
2210 location is what gets described in the DWARF AT_location
2211 attribute for the variable in question.
2212 Regardless of the severe mental illness of the x86/svr4 SDB, we
2213 do something sensible here and we use the following DWARF
2214 register numbers. Note that these are all stack-top-relative
2215 numbers.
2216 11 for %st(0) (gcc regno = 8)
2217 12 for %st(1) (gcc regno = 9)
2218 13 for %st(2) (gcc regno = 10)
2219 14 for %st(3) (gcc regno = 11)
2220 15 for %st(4) (gcc regno = 12)
2221 16 for %st(5) (gcc regno = 13)
2222 17 for %st(6) (gcc regno = 14)
2223 18 for %st(7) (gcc regno = 15)
2224 */
2225 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2226 {
2227 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2228 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2229 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2230 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2231 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2232 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2233 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2234 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23 */
2235 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31 */
2236 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2237 };
2238
2239 /* Define parameter passing and return registers. */
2240
2241 static int const x86_64_int_parameter_registers[6] =
2242 {
2243 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2244 };
2245
2246 static int const x86_64_ms_abi_int_parameter_registers[4] =
2247 {
2248 CX_REG, DX_REG, R8_REG, R9_REG
2249 };
2250
2251 static int const x86_64_int_return_registers[4] =
2252 {
2253 AX_REG, DX_REG, DI_REG, SI_REG
2254 };
2255
2256 /* Additional registers that are clobbered by SYSV calls. */
2257
2258 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2259 {
2260 SI_REG, DI_REG,
2261 XMM6_REG, XMM7_REG,
2262 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2263 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2264 };
2265
2266 /* Define the structure for the machine field in struct function. */
2267
2268 struct GTY(()) stack_local_entry {
2269 unsigned short mode;
2270 unsigned short n;
2271 rtx rtl;
2272 struct stack_local_entry *next;
2273 };
2274
2275 /* Structure describing stack frame layout.
2276 Stack grows downward:
2277
2278 [arguments]
2279 <- ARG_POINTER
2280 saved pc
2281
2282 saved static chain if ix86_static_chain_on_stack
2283
2284 saved frame pointer if frame_pointer_needed
2285 <- HARD_FRAME_POINTER
2286 [saved regs]
2287 <- regs_save_offset
2288 [padding0]
2289
2290 [saved SSE regs]
2291 <- sse_regs_save_offset
2292 [padding1] |
2293 | <- FRAME_POINTER
2294 [va_arg registers] |
2295 |
2296 [frame] |
2297 |
2298 [padding2] | = to_allocate
2299 <- STACK_POINTER
2300 */
2301 struct ix86_frame
2302 {
2303 int nsseregs;
2304 int nregs;
2305 int va_arg_size;
2306 int red_zone_size;
2307 int outgoing_arguments_size;
2308
2309 /* The offsets relative to ARG_POINTER. */
2310 HOST_WIDE_INT frame_pointer_offset;
2311 HOST_WIDE_INT hard_frame_pointer_offset;
2312 HOST_WIDE_INT stack_pointer_offset;
2313 HOST_WIDE_INT hfp_save_offset;
2314 HOST_WIDE_INT reg_save_offset;
2315 HOST_WIDE_INT sse_reg_save_offset;
2316
2317 /* When save_regs_using_mov is set, emit prologue using
2318 move instead of push instructions. */
2319 bool save_regs_using_mov;
2320 };
2321
2322 /* Which cpu are we scheduling for. */
2323 enum attr_cpu ix86_schedule;
2324
2325 /* Which cpu are we optimizing for. */
2326 enum processor_type ix86_tune;
2327
2328 /* Which instruction set architecture to use. */
2329 enum processor_type ix86_arch;
2330
2331 /* True if processor has SSE prefetch instruction. */
2332 unsigned char x86_prefetch_sse;
2333
2334 /* -mstackrealign option */
2335 static const char ix86_force_align_arg_pointer_string[]
2336 = "force_align_arg_pointer";
2337
2338 static rtx (*ix86_gen_leave) (void);
2339 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2342 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2343 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2346 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2347 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2348 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2349 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2350
2351 /* Preferred alignment for stack boundary in bits. */
2352 unsigned int ix86_preferred_stack_boundary;
2353
2354 /* Alignment for incoming stack boundary in bits specified at
2355 command line. */
2356 static unsigned int ix86_user_incoming_stack_boundary;
2357
2358 /* Default alignment for incoming stack boundary in bits. */
2359 static unsigned int ix86_default_incoming_stack_boundary;
2360
2361 /* Alignment for incoming stack boundary in bits. */
2362 unsigned int ix86_incoming_stack_boundary;
2363
2364 /* Calling-ABI-specific va_list type nodes. */
2365 static GTY(()) tree sysv_va_list_type_node;
2366 static GTY(()) tree ms_va_list_type_node;
2367
2368 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2369 char internal_label_prefix[16];
2370 int internal_label_prefix_len;
2371
2372 /* Fence to use after loop using movnt. */
2373 tree x86_mfence;
2374
2375 /* Register class used for passing a given 64-bit part of an argument.
2376 These represent classes as documented by the psABI, with the exception
2377 of the SSESF and SSEDF classes, which are basically the SSE class; GCC just
2378 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2379
2380 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2381 whenever possible, i.e. when the upper half contains only padding. */
2382 enum x86_64_reg_class
2383 {
2384 X86_64_NO_CLASS,
2385 X86_64_INTEGER_CLASS,
2386 X86_64_INTEGERSI_CLASS,
2387 X86_64_SSE_CLASS,
2388 X86_64_SSESF_CLASS,
2389 X86_64_SSEDF_CLASS,
2390 X86_64_SSEUP_CLASS,
2391 X86_64_X87_CLASS,
2392 X86_64_X87UP_CLASS,
2393 X86_64_COMPLEX_X87_CLASS,
2394 X86_64_MEMORY_CLASS
2395 };
2396
2397 #define MAX_CLASSES 8
2398
2399 /* Table of constants used by fldpi, fldln2, etc.... */
2400 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2401 static bool ext_80387_constants_init = 0;
2402
2403 \f
2404 static struct machine_function * ix86_init_machine_status (void);
2405 static rtx ix86_function_value (const_tree, const_tree, bool);
2406 static bool ix86_function_value_regno_p (const unsigned int);
2407 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2408 const_tree);
2409 static rtx ix86_static_chain (const_tree, bool);
2410 static int ix86_function_regparm (const_tree, const_tree);
2411 static void ix86_compute_frame_layout (struct ix86_frame *);
2412 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2413 rtx, rtx, int);
2414 static void ix86_add_new_builtins (HOST_WIDE_INT);
2415 static tree ix86_canonical_va_list_type (tree);
2416 static void predict_jump (int);
2417 static unsigned int split_stack_prologue_scratch_regno (void);
2418 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2419
2420 enum ix86_function_specific_strings
2421 {
2422 IX86_FUNCTION_SPECIFIC_ARCH,
2423 IX86_FUNCTION_SPECIFIC_TUNE,
2424 IX86_FUNCTION_SPECIFIC_MAX
2425 };
2426
2427 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2428 const char *, enum fpmath_unit, bool);
2429 static void ix86_function_specific_save (struct cl_target_option *,
2430 struct gcc_options *opts);
2431 static void ix86_function_specific_restore (struct gcc_options *opts,
2432 struct cl_target_option *);
2433 static void ix86_function_specific_print (FILE *, int,
2434 struct cl_target_option *);
2435 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2436 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2437 struct gcc_options *,
2438 struct gcc_options *,
2439 struct gcc_options *);
2440 static bool ix86_can_inline_p (tree, tree);
2441 static void ix86_set_current_function (tree);
2442 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2443
2444 static enum calling_abi ix86_function_abi (const_tree);
2445
2446 \f
2447 #ifndef SUBTARGET32_DEFAULT_CPU
2448 #define SUBTARGET32_DEFAULT_CPU "i386"
2449 #endif
2450
2451 /* Whether -mtune= or -march= were specified */
2452 static int ix86_tune_defaulted;
2453 static int ix86_arch_specified;
2454
2455 /* Vectorization library interface and handlers. */
2456 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2457
2458 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2459 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2460
2461 /* Processor target table, indexed by processor number */
2462 struct ptt
2463 {
2464 const char *const name; /* processor name */
2465 const struct processor_costs *cost; /* Processor costs */
2466 const int align_loop; /* Default alignments. */
2467 const int align_loop_max_skip;
2468 const int align_jump;
2469 const int align_jump_max_skip;
2470 const int align_func;
2471 };
2472
2473 /* This table must be in sync with enum processor_type in i386.h. */
2474 static const struct ptt processor_target_table[PROCESSOR_max] =
2475 {
2476 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2477 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2478 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2479 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2480 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2481 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2482 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2483 {"core2", &core_cost, 16, 10, 16, 10, 16},
2484 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2485 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2486 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2487 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2488 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2489 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2490 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2491 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2492 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2493 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2494 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2495 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2496 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2497 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2498 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2499 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2500 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2501 };
2502 \f
2503 static unsigned int
2504 rest_of_handle_insert_vzeroupper (void)
2505 {
2506 int i;
2507
2508 /* vzeroupper instructions are inserted immediately after reload to
2509 account for possible spills from 256-bit registers. The pass
2510 reuses the mode-switching infrastructure by re-running the mode
2511 insertion pass, so disable entities that have already been processed. */
2512 for (i = 0; i < MAX_386_ENTITIES; i++)
2513 ix86_optimize_mode_switching[i] = 0;
2514
2515 ix86_optimize_mode_switching[AVX_U128] = 1;
2516
2517 /* Call optimize_mode_switching. */
2518 g->get_passes ()->execute_pass_mode_switching ();
2519 return 0;
2520 }
2521
2522 namespace {
2523
2524 const pass_data pass_data_insert_vzeroupper =
2525 {
2526 RTL_PASS, /* type */
2527 "vzeroupper", /* name */
2528 OPTGROUP_NONE, /* optinfo_flags */
2529 TV_NONE, /* tv_id */
2530 0, /* properties_required */
2531 0, /* properties_provided */
2532 0, /* properties_destroyed */
2533 0, /* todo_flags_start */
2534 TODO_df_finish, /* todo_flags_finish */
2535 };
2536
2537 class pass_insert_vzeroupper : public rtl_opt_pass
2538 {
2539 public:
2540 pass_insert_vzeroupper(gcc::context *ctxt)
2541 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2542 {}
2543
2544 /* opt_pass methods: */
2545 virtual bool gate (function *)
2546 {
2547 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2548 }
2549
2550 virtual unsigned int execute (function *)
2551 {
2552 return rest_of_handle_insert_vzeroupper ();
2553 }
2554
2555 }; // class pass_insert_vzeroupper
2556
2557 } // anon namespace
2558
2559 rtl_opt_pass *
2560 make_pass_insert_vzeroupper (gcc::context *ctxt)
2561 {
2562 return new pass_insert_vzeroupper (ctxt);
2563 }
2564
2565 /* Return true if a red-zone is in use. */
2566
2567 static inline bool
2568 ix86_using_red_zone (void)
2569 {
2570 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2571 }
2572 \f
2573 /* Return a string that documents the current -m options. The caller is
2574 responsible for freeing the string. */
2575
2576 static char *
2577 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2578 const char *tune, enum fpmath_unit fpmath,
2579 bool add_nl_p)
2580 {
2581 struct ix86_target_opts
2582 {
2583 const char *option; /* option string */
2584 HOST_WIDE_INT mask; /* isa mask options */
2585 };
2586
2587 /* This table is ordered so that options like -msse4.2 that imply
2588 preceding options are matched first. */
2589 static struct ix86_target_opts isa_opts[] =
2590 {
2591 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2592 { "-mfma", OPTION_MASK_ISA_FMA },
2593 { "-mxop", OPTION_MASK_ISA_XOP },
2594 { "-mlwp", OPTION_MASK_ISA_LWP },
2595 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2596 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2597 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2598 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2599 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2600 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2601 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2602 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2603 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2604 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2605 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2606 { "-msse3", OPTION_MASK_ISA_SSE3 },
2607 { "-msse2", OPTION_MASK_ISA_SSE2 },
2608 { "-msse", OPTION_MASK_ISA_SSE },
2609 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2610 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2611 { "-mmmx", OPTION_MASK_ISA_MMX },
2612 { "-mabm", OPTION_MASK_ISA_ABM },
2613 { "-mbmi", OPTION_MASK_ISA_BMI },
2614 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2615 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2616 { "-mhle", OPTION_MASK_ISA_HLE },
2617 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2618 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2619 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2620 { "-madx", OPTION_MASK_ISA_ADX },
2621 { "-mtbm", OPTION_MASK_ISA_TBM },
2622 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2623 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2624 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2625 { "-maes", OPTION_MASK_ISA_AES },
2626 { "-msha", OPTION_MASK_ISA_SHA },
2627 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2628 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2629 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2630 { "-mf16c", OPTION_MASK_ISA_F16C },
2631 { "-mrtm", OPTION_MASK_ISA_RTM },
2632 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2633 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2634 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2635 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2636 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2637 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2638 };
2639
2640 /* Flag options. */
2641 static struct ix86_target_opts flag_opts[] =
2642 {
2643 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2644 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2645 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2646 { "-m80387", MASK_80387 },
2647 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2648 { "-malign-double", MASK_ALIGN_DOUBLE },
2649 { "-mcld", MASK_CLD },
2650 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2651 { "-mieee-fp", MASK_IEEE_FP },
2652 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2653 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2654 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2655 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2656 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2657 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2658 { "-mno-red-zone", MASK_NO_RED_ZONE },
2659 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2660 { "-mrecip", MASK_RECIP },
2661 { "-mrtd", MASK_RTD },
2662 { "-msseregparm", MASK_SSEREGPARM },
2663 { "-mstack-arg-probe", MASK_STACK_PROBE },
2664 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2665 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2666 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2667 { "-mvzeroupper", MASK_VZEROUPPER },
2668 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2669 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2670 { "-mprefer-avx128", MASK_PREFER_AVX128},
2671 };
2672
2673 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2674
2675 char isa_other[40];
2676 char target_other[40];
2677 unsigned num = 0;
2678 unsigned i, j;
2679 char *ret;
2680 char *ptr;
2681 size_t len;
2682 size_t line_len;
2683 size_t sep_len;
2684 const char *abi;
2685
2686 memset (opts, '\0', sizeof (opts));
2687
2688 /* Add -march= option. */
2689 if (arch)
2690 {
2691 opts[num][0] = "-march=";
2692 opts[num++][1] = arch;
2693 }
2694
2695 /* Add -mtune= option. */
2696 if (tune)
2697 {
2698 opts[num][0] = "-mtune=";
2699 opts[num++][1] = tune;
2700 }
2701
2702 /* Add -m32/-m64/-mx32. */
2703 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2704 {
2705 if ((isa & OPTION_MASK_ABI_64) != 0)
2706 abi = "-m64";
2707 else
2708 abi = "-mx32";
2709 isa &= ~ (OPTION_MASK_ISA_64BIT
2710 | OPTION_MASK_ABI_64
2711 | OPTION_MASK_ABI_X32);
2712 }
2713 else
2714 abi = "-m32";
2715 opts[num++][0] = abi;
2716
2717 /* Pick out the options in isa options. */
2718 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2719 {
2720 if ((isa & isa_opts[i].mask) != 0)
2721 {
2722 opts[num++][0] = isa_opts[i].option;
2723 isa &= ~ isa_opts[i].mask;
2724 }
2725 }
2726
2727 if (isa && add_nl_p)
2728 {
2729 opts[num++][0] = isa_other;
2730 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2731 isa);
2732 }
2733
2734 /* Add flag options. */
2735 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2736 {
2737 if ((flags & flag_opts[i].mask) != 0)
2738 {
2739 opts[num++][0] = flag_opts[i].option;
2740 flags &= ~ flag_opts[i].mask;
2741 }
2742 }
2743
2744 if (flags && add_nl_p)
2745 {
2746 opts[num++][0] = target_other;
2747 sprintf (target_other, "(other flags: %#x)", flags);
2748 }
2749
2750 /* Add -fpmath= option. */
2751 if (fpmath)
2752 {
2753 opts[num][0] = "-mfpmath=";
2754 switch ((int) fpmath)
2755 {
2756 case FPMATH_387:
2757 opts[num++][1] = "387";
2758 break;
2759
2760 case FPMATH_SSE:
2761 opts[num++][1] = "sse";
2762 break;
2763
2764 case FPMATH_387 | FPMATH_SSE:
2765 opts[num++][1] = "sse+387";
2766 break;
2767
2768 default:
2769 gcc_unreachable ();
2770 }
2771 }
2772
2773 /* Any options? */
2774 if (num == 0)
2775 return NULL;
2776
2777 gcc_assert (num < ARRAY_SIZE (opts));
2778
2779 /* Size the string. */
2780 len = 0;
2781 sep_len = (add_nl_p) ? 3 : 1;
2782 for (i = 0; i < num; i++)
2783 {
2784 len += sep_len;
2785 for (j = 0; j < 2; j++)
2786 if (opts[i][j])
2787 len += strlen (opts[i][j]);
2788 }
2789
2790 /* Build the string. */
2791 ret = ptr = (char *) xmalloc (len);
2792 line_len = 0;
2793
2794 for (i = 0; i < num; i++)
2795 {
2796 size_t len2[2];
2797
2798 for (j = 0; j < 2; j++)
2799 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2800
2801 if (i != 0)
2802 {
2803 *ptr++ = ' ';
2804 line_len++;
2805
2806 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2807 {
2808 *ptr++ = '\\';
2809 *ptr++ = '\n';
2810 line_len = 0;
2811 }
2812 }
2813
2814 for (j = 0; j < 2; j++)
2815 if (opts[i][j])
2816 {
2817 memcpy (ptr, opts[i][j], len2[j]);
2818 ptr += len2[j];
2819 line_len += len2[j];
2820 }
2821 }
2822
2823 *ptr = '\0';
2824 gcc_assert (ret + len >= ptr);
2825
2826 return ret;
2827 }
2828
2829 /* Return true if profiling code should be emitted before the
2830 prologue, and false otherwise.
2831 Note: on x86 this is the case only when -mfentry ("hotfix" support) is used. */
2832 static bool
2833 ix86_profile_before_prologue (void)
2834 {
2835 return flag_fentry != 0;
2836 }
2837
2838 /* Function that is callable from the debugger to print the current
2839 options. */
2840 void ATTRIBUTE_UNUSED
2841 ix86_debug_options (void)
2842 {
2843 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2844 ix86_arch_string, ix86_tune_string,
2845 ix86_fpmath, true);
2846
2847 if (opts)
2848 {
2849 fprintf (stderr, "%s\n\n", opts);
2850 free (opts);
2851 }
2852 else
2853 fputs ("<no options>\n\n", stderr);
2854
2855 return;
2856 }
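/* A hypothetical illustration of what ix86_debug_options prints: given
   -march=bdver1 -mtune=generic on x86-64 the output would be a single
   space-separated line of the form

     -march=bdver1 -mtune=generic -m64 <implied -m ISA options> -mfpmath=sse

   (the exact ISA list depends on the flags in effect), with lines longer
   than about 70 characters wrapped by a trailing '\' as built in
   ix86_target_string above.  */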
2857
2858 static const char *stringop_alg_names[] = {
2859 #define DEF_ENUM
2860 #define DEF_ALG(alg, name) #name,
2861 #include "stringop.def"
2862 #undef DEF_ENUM
2863 #undef DEF_ALG
2864 };
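/* A small sketch (hypothetical entry) of the X-macro include above: with
   DEF_ENUM defined to nothing, a stringop.def line such as
     DEF_ALG (libcall, libcall)
   contributes only the string "libcall" here, so this array is indexed by
   the stringop_alg enumeration values (see the use of last_alg below).  */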
2865
2866 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2867 The string is of the following form (or a comma-separated list of such entries):
2868
2869 strategy_alg:max_size:[align|noalign]
2870
2871 where the full size range for the strategy is either [0, max_size] or
2872 [min_size, max_size], in which min_size is the max_size + 1 of the
2873 preceding range. The last size range must have max_size == -1.
2874
2875 Examples:
2876
2877 1.
2878 -mmemcpy-strategy=libcall:-1:noalign
2879
2880 This is equivalent to (for known-size memcpy) -mstringop-strategy=libcall.
2881
2882
2883 2.
2884 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2885
2886 This tells the compiler to use the following strategy for memset:
2887 1) when the expected size is between [1, 16], use rep_8byte strategy;
2888 2) when the size is between [17, 2048], use vector_loop;
2889 3) when the size is > 2048, use libcall. */
2890
2891 struct stringop_size_range
2892 {
2893 int max;
2894 stringop_alg alg;
2895 bool noalign;
2896 };
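/* An illustration (hypothetical, assuming the user-visible name "rep_8byte"
   maps to the rep_prefix_8_byte algorithm) of what the parser below builds
   in input_ranges for example 2 above, before the defaults are overridden:

     { 16,   rep_prefix_8_byte, true  }   (sizes [1, 16],     noalign)
     { 2048, vector_loop,       false }   (sizes [17, 2048],  align)
     { -1,   libcall,           true  }   (everything larger, noalign)
*/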
2897
2898 static void
2899 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2900 {
2901 const struct stringop_algs *default_algs;
2902 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2903 char *curr_range_str, *next_range_str;
2904 int i = 0, n = 0;
2905
2906 if (is_memset)
2907 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2908 else
2909 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2910
2911 curr_range_str = strategy_str;
2912
2913 do
2914 {
2915 int maxs;
2916 char alg_name[128];
2917 char align[16];
2918 next_range_str = strchr (curr_range_str, ',');
2919 if (next_range_str)
2920 *next_range_str++ = '\0';
2921
2922 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2923 alg_name, &maxs, align))
2924 {
2925 error ("wrong arg %s to option %s", curr_range_str,
2926 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2927 return;
2928 }
2929
2930 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2931 {
2932 error ("size ranges of option %s should be increasing",
2933 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2934 return;
2935 }
2936
2937 for (i = 0; i < last_alg; i++)
2938 if (!strcmp (alg_name, stringop_alg_names[i]))
2939 break;
2940
2941 if (i == last_alg)
2942 {
2943 error ("wrong stringop strategy name %s specified for option %s",
2944 alg_name,
2945 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2946 return;
2947 }
2948
2949 input_ranges[n].max = maxs;
2950 input_ranges[n].alg = (stringop_alg) i;
2951 if (!strcmp (align, "align"))
2952 input_ranges[n].noalign = false;
2953 else if (!strcmp (align, "noalign"))
2954 input_ranges[n].noalign = true;
2955 else
2956 {
2957 error ("unknown alignment %s specified for option %s",
2958 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2959 return;
2960 }
2961 n++;
2962 curr_range_str = next_range_str;
2963 }
2964 while (curr_range_str);
2965
2966 if (input_ranges[n - 1].max != -1)
2967 {
2968 error ("the max value for the last size range should be -1"
2969 " for option %s",
2970 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2971 return;
2972 }
2973
2974 if (n > MAX_STRINGOP_ALGS)
2975 {
2976 error ("too many size ranges specified in option %s",
2977 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2978 return;
2979 }
2980
2981 /* Now override the default algs array. */
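/* default_algs points into the (const-qualified) processor cost tables,
   hence the const_casts below to patch the selected entries in place.  */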
2982 for (i = 0; i < n; i++)
2983 {
2984 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2985 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2986 = input_ranges[i].alg;
2987 *const_cast<int *>(&default_algs->size[i].noalign)
2988 = input_ranges[i].noalign;
2989 }
2990 }
2991
2992 \f
2993 /* Parse the -mtune-ctrl= option. When DUMP is true,
2994 print the features that are explicitly set. */
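/* A feature name may be prefixed with '^' to clear it instead of setting it;
   for instance a string such as "use_incdec,^pad_returns" (illustrative
   names, the valid set comes from x86-tune.def via ix86_tune_feature_names)
   would set the first feature and clear the second.  */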
2995
2996 static void
2997 parse_mtune_ctrl_str (bool dump)
2998 {
2999 if (!ix86_tune_ctrl_string)
3000 return;
3001
3002 char *next_feature_string = NULL;
3003 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3004 char *orig = curr_feature_string;
3005 int i;
3006 do
3007 {
3008 bool clear = false;
3009
3010 next_feature_string = strchr (curr_feature_string, ',');
3011 if (next_feature_string)
3012 *next_feature_string++ = '\0';
3013 if (*curr_feature_string == '^')
3014 {
3015 curr_feature_string++;
3016 clear = true;
3017 }
3018 for (i = 0; i < X86_TUNE_LAST; i++)
3019 {
3020 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3021 {
3022 ix86_tune_features[i] = !clear;
3023 if (dump)
3024 fprintf (stderr, "Explicitly %s feature %s\n",
3025 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3026 break;
3027 }
3028 }
3029 if (i == X86_TUNE_LAST)
3030 error ("Unknown parameter to option -mtune-ctrl: %s",
3031 clear ? curr_feature_string - 1 : curr_feature_string);
3032 curr_feature_string = next_feature_string;
3033 }
3034 while (curr_feature_string);
3035 free (orig);
3036 }
3037
3038 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3039 processor type. */
3040
3041 static void
3042 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3043 {
3044 unsigned int ix86_tune_mask = 1u << ix86_tune;
3045 int i;
3046
3047 for (i = 0; i < X86_TUNE_LAST; ++i)
3048 {
3049 if (ix86_tune_no_default)
3050 ix86_tune_features[i] = 0;
3051 else
3052 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3053 }
3054
3055 if (dump)
3056 {
3057 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3058 for (i = 0; i < X86_TUNE_LAST; i++)
3059 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3060 ix86_tune_features[i] ? "on" : "off");
3061 }
3062
3063 parse_mtune_ctrl_str (dump);
3064 }
3065
3066
3067 /* Override various settings based on options. If MAIN_ARGS_P, the
3068 options are from the command line, otherwise they are from
3069 attributes. */
3070
3071 static void
3072 ix86_option_override_internal (bool main_args_p,
3073 struct gcc_options *opts,
3074 struct gcc_options *opts_set)
3075 {
3076 int i;
3077 unsigned int ix86_arch_mask;
3078 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3079 const char *prefix;
3080 const char *suffix;
3081 const char *sw;
3082
3083 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3084 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3085 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3086 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3087 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3088 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3089 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3090 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3091 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3092 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3093 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3094 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3095 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3096 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3097 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3098 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3099 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3100 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3101 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3102 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3103 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3104 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3105 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3106 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3107 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3108 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3109 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3110 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3111 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3112 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3113 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3114 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3115 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3116 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3117 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3118 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3119 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3120 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3121 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3122 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3123 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3124 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3125 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3126 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3127 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3128 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3129 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3130 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3131 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3132 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3133 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3134 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3135
3136 #define PTA_CORE2 \
3137 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3138 | PTA_CX16 | PTA_FXSR)
3139 #define PTA_NEHALEM \
3140 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3141 #define PTA_WESTMERE \
3142 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3143 #define PTA_SANDYBRIDGE \
3144 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3145 #define PTA_IVYBRIDGE \
3146 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3147 #define PTA_HASWELL \
3148 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3149 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3150 #define PTA_BROADWELL \
3151 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3152 #define PTA_BONNELL \
3153 (PTA_CORE2 | PTA_MOVBE)
3154 #define PTA_SILVERMONT \
3155 (PTA_WESTMERE | PTA_MOVBE)
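/* The composite PTA_* masks above let each successive Intel entry build on
   the previous generation's feature set (e.g. PTA_HASWELL is PTA_IVYBRIDGE
   plus AVX2, BMI, BMI2, LZCNT, FMA, MOVBE and HLE); they are used in
   processor_alias_table below.  */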
3156
3157 /* If this reaches 64, we need to widen the struct pta flags field below. */
3158
3159 static struct pta
3160 {
3161 const char *const name; /* processor name or nickname. */
3162 const enum processor_type processor;
3163 const enum attr_cpu schedule;
3164 const unsigned HOST_WIDE_INT flags;
3165 }
3166 const processor_alias_table[] =
3167 {
3168 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3169 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3170 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3171 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3172 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3173 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3174 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3175 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3176 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3177 PTA_MMX | PTA_SSE | PTA_FXSR},
3178 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3179 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3180 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3181 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3182 PTA_MMX | PTA_SSE | PTA_FXSR},
3183 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3184 PTA_MMX | PTA_SSE | PTA_FXSR},
3185 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3186 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3187 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3188 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3189 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3190 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3191 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3192 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3193 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3194 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3195 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3196 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3197 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3198 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3199 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3200 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3201 PTA_SANDYBRIDGE},
3202 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3203 PTA_SANDYBRIDGE},
3204 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3205 PTA_IVYBRIDGE},
3206 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3207 PTA_IVYBRIDGE},
3208 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3209 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3210 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3211 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3212 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3213 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3214 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3215 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3216 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3217 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3218 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3219 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3220 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3221 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3222 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3223 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3224 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3225 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3226 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3227 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3228 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3229 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3230 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3231 {"x86-64", PROCESSOR_K8, CPU_K8,
3232 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3233 {"k8", PROCESSOR_K8, CPU_K8,
3234 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3235 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3236 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3237 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3238 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3239 {"opteron", PROCESSOR_K8, CPU_K8,
3240 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3241 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3242 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3243 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3244 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3245 {"athlon64", PROCESSOR_K8, CPU_K8,
3246 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3247 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3248 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3249 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3250 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3251 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3252 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3253 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3254 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3255 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3256 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3257 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3258 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3259 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3260 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3261 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3262 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3263 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3264 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3265 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3266 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3267 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3268 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3269 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3270 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3271 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3272 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3273 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3274 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3275 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3276 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3277 | PTA_XSAVEOPT | PTA_FSGSBASE},
3278 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3279 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3280 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3281 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3282 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3283 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3284 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3285 | PTA_MOVBE},
3286 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3287 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3288 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3289 | PTA_FXSR | PTA_XSAVE},
3290 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3291 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3292 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3293 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3294 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3295 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3296
3297 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3298 PTA_64BIT
3299 | PTA_HLE /* flags are only used for -march switch. */ },
3300 };
3301
3302 /* -mrecip options. */
3303 static struct
3304 {
3305 const char *string; /* option name */
3306 unsigned int mask; /* mask bits to set */
3307 }
3308 const recip_options[] =
3309 {
3310 { "all", RECIP_MASK_ALL },
3311 { "none", RECIP_MASK_NONE },
3312 { "div", RECIP_MASK_DIV },
3313 { "sqrt", RECIP_MASK_SQRT },
3314 { "vec-div", RECIP_MASK_VEC_DIV },
3315 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3316 };
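/* These strings are matched by the -mrecip= parser further below; an entry
   may be negated with a leading '!', so a value such as "all,!div"
   (illustrative) enables every reciprocal approximation except scalar
   division.  */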
3317
3318 int const pta_size = ARRAY_SIZE (processor_alias_table);
3319
3320 /* Set up prefix/suffix so the error messages refer to either the command
3321 line argument, or the attribute(target). */
3322 if (main_args_p)
3323 {
3324 prefix = "-m";
3325 suffix = "";
3326 sw = "switch";
3327 }
3328 else
3329 {
3330 prefix = "option(\"";
3331 suffix = "\")";
3332 sw = "attribute";
3333 }
3334
3335 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3336 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3337 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3338 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3339 #ifdef TARGET_BI_ARCH
3340 else
3341 {
3342 #if TARGET_BI_ARCH == 1
3343 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3344 is on and OPTION_MASK_ABI_X32 is off. We turn off
3345 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3346 -mx32. */
3347 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3348 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3349 #else
3350 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3351 on and OPTION_MASK_ABI_64 is off. We turn off
3352 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3353 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3354 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3355 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3356 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3357 #endif
3358 }
3359 #endif
3360
3361 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3362 {
3363 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3364 OPTION_MASK_ABI_64 for TARGET_X32. */
3365 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3366 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3367 }
3368 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3369 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3370 | OPTION_MASK_ABI_X32
3371 | OPTION_MASK_ABI_64);
3372 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3373 {
3374 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3375 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3376 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3377 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3378 }
3379
3380 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3381 SUBTARGET_OVERRIDE_OPTIONS;
3382 #endif
3383
3384 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3385 SUBSUBTARGET_OVERRIDE_OPTIONS;
3386 #endif
3387
3388 /* -fPIC is the default for x86_64. */
3389 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3390 opts->x_flag_pic = 2;
3391
3392 /* Need to check -mtune=generic first. */
3393 if (opts->x_ix86_tune_string)
3394 {
3395 /* As special support for cross compilers we read -mtune=native
3396 as -mtune=generic. With native compilers we won't see the
3397 -mtune=native, as it was changed by the driver. */
3398 if (!strcmp (opts->x_ix86_tune_string, "native"))
3399 {
3400 opts->x_ix86_tune_string = "generic";
3401 }
3402 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3403 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3404 "%stune=k8%s or %stune=generic%s instead as appropriate",
3405 prefix, suffix, prefix, suffix, prefix, suffix);
3406 }
3407 else
3408 {
3409 if (opts->x_ix86_arch_string)
3410 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3411 if (!opts->x_ix86_tune_string)
3412 {
3413 opts->x_ix86_tune_string
3414 = processor_target_table[TARGET_CPU_DEFAULT].name;
3415 ix86_tune_defaulted = 1;
3416 }
3417
3418 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3419 or defaulted. We need to use a sensible tune option. */
3420 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3421 {
3422 opts->x_ix86_tune_string = "generic";
3423 }
3424 }
3425
3426 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3427 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3428 {
3429 /* rep; movq isn't available in 32-bit code. */
3430 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3431 opts->x_ix86_stringop_alg = no_stringop;
3432 }
3433
3434 if (!opts->x_ix86_arch_string)
3435 opts->x_ix86_arch_string
3436 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3437 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3438 else
3439 ix86_arch_specified = 1;
3440
3441 if (opts_set->x_ix86_pmode)
3442 {
3443 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3444 && opts->x_ix86_pmode == PMODE_SI)
3445 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3446 && opts->x_ix86_pmode == PMODE_DI))
3447 error ("address mode %qs not supported in the %s bit mode",
3448 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3449 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3450 }
3451 else
3452 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3453 ? PMODE_DI : PMODE_SI;
3454
3455 if (!opts_set->x_ix86_abi)
3456 opts->x_ix86_abi = DEFAULT_ABI;
3457
3458 /* For targets using the MS ABI, enable MS extensions unless they were
3459 explicitly turned off. For non-MS ABIs we turn off this
3460 option. */
3461 if (!opts_set->x_flag_ms_extensions)
3462 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3463
3464 if (opts_set->x_ix86_cmodel)
3465 {
3466 switch (opts->x_ix86_cmodel)
3467 {
3468 case CM_SMALL:
3469 case CM_SMALL_PIC:
3470 if (opts->x_flag_pic)
3471 opts->x_ix86_cmodel = CM_SMALL_PIC;
3472 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3473 error ("code model %qs not supported in the %s bit mode",
3474 "small", "32");
3475 break;
3476
3477 case CM_MEDIUM:
3478 case CM_MEDIUM_PIC:
3479 if (opts->x_flag_pic)
3480 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3481 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3482 error ("code model %qs not supported in the %s bit mode",
3483 "medium", "32");
3484 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3485 error ("code model %qs not supported in x32 mode",
3486 "medium");
3487 break;
3488
3489 case CM_LARGE:
3490 case CM_LARGE_PIC:
3491 if (opts->x_flag_pic)
3492 opts->x_ix86_cmodel = CM_LARGE_PIC;
3493 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3494 error ("code model %qs not supported in the %s bit mode",
3495 "large", "32");
3496 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3497 error ("code model %qs not supported in x32 mode",
3498 "large");
3499 break;
3500
3501 case CM_32:
3502 if (opts->x_flag_pic)
3503 error ("code model %s does not support PIC mode", "32");
3504 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3505 error ("code model %qs not supported in the %s bit mode",
3506 "32", "64");
3507 break;
3508
3509 case CM_KERNEL:
3510 if (opts->x_flag_pic)
3511 {
3512 error ("code model %s does not support PIC mode", "kernel");
3513 opts->x_ix86_cmodel = CM_32;
3514 }
3515 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3516 error ("code model %qs not supported in the %s bit mode",
3517 "kernel", "32");
3518 break;
3519
3520 default:
3521 gcc_unreachable ();
3522 }
3523 }
3524 else
3525 {
3526 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3527 use of rip-relative addressing. This eliminates fixups that
3528 would otherwise be needed if this object is to be placed in a
3529 DLL, and is essentially just as efficient as direct addressing. */
3530 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3531 && (TARGET_RDOS || TARGET_PECOFF))
3532 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3533 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3534 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3535 else
3536 opts->x_ix86_cmodel = CM_32;
3537 }
3538 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3539 {
3540 error ("-masm=intel not supported in this configuration");
3541 opts->x_ix86_asm_dialect = ASM_ATT;
3542 }
3543 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3544 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3545 sorry ("%i-bit mode not compiled in",
3546 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3547
3548 for (i = 0; i < pta_size; i++)
3549 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3550 {
3551 ix86_schedule = processor_alias_table[i].schedule;
3552 ix86_arch = processor_alias_table[i].processor;
3553 /* Default cpu tuning to the architecture. */
3554 ix86_tune = ix86_arch;
3555
3556 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3557 && !(processor_alias_table[i].flags & PTA_64BIT))
3558 error ("CPU you selected does not support x86-64 "
3559 "instruction set");
3560
3561 if (processor_alias_table[i].flags & PTA_MMX
3562 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3563 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3564 if (processor_alias_table[i].flags & PTA_3DNOW
3565 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3566 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3567 if (processor_alias_table[i].flags & PTA_3DNOW_A
3568 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3569 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3570 if (processor_alias_table[i].flags & PTA_SSE
3571 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3572 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3573 if (processor_alias_table[i].flags & PTA_SSE2
3574 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3575 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3576 if (processor_alias_table[i].flags & PTA_SSE3
3577 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3578 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3579 if (processor_alias_table[i].flags & PTA_SSSE3
3580 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3581 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3582 if (processor_alias_table[i].flags & PTA_SSE4_1
3583 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3584 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3585 if (processor_alias_table[i].flags & PTA_SSE4_2
3586 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3587 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3588 if (processor_alias_table[i].flags & PTA_AVX
3589 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3590 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3591 if (processor_alias_table[i].flags & PTA_AVX2
3592 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3593 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3594 if (processor_alias_table[i].flags & PTA_FMA
3595 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3596 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3597 if (processor_alias_table[i].flags & PTA_SSE4A
3598 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3599 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3600 if (processor_alias_table[i].flags & PTA_FMA4
3601 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3602 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3603 if (processor_alias_table[i].flags & PTA_XOP
3604 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3605 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3606 if (processor_alias_table[i].flags & PTA_LWP
3607 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3608 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3609 if (processor_alias_table[i].flags & PTA_ABM
3610 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3611 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3612 if (processor_alias_table[i].flags & PTA_BMI
3613 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3614 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3615 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3616 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3617 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3618 if (processor_alias_table[i].flags & PTA_TBM
3619 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3620 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3621 if (processor_alias_table[i].flags & PTA_BMI2
3622 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3623 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3624 if (processor_alias_table[i].flags & PTA_CX16
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3627 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3630 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3631 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3632 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3633 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3634 if (processor_alias_table[i].flags & PTA_MOVBE
3635 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3636 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3637 if (processor_alias_table[i].flags & PTA_AES
3638 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3639 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3640 if (processor_alias_table[i].flags & PTA_SHA
3641 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3642 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3643 if (processor_alias_table[i].flags & PTA_PCLMUL
3644 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3645 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3646 if (processor_alias_table[i].flags & PTA_FSGSBASE
3647 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3648 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3649 if (processor_alias_table[i].flags & PTA_RDRND
3650 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3651 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3652 if (processor_alias_table[i].flags & PTA_F16C
3653 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3654 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3655 if (processor_alias_table[i].flags & PTA_RTM
3656 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3657 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3658 if (processor_alias_table[i].flags & PTA_HLE
3659 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3660 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3661 if (processor_alias_table[i].flags & PTA_PRFCHW
3662 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3663 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3664 if (processor_alias_table[i].flags & PTA_RDSEED
3665 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3666 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3667 if (processor_alias_table[i].flags & PTA_ADX
3668 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3669 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3670 if (processor_alias_table[i].flags & PTA_FXSR
3671 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3672 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3673 if (processor_alias_table[i].flags & PTA_XSAVE
3674 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3675 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3676 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3677 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3678 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3679 if (processor_alias_table[i].flags & PTA_AVX512F
3680 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3681 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3682 if (processor_alias_table[i].flags & PTA_AVX512ER
3683 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3684 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3685 if (processor_alias_table[i].flags & PTA_AVX512PF
3686 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3687 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3688 if (processor_alias_table[i].flags & PTA_AVX512CD
3689 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3690 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3691 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3692 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3693 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3694 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3695 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3696 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3697 if (processor_alias_table[i].flags & PTA_XSAVEC
3698 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3699 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3700 if (processor_alias_table[i].flags & PTA_XSAVES
3701 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3702 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3703 if (processor_alias_table[i].flags & PTA_AVX512DQ
3704 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3705 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3706 if (processor_alias_table[i].flags & PTA_AVX512BW
3707 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3708 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3709 if (processor_alias_table[i].flags & PTA_AVX512VL
3710 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3711 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3712 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3713 x86_prefetch_sse = true;
3714
3715 break;
3716 }
3717
3718 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3719 error ("generic CPU can be used only for %stune=%s %s",
3720 prefix, suffix, sw);
3721 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3722 error ("intel CPU can be used only for %stune=%s %s",
3723 prefix, suffix, sw);
3724 else if (i == pta_size)
3725 error ("bad value (%s) for %sarch=%s %s",
3726 opts->x_ix86_arch_string, prefix, suffix, sw);
3727
3728 ix86_arch_mask = 1u << ix86_arch;
3729 for (i = 0; i < X86_ARCH_LAST; ++i)
3730 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3731
3732 for (i = 0; i < pta_size; i++)
3733 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3734 {
3735 ix86_schedule = processor_alias_table[i].schedule;
3736 ix86_tune = processor_alias_table[i].processor;
3737 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3738 {
3739 if (!(processor_alias_table[i].flags & PTA_64BIT))
3740 {
3741 if (ix86_tune_defaulted)
3742 {
3743 opts->x_ix86_tune_string = "x86-64";
3744 for (i = 0; i < pta_size; i++)
3745 if (! strcmp (opts->x_ix86_tune_string,
3746 processor_alias_table[i].name))
3747 break;
3748 ix86_schedule = processor_alias_table[i].schedule;
3749 ix86_tune = processor_alias_table[i].processor;
3750 }
3751 else
3752 error ("CPU you selected does not support x86-64 "
3753 "instruction set");
3754 }
3755 }
3756 /* Intel CPUs have always interpreted SSE prefetch instructions as
3757 NOPs; so, we can enable SSE prefetch instructions even when
3758 -mtune (rather than -march) points us to a processor that has them.
3759 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3760 higher processors. */
3761 if (TARGET_CMOV
3762 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3763 x86_prefetch_sse = true;
3764 break;
3765 }
3766
3767 if (ix86_tune_specified && i == pta_size)
3768 error ("bad value (%s) for %stune=%s %s",
3769 opts->x_ix86_tune_string, prefix, suffix, sw);
3770
3771 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3772
3773 #ifndef USE_IX86_FRAME_POINTER
3774 #define USE_IX86_FRAME_POINTER 0
3775 #endif
3776
3777 #ifndef USE_X86_64_FRAME_POINTER
3778 #define USE_X86_64_FRAME_POINTER 0
3779 #endif
3780
3781 /* Set the default values for switches whose default depends on TARGET_64BIT
3782 in case they weren't overwritten by command line options. */
3783 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3784 {
3785 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3786 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3787 if (opts->x_flag_asynchronous_unwind_tables
3788 && !opts_set->x_flag_unwind_tables
3789 && TARGET_64BIT_MS_ABI)
3790 opts->x_flag_unwind_tables = 1;
3791 if (opts->x_flag_asynchronous_unwind_tables == 2)
3792 opts->x_flag_unwind_tables
3793 = opts->x_flag_asynchronous_unwind_tables = 1;
3794 if (opts->x_flag_pcc_struct_return == 2)
3795 opts->x_flag_pcc_struct_return = 0;
3796 }
3797 else
3798 {
3799 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3800 opts->x_flag_omit_frame_pointer
3801 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3802 if (opts->x_flag_asynchronous_unwind_tables == 2)
3803 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3804 if (opts->x_flag_pcc_struct_return == 2)
3805 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3806 }
3807
3808 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3809 if (opts->x_optimize_size)
3810 ix86_cost = &ix86_size_cost;
3811 else
3812 ix86_cost = ix86_tune_cost;
3813
3814 /* Arrange to set up i386_stack_locals for all functions. */
3815 init_machine_status = ix86_init_machine_status;
3816
3817 /* Validate -mregparm= value. */
3818 if (opts_set->x_ix86_regparm)
3819 {
3820 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3821 warning (0, "-mregparm is ignored in 64-bit mode");
3822 if (opts->x_ix86_regparm > REGPARM_MAX)
3823 {
3824 error ("-mregparm=%d is not between 0 and %d",
3825 opts->x_ix86_regparm, REGPARM_MAX);
3826 opts->x_ix86_regparm = 0;
3827 }
3828 }
3829 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3830 opts->x_ix86_regparm = REGPARM_MAX;
3831
3832 /* Default align_* from the processor table. */
3833 if (opts->x_align_loops == 0)
3834 {
3835 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3836 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3837 }
3838 if (opts->x_align_jumps == 0)
3839 {
3840 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3841 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3842 }
3843 if (opts->x_align_functions == 0)
3844 {
3845 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3846 }
3847
3848 /* Provide default for -mbranch-cost= value. */
3849 if (!opts_set->x_ix86_branch_cost)
3850 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3851
3852 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3853 {
3854 opts->x_target_flags
3855 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3856
3857 /* Enable by default the SSE and MMX builtins. Do allow the user to
3858 explicitly disable any of these. In particular, disabling SSE and
3859 MMX for kernel code is extremely useful. */
3860 if (!ix86_arch_specified)
3861 opts->x_ix86_isa_flags
3862 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3863 | TARGET_SUBTARGET64_ISA_DEFAULT)
3864 & ~opts->x_ix86_isa_flags_explicit);
3865
3866 if (TARGET_RTD_P (opts->x_target_flags))
3867 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3868 }
3869 else
3870 {
3871 opts->x_target_flags
3872 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3873
3874 if (!ix86_arch_specified)
3875 opts->x_ix86_isa_flags
3876 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3877
3878 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3879 when the programmer takes care to keep the stack from being destroyed. */
3880 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3881 opts->x_target_flags |= MASK_NO_RED_ZONE;
3882 }
3883
3884 /* Keep nonleaf frame pointers. */
3885 if (opts->x_flag_omit_frame_pointer)
3886 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3887 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3888 opts->x_flag_omit_frame_pointer = 1;
3889
3890 /* If we're doing fast math, we don't care about comparison order
3891 wrt NaNs. This lets us use a shorter comparison sequence. */
3892 if (opts->x_flag_finite_math_only)
3893 opts->x_target_flags &= ~MASK_IEEE_FP;
3894
3895 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3896 since the insns won't need emulation. */
3897 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3898 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3899
3900 /* Likewise, if the target doesn't have a 387, or we've specified
3901 software floating point, don't use 387 inline intrinsics. */
3902 if (!TARGET_80387_P (opts->x_target_flags))
3903 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3904
3905 /* Turn on MMX builtins for -msse. */
3906 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3907 opts->x_ix86_isa_flags
3908 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3909
3910 /* Enable SSE prefetch. */
3911 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3912 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3913 x86_prefetch_sse = true;
3914
3915 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3916 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3917 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3918 opts->x_ix86_isa_flags
3919 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3920
3921 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3922 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3923 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3924 opts->x_ix86_isa_flags
3925 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3926
3927 /* Enable lzcnt instruction for -mabm. */
3928 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3929 opts->x_ix86_isa_flags
3930 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3931
3932 /* Validate -mpreferred-stack-boundary= value or default it to
3933 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3934 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3935 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3936 {
3937 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3938 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3939 int max = (TARGET_SEH ? 4 : 12);
3940
3941 if (opts->x_ix86_preferred_stack_boundary_arg < min
3942 || opts->x_ix86_preferred_stack_boundary_arg > max)
3943 {
3944 if (min == max)
3945 error ("-mpreferred-stack-boundary is not supported "
3946 "for this target");
3947 else
3948 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3949 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3950 }
3951 else
3952 ix86_preferred_stack_boundary
3953 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3954 }
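/* The argument is a power-of-two exponent in bytes: a value of N yields a
   (1 << N) * BITS_PER_UNIT boundary, so e.g. -mpreferred-stack-boundary=4
   requests a 16-byte (128-bit) stack alignment.  */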
3955
3956 /* Set the default value for -mstackrealign. */
3957 if (opts->x_ix86_force_align_arg_pointer == -1)
3958 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3959
3960 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3961
3962 /* Validate -mincoming-stack-boundary= value or default it to
3963 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3964 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3965 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3966 {
3967 if (opts->x_ix86_incoming_stack_boundary_arg
3968 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3969 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3970 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3971 opts->x_ix86_incoming_stack_boundary_arg,
3972 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3973 else
3974 {
3975 ix86_user_incoming_stack_boundary
3976 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3977 ix86_incoming_stack_boundary
3978 = ix86_user_incoming_stack_boundary;
3979 }
3980 }
3981
3982 #ifndef NO_PROFILE_COUNTERS
3983 if (flag_nop_mcount)
3984 error ("-mnop-mcount is not compatible with this target");
3985 #endif
3986 if (flag_nop_mcount && flag_pic)
3987 error ("-mnop-mcount is not implemented for -fPIC");
3988
3989 /* Accept -msseregparm only if at least SSE support is enabled. */
3990 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3991 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3992 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3993
3994 if (opts_set->x_ix86_fpmath)
3995 {
3996 if (opts->x_ix86_fpmath & FPMATH_SSE)
3997 {
3998 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3999 {
4000 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4001 opts->x_ix86_fpmath = FPMATH_387;
4002 }
4003 else if ((opts->x_ix86_fpmath & FPMATH_387)
4004 && !TARGET_80387_P (opts->x_target_flags))
4005 {
4006 warning (0, "387 instruction set disabled, using SSE arithmetics");
4007 opts->x_ix86_fpmath = FPMATH_SSE;
4008 }
4009 }
4010 }
4011 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4012 -mfpmath=387. The latter is nevertheless the default on many targets, since
4013 the extra 80-bit precision of temporaries is considered to be part of the ABI.
4014 Override the default at least for -ffast-math.
4015 TODO: -mfpmath=both seems to produce code that performs the same with slightly
4016 smaller binaries. It is however not clear if register allocation is
4017 ready for this setting.
4018 Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4019 codegen. We may switch to 387 with -ffast-math for size-optimized
4020 functions. */
4021 else if (fast_math_flags_set_p (&global_options)
4022 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4023 opts->x_ix86_fpmath = FPMATH_SSE;
4024 else
4025 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4026
4027 /* If the i387 is disabled, then do not return values in it. */
4028 if (!TARGET_80387_P (opts->x_target_flags))
4029 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4030
4031 /* Use an external vectorized library when vectorizing intrinsics. */
4032 if (opts_set->x_ix86_veclibabi_type)
4033 switch (opts->x_ix86_veclibabi_type)
4034 {
4035 case ix86_veclibabi_type_svml:
4036 ix86_veclib_handler = ix86_veclibabi_svml;
4037 break;
4038
4039 case ix86_veclibabi_type_acml:
4040 ix86_veclib_handler = ix86_veclibabi_acml;
4041 break;
4042
4043 default:
4044 gcc_unreachable ();
4045 }
4046
4047 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4048 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4049 && !opts->x_optimize_size)
4050 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4051
4052 /* If stack probes are required, the space used for large function
4053 arguments on the stack must also be probed, so enable
4054 -maccumulate-outgoing-args so this happens in the prologue. */
4055 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4056 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4057 {
4058 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4059 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4060 "for correctness", prefix, suffix);
4061 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4062 }
4063
4064 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
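/* The "LX" template puts a literal 'X' into the generated label; truncating
   at that 'X' leaves only the target's internal label prefix, and its length
   is recorded for later use.  */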
4065 {
4066 char *p;
4067 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4068 p = strchr (internal_label_prefix, 'X');
4069 internal_label_prefix_len = p - internal_label_prefix;
4070 *p = '\0';
4071 }
4072
4073 /* When no scheduling description is available, disable the scheduler pass
4074 so it won't slow down compilation or make x87 code slower. */
4075 if (!TARGET_SCHEDULE)
4076 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4077
4078 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4079 ix86_tune_cost->simultaneous_prefetches,
4080 opts->x_param_values,
4081 opts_set->x_param_values);
4082 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4083 ix86_tune_cost->prefetch_block,
4084 opts->x_param_values,
4085 opts_set->x_param_values);
4086 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4087 ix86_tune_cost->l1_cache_size,
4088 opts->x_param_values,
4089 opts_set->x_param_values);
4090 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4091 ix86_tune_cost->l2_cache_size,
4092 opts->x_param_values,
4093 opts_set->x_param_values);
4094
4095 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4096 if (opts->x_flag_prefetch_loop_arrays < 0
4097 && HAVE_prefetch
4098 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4099 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4100 opts->x_flag_prefetch_loop_arrays = 1;
4101
4102 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4103 can be optimized to ap = __builtin_next_arg (0). */
4104 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4105 targetm.expand_builtin_va_start = NULL;
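/* Select the Pmode/word-size specific RTL generator functions (leave, add,
   sub, stack probing, monitor, TLS) that the rest of this file uses when
   emitting prologue/epilogue and builtin code.  */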
4106
4107 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4108 {
4109 ix86_gen_leave = gen_leave_rex64;
4110 if (Pmode == DImode)
4111 {
4112 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4113 ix86_gen_tls_local_dynamic_base_64
4114 = gen_tls_local_dynamic_base_64_di;
4115 }
4116 else
4117 {
4118 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4119 ix86_gen_tls_local_dynamic_base_64
4120 = gen_tls_local_dynamic_base_64_si;
4121 }
4122 }
4123 else
4124 ix86_gen_leave = gen_leave;
4125
4126 if (Pmode == DImode)
4127 {
4128 ix86_gen_add3 = gen_adddi3;
4129 ix86_gen_sub3 = gen_subdi3;
4130 ix86_gen_sub3_carry = gen_subdi3_carry;
4131 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4132 ix86_gen_andsp = gen_anddi3;
4133 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4134 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4135 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4136 ix86_gen_monitor = gen_sse3_monitor_di;
4137 }
4138 else
4139 {
4140 ix86_gen_add3 = gen_addsi3;
4141 ix86_gen_sub3 = gen_subsi3;
4142 ix86_gen_sub3_carry = gen_subsi3_carry;
4143 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4144 ix86_gen_andsp = gen_andsi3;
4145 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4146 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4147 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4148 ix86_gen_monitor = gen_sse3_monitor_si;
4149 }
4150
4151 #ifdef USE_IX86_CLD
4152 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4153 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4154 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4155 #endif
4156
4157 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4158 {
4159 if (opts->x_flag_fentry > 0)
4160 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4161 "with -fpic");
4162 opts->x_flag_fentry = 0;
4163 }
4164 else if (TARGET_SEH)
4165 {
4166 if (opts->x_flag_fentry == 0)
4167 sorry ("-mno-fentry isn%'t compatible with SEH");
4168 opts->x_flag_fentry = 1;
4169 }
4170 else if (opts->x_flag_fentry < 0)
4171 {
4172 #if defined(PROFILE_BEFORE_PROLOGUE)
4173 opts->x_flag_fentry = 1;
4174 #else
4175 opts->x_flag_fentry = 0;
4176 #endif
4177 }
4178
4179 /* When not optimizing for size, enable the vzeroupper optimization for
4180 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4181 AVX unaligned loads/stores. */
4182 if (!opts->x_optimize_size)
4183 {
4184 if (flag_expensive_optimizations
4185 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4186 opts->x_target_flags |= MASK_VZEROUPPER;
4187 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4188 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4189 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4190 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4191 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4192 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4193 /* Enable 128-bit AVX instruction generation
4194 for the auto-vectorizer. */
4195 if (TARGET_AVX128_OPTIMAL
4196 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4197 opts->x_target_flags |= MASK_PREFER_AVX128;
4198 }
4199
4200 if (opts->x_ix86_recip_name)
4201 {
4202 char *p = ASTRDUP (opts->x_ix86_recip_name);
4203 char *q;
4204 unsigned int mask, i;
4205 bool invert;
4206
4207 while ((q = strtok (p, ",")) != NULL)
4208 {
4209 p = NULL;
4210 if (*q == '!')
4211 {
4212 invert = true;
4213 q++;
4214 }
4215 else
4216 invert = false;
4217
4218 if (!strcmp (q, "default"))
4219 mask = RECIP_MASK_ALL;
4220 else
4221 {
4222 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4223 if (!strcmp (q, recip_options[i].string))
4224 {
4225 mask = recip_options[i].mask;
4226 break;
4227 }
4228
4229 if (i == ARRAY_SIZE (recip_options))
4230 {
4231 error ("unknown option for -mrecip=%s", q);
4232 invert = false;
4233 mask = RECIP_MASK_NONE;
4234 }
4235 }
4236
4237 opts->x_recip_mask_explicit |= mask;
4238 if (invert)
4239 opts->x_recip_mask &= ~mask;
4240 else
4241 opts->x_recip_mask |= mask;
4242 }
4243 }
4244
4245 if (TARGET_RECIP_P (opts->x_target_flags))
4246 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4247 else if (opts_set->x_target_flags & MASK_RECIP)
4248 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4249
4250 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4251 for 64-bit Bionic. */
4252 if (TARGET_HAS_BIONIC
4253 && !(opts_set->x_target_flags
4254 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4255 opts->x_target_flags |= (TARGET_64BIT
4256 ? MASK_LONG_DOUBLE_128
4257 : MASK_LONG_DOUBLE_64);
4258
4259 /* Only one of them can be active. */
4260 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4261 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4262
4263 /* Save the initial options in case the user uses function-specific
4264 options. */
4265 if (main_args_p)
4266 target_option_default_node = target_option_current_node
4267 = build_target_option_node (opts);
4268
4269 /* Handle stack protector */
4270 if (!opts_set->x_ix86_stack_protector_guard)
4271 opts->x_ix86_stack_protector_guard
4272 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4273
4274 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4275 if (opts->x_ix86_tune_memcpy_strategy)
4276 {
4277 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4278 ix86_parse_stringop_strategy_string (str, false);
4279 free (str);
4280 }
4281
4282 if (opts->x_ix86_tune_memset_strategy)
4283 {
4284 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4285 ix86_parse_stringop_strategy_string (str, true);
4286 free (str);
4287 }
4288 }
4289
4290 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4291
4292 static void
4293 ix86_option_override (void)
4294 {
4295 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4296 static struct register_pass_info insert_vzeroupper_info
4297 = { pass_insert_vzeroupper, "reload",
4298 1, PASS_POS_INSERT_AFTER
4299 };
4300
4301 ix86_option_override_internal (true, &global_options, &global_options_set);
4302
4303
4304 /* This needs to be done at start up. It's convenient to do it here. */
4305 register_pass (&insert_vzeroupper_info);
4306 }
4307
4308 /* Update register usage after having seen the compiler flags. */
4309
4310 static void
4311 ix86_conditional_register_usage (void)
4312 {
4313 int i, c_mask;
4314 unsigned int j;
4315
4316 /* The PIC register, if it exists, is fixed. */
4317 j = PIC_OFFSET_TABLE_REGNUM;
4318 if (j != INVALID_REGNUM)
4319 fixed_regs[j] = call_used_regs[j] = 1;
4320
4321 /* For 32-bit targets, squash the REX registers. */
4322 if (! TARGET_64BIT)
4323 {
4324 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4325 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4326 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4327 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4328 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4329 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4330 }
4331
4332 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4333 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4334 : TARGET_64BIT ? (1 << 2)
4335 : (1 << 1));
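/* c_mask selects which conditional bit of each CALL_USED_REGISTERS
   initializer value applies to the current ABI: bit 3 for the 64-bit MS ABI,
   bit 2 for other 64-bit ABIs, and bit 1 for 32-bit targets (see i386.h).  */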
4336
4337 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4338
4339 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4340 {
4341 /* Set/reset conditionally defined registers from
4342 CALL_USED_REGISTERS initializer. */
4343 if (call_used_regs[i] > 1)
4344 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4345
4346 /* Calculate registers of CLOBBERED_REGS register set
4347 as call used registers from GENERAL_REGS register set. */
4348 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4349 && call_used_regs[i])
4350 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4351 }
4352
4353 /* If MMX is disabled, squash the registers. */
4354 if (! TARGET_MMX)
4355 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4356 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4357 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4358
4359 /* If SSE is disabled, squash the registers. */
4360 if (! TARGET_SSE)
4361 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4362 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4363 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4364
4365 /* If the FPU is disabled, squash the registers. */
4366 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4367 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4368 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4369 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4370
4371 /* If AVX512F is disabled, squash the registers. */
4372 if (! TARGET_AVX512F)
4373 {
4374 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4375 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4376
4377 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4378 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4379 }
4380 }
4381
4382 \f
4383 /* Save the current options */
4384
4385 static void
4386 ix86_function_specific_save (struct cl_target_option *ptr,
4387 struct gcc_options *opts)
4388 {
4389 ptr->arch = ix86_arch;
4390 ptr->schedule = ix86_schedule;
4391 ptr->tune = ix86_tune;
4392 ptr->branch_cost = ix86_branch_cost;
4393 ptr->tune_defaulted = ix86_tune_defaulted;
4394 ptr->arch_specified = ix86_arch_specified;
4395 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4396 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4397 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4398 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4399 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4400 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4401 ptr->x_ix86_abi = opts->x_ix86_abi;
4402 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4403 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4404 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4405 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4406 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4407 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4408 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4409 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4410 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4411 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4412 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4413 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4414 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4415 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4416 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4417 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4418 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4419 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4420 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4421 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4422
4423 /* The fields are char but the variables are not; make sure the
4424 values fit in the fields. */
4425 gcc_assert (ptr->arch == ix86_arch);
4426 gcc_assert (ptr->schedule == ix86_schedule);
4427 gcc_assert (ptr->tune == ix86_tune);
4428 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4429 }
4430
4431 /* Restore the current options */
4432
4433 static void
4434 ix86_function_specific_restore (struct gcc_options *opts,
4435 struct cl_target_option *ptr)
4436 {
4437 enum processor_type old_tune = ix86_tune;
4438 enum processor_type old_arch = ix86_arch;
4439 unsigned int ix86_arch_mask;
4440 int i;
4441
4442 /* We don't change -fPIC. */
4443 opts->x_flag_pic = flag_pic;
4444
4445 ix86_arch = (enum processor_type) ptr->arch;
4446 ix86_schedule = (enum attr_cpu) ptr->schedule;
4447 ix86_tune = (enum processor_type) ptr->tune;
4448 opts->x_ix86_branch_cost = ptr->branch_cost;
4449 ix86_tune_defaulted = ptr->tune_defaulted;
4450 ix86_arch_specified = ptr->arch_specified;
4451 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4452 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4453 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4454 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4455 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4456 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4457 opts->x_ix86_abi = ptr->x_ix86_abi;
4458 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4459 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4460 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4461 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4462 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4463 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4464 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4465 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4466 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4467 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4468 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4469 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4470 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4471 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4472 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4473 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4474 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4475 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4476 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4477 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4478
4479 /* Recreate the arch feature tests if the arch changed */
4480 if (old_arch != ix86_arch)
4481 {
4482 ix86_arch_mask = 1u << ix86_arch;
4483 for (i = 0; i < X86_ARCH_LAST; ++i)
4484 ix86_arch_features[i]
4485 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4486 }
4487
4488 /* Recreate the tune optimization tests */
4489 if (old_tune != ix86_tune)
4490 set_ix86_tune_features (ix86_tune, false);
4491 }
4492
4493 /* Print the current options */
4494
4495 static void
4496 ix86_function_specific_print (FILE *file, int indent,
4497 struct cl_target_option *ptr)
4498 {
4499 char *target_string
4500 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4501 NULL, NULL, ptr->x_ix86_fpmath, false);
4502
4503 gcc_assert (ptr->arch < PROCESSOR_max);
4504 fprintf (file, "%*sarch = %d (%s)\n",
4505 indent, "",
4506 ptr->arch, processor_target_table[ptr->arch].name);
4507
4508 gcc_assert (ptr->tune < PROCESSOR_max);
4509 fprintf (file, "%*stune = %d (%s)\n",
4510 indent, "",
4511 ptr->tune, processor_target_table[ptr->tune].name);
4512
4513 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4514
4515 if (target_string)
4516 {
4517 fprintf (file, "%*s%s\n", indent, "", target_string);
4518 free (target_string);
4519 }
4520 }
4521
4522 \f
4523 /* Inner function to process attribute ((target (...))): take an argument and
4524 set the current options from it. If the argument is a list, recursively go
4525 over the list. */
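/* For example, __attribute__((target ("avx2,no-fma,arch=core2"))) enables
   AVX2, disables FMA and overrides the arch selection for just the
   decorated function; the "no-" prefix and the arch=/tune= string options
   are recognized below.  */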
4526
4527 static bool
4528 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4529 struct gcc_options *opts,
4530 struct gcc_options *opts_set,
4531 struct gcc_options *enum_opts_set)
4532 {
4533 char *next_optstr;
4534 bool ret = true;
4535
4536 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4537 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4538 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4539 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4540 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4541
4542 enum ix86_opt_type
4543 {
4544 ix86_opt_unknown,
4545 ix86_opt_yes,
4546 ix86_opt_no,
4547 ix86_opt_str,
4548 ix86_opt_enum,
4549 ix86_opt_isa
4550 };
4551
4552 static const struct
4553 {
4554 const char *string;
4555 size_t len;
4556 enum ix86_opt_type type;
4557 int opt;
4558 int mask;
4559 } attrs[] = {
4560 /* isa options */
4561 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4562 IX86_ATTR_ISA ("abm", OPT_mabm),
4563 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4564 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4565 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4566 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4567 IX86_ATTR_ISA ("aes", OPT_maes),
4568 IX86_ATTR_ISA ("sha", OPT_msha),
4569 IX86_ATTR_ISA ("avx", OPT_mavx),
4570 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4571 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4572 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4573 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4574 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4575 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4576 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4577 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4578 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4579 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4580 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4581 IX86_ATTR_ISA ("sse", OPT_msse),
4582 IX86_ATTR_ISA ("sse2", OPT_msse2),
4583 IX86_ATTR_ISA ("sse3", OPT_msse3),
4584 IX86_ATTR_ISA ("sse4", OPT_msse4),
4585 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4586 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4587 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4588 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4589 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4590 IX86_ATTR_ISA ("fma", OPT_mfma),
4591 IX86_ATTR_ISA ("xop", OPT_mxop),
4592 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4593 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4594 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4595 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4596 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4597 IX86_ATTR_ISA ("hle", OPT_mhle),
4598 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4599 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4600 IX86_ATTR_ISA ("adx", OPT_madx),
4601 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4602 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4603 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4604 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4605 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4606 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4607 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4608
4609 /* enum options */
4610 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4611
4612 /* string options */
4613 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4614 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4615
4616 /* flag options */
4617 IX86_ATTR_YES ("cld",
4618 OPT_mcld,
4619 MASK_CLD),
4620
4621 IX86_ATTR_NO ("fancy-math-387",
4622 OPT_mfancy_math_387,
4623 MASK_NO_FANCY_MATH_387),
4624
4625 IX86_ATTR_YES ("ieee-fp",
4626 OPT_mieee_fp,
4627 MASK_IEEE_FP),
4628
4629 IX86_ATTR_YES ("inline-all-stringops",
4630 OPT_minline_all_stringops,
4631 MASK_INLINE_ALL_STRINGOPS),
4632
4633 IX86_ATTR_YES ("inline-stringops-dynamically",
4634 OPT_minline_stringops_dynamically,
4635 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4636
4637 IX86_ATTR_NO ("align-stringops",
4638 OPT_mno_align_stringops,
4639 MASK_NO_ALIGN_STRINGOPS),
4640
4641 IX86_ATTR_YES ("recip",
4642 OPT_mrecip,
4643 MASK_RECIP),
4644
4645 };
4646
4647 /* If this is a list, recurse to get the options. */
4648 if (TREE_CODE (args) == TREE_LIST)
4649 {
4650 bool ret = true;
4651
4652 for (; args; args = TREE_CHAIN (args))
4653 if (TREE_VALUE (args)
4654 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4655 p_strings, opts, opts_set,
4656 enum_opts_set))
4657 ret = false;
4658
4659 return ret;
4660 }
4661
4662 else if (TREE_CODE (args) != STRING_CST)
4663 {
4664 error ("attribute %<target%> argument not a string");
4665 return false;
4666 }
4667
4668 /* Handle multiple arguments separated by commas. */
4669 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4670
4671 while (next_optstr && *next_optstr != '\0')
4672 {
4673 char *p = next_optstr;
4674 char *orig_p = p;
4675 char *comma = strchr (next_optstr, ',');
4676 const char *opt_string;
4677 size_t len, opt_len;
4678 int opt;
4679 bool opt_set_p;
4680 char ch;
4681 unsigned i;
4682 enum ix86_opt_type type = ix86_opt_unknown;
4683 int mask = 0;
4684
4685 if (comma)
4686 {
4687 *comma = '\0';
4688 len = comma - next_optstr;
4689 next_optstr = comma + 1;
4690 }
4691 else
4692 {
4693 len = strlen (p);
4694 next_optstr = NULL;
4695 }
4696
4697 /* Recognize no-xxx. */
4698 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4699 {
4700 opt_set_p = false;
4701 p += 3;
4702 len -= 3;
4703 }
4704 else
4705 opt_set_p = true;
4706
4707 /* Find the option. */
4708 ch = *p;
4709 opt = N_OPTS;
4710 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4711 {
4712 type = attrs[i].type;
4713 opt_len = attrs[i].len;
4714 if (ch == attrs[i].string[0]
4715 && ((type != ix86_opt_str && type != ix86_opt_enum)
4716 ? len == opt_len
4717 : len > opt_len)
4718 && memcmp (p, attrs[i].string, opt_len) == 0)
4719 {
4720 opt = attrs[i].opt;
4721 mask = attrs[i].mask;
4722 opt_string = attrs[i].string;
4723 break;
4724 }
4725 }
4726
4727 /* Process the option. */
4728 if (opt == N_OPTS)
4729 {
4730 error ("attribute(target(\"%s\")) is unknown", orig_p);
4731 ret = false;
4732 }
4733
4734 else if (type == ix86_opt_isa)
4735 {
4736 struct cl_decoded_option decoded;
4737
4738 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4739 ix86_handle_option (opts, opts_set,
4740 &decoded, input_location);
4741 }
4742
4743 else if (type == ix86_opt_yes || type == ix86_opt_no)
4744 {
4745 if (type == ix86_opt_no)
4746 opt_set_p = !opt_set_p;
4747
4748 if (opt_set_p)
4749 opts->x_target_flags |= mask;
4750 else
4751 opts->x_target_flags &= ~mask;
4752 }
4753
4754 else if (type == ix86_opt_str)
4755 {
4756 if (p_strings[opt])
4757 {
4758 error ("option(\"%s\") was already specified", opt_string);
4759 ret = false;
4760 }
4761 else
4762 p_strings[opt] = xstrdup (p + opt_len);
4763 }
4764
4765 else if (type == ix86_opt_enum)
4766 {
4767 bool arg_ok;
4768 int value;
4769
4770 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4771 if (arg_ok)
4772 set_option (opts, enum_opts_set, opt, value,
4773 p + opt_len, DK_UNSPECIFIED, input_location,
4774 global_dc);
4775 else
4776 {
4777 error ("attribute(target(\"%s\")) is unknown", orig_p);
4778 ret = false;
4779 }
4780 }
4781
4782 else
4783 gcc_unreachable ();
4784 }
4785
4786 return ret;
4787 }
4788
4789 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4790
4791 tree
4792 ix86_valid_target_attribute_tree (tree args,
4793 struct gcc_options *opts,
4794 struct gcc_options *opts_set)
4795 {
4796 const char *orig_arch_string = opts->x_ix86_arch_string;
4797 const char *orig_tune_string = opts->x_ix86_tune_string;
4798 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4799 int orig_tune_defaulted = ix86_tune_defaulted;
4800 int orig_arch_specified = ix86_arch_specified;
4801 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4802 tree t = NULL_TREE;
4803 int i;
4804 struct cl_target_option *def
4805 = TREE_TARGET_OPTION (target_option_default_node);
4806 struct gcc_options enum_opts_set;
4807
4808 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4809
4810 /* Process each of the options on the chain. */
4811 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4812 opts_set, &enum_opts_set))
4813 return error_mark_node;
4814
4815 /* If the changed options are different from the default, rerun
4816 ix86_option_override_internal, and then save the options away.
4817 The string options are attribute options, and will be undone
4818 when we copy the save structure. */
4819 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4820 || opts->x_target_flags != def->x_target_flags
4821 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4822 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4823 || enum_opts_set.x_ix86_fpmath)
4824 {
4825 /* If we are using the default tune= or arch=, undo the string assigned,
4826 and use the default. */
4827 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4828 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4829 else if (!orig_arch_specified)
4830 opts->x_ix86_arch_string = NULL;
4831
4832 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4833 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4834 else if (orig_tune_defaulted)
4835 opts->x_ix86_tune_string = NULL;
4836
4837 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4838 if (enum_opts_set.x_ix86_fpmath)
4839 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4840 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4841 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4842 {
4843 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4844 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4845 }
4846
4847 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4848 ix86_option_override_internal (false, opts, opts_set);
4849
4850 /* Add any builtin functions with the new isa if any. */
4851 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4852
4853 /* Save the current options unless we are validating options for
4854 #pragma. */
4855 t = build_target_option_node (opts);
4856
4857 opts->x_ix86_arch_string = orig_arch_string;
4858 opts->x_ix86_tune_string = orig_tune_string;
4859 opts_set->x_ix86_fpmath = orig_fpmath_set;
4860
4861 /* Free up memory allocated to hold the strings */
4862 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4863 free (option_strings[i]);
4864 }
4865
4866 return t;
4867 }
4868
4869 /* Hook to validate attribute((target("string"))). */
4870
4871 static bool
4872 ix86_valid_target_attribute_p (tree fndecl,
4873 tree ARG_UNUSED (name),
4874 tree args,
4875 int ARG_UNUSED (flags))
4876 {
4877 struct gcc_options func_options;
4878 tree new_target, new_optimize;
4879 bool ret = true;
4880
4881 /* attribute((target("default"))) does nothing, beyond
4882 affecting multi-versioning. */
4883 if (TREE_VALUE (args)
4884 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4885 && TREE_CHAIN (args) == NULL_TREE
4886 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4887 return true;
4888
4889 tree old_optimize = build_optimization_node (&global_options);
4890
4891 /* Get the optimization options of the current function. */
4892 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4893
4894 if (!func_optimize)
4895 func_optimize = old_optimize;
4896
4897 /* Init func_options. */
4898 memset (&func_options, 0, sizeof (func_options));
4899 init_options_struct (&func_options, NULL);
4900 lang_hooks.init_options_struct (&func_options);
4901
4902 cl_optimization_restore (&func_options,
4903 TREE_OPTIMIZATION (func_optimize));
4904
4905 /* Initialize func_options to the default before its target options can
4906 be set. */
4907 cl_target_option_restore (&func_options,
4908 TREE_TARGET_OPTION (target_option_default_node));
4909
4910 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4911 &global_options_set);
4912
4913 new_optimize = build_optimization_node (&func_options);
4914
4915 if (new_target == error_mark_node)
4916 ret = false;
4917
4918 else if (fndecl && new_target)
4919 {
4920 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4921
4922 if (old_optimize != new_optimize)
4923 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4924 }
4925
4926 return ret;
4927 }
4928
4929 \f
4930 /* Hook to determine if one function can safely inline another. */
4931
4932 static bool
4933 ix86_can_inline_p (tree caller, tree callee)
4934 {
4935 bool ret = false;
4936 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4937 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4938
4939 /* If callee has no option attributes, then it is ok to inline. */
4940 if (!callee_tree)
4941 ret = true;
4942
4943 /* If caller has no option attributes, but callee does then it is not ok to
4944 inline. */
4945 else if (!caller_tree)
4946 ret = false;
4947
4948 else
4949 {
4950 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4951 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4952
4953 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
4954 function can inline an SSE2 function, but an SSE2 function can't inline
4955 an SSE4 function. */
4956 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4957 != callee_opts->x_ix86_isa_flags)
4958 ret = false;
4959
4960 /* See if we have the same non-isa options. */
4961 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4962 ret = false;
4963
4964 /* See if arch, tune, etc. are the same. */
4965 else if (caller_opts->arch != callee_opts->arch)
4966 ret = false;
4967
4968 else if (caller_opts->tune != callee_opts->tune)
4969 ret = false;
4970
4971 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4972 ret = false;
4973
4974 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4975 ret = false;
4976
4977 else
4978 ret = true;
4979 }
4980
4981 return ret;
4982 }
4983
4984 \f
4985 /* Remember the last target of ix86_set_current_function. */
4986 static GTY(()) tree ix86_previous_fndecl;
4987
4988 /* Invalidate ix86_previous_fndecl cache. */
4989 void
4990 ix86_reset_previous_fndecl (void)
4991 {
4992 ix86_previous_fndecl = NULL_TREE;
4993 }
4994
4995 /* Establish appropriate back-end context for processing the function
4996 FNDECL. The argument might be NULL to indicate processing at top
4997 level, outside of any function scope. */
4998 static void
4999 ix86_set_current_function (tree fndecl)
5000 {
5001 /* Only change the context if the function changes. This hook is called
5002 several times in the course of compiling a function, and we don't want to
5003 slow things down too much or call target_reinit when it isn't safe. */
5004 if (fndecl && fndecl != ix86_previous_fndecl)
5005 {
5006 tree old_tree = (ix86_previous_fndecl
5007 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5008 : NULL_TREE);
5009
5010 tree new_tree = (fndecl
5011 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5012 : NULL_TREE);
5013
5014 ix86_previous_fndecl = fndecl;
5015 if (old_tree == new_tree)
5016 ;
5017
5018 else if (new_tree)
5019 {
5020 cl_target_option_restore (&global_options,
5021 TREE_TARGET_OPTION (new_tree));
5022 if (TREE_TARGET_GLOBALS (new_tree))
5023 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5024 else
5025 TREE_TARGET_GLOBALS (new_tree)
5026 = save_target_globals_default_opts ();
5027 }
5028
5029 else if (old_tree)
5030 {
5031 new_tree = target_option_current_node;
5032 cl_target_option_restore (&global_options,
5033 TREE_TARGET_OPTION (new_tree));
5034 if (TREE_TARGET_GLOBALS (new_tree))
5035 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5036 else if (new_tree == target_option_default_node)
5037 restore_target_globals (&default_target_globals);
5038 else
5039 TREE_TARGET_GLOBALS (new_tree)
5040 = save_target_globals_default_opts ();
5041 }
5042 }
5043 }
5044
5045 \f
5046 /* Return true if this goes in large data/bss. */
5047
5048 static bool
5049 ix86_in_large_data_p (tree exp)
5050 {
5051 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5052 return false;
5053
5054 /* Functions are never large data. */
5055 if (TREE_CODE (exp) == FUNCTION_DECL)
5056 return false;
5057
5058 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5059 {
5060 const char *section = DECL_SECTION_NAME (exp);
5061 if (strcmp (section, ".ldata") == 0
5062 || strcmp (section, ".lbss") == 0)
5063 return true;
5064 return false;
5065 }
5066 else
5067 {
5068 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5069
5070 /* If this is an incomplete type with size 0, then we can't put it
5071 in data because it might be too big when completed. Also,
5072 int_size_in_bytes returns -1 if size can vary or is larger than
5073 an integer in which case also it is safer to assume that it goes in
5074 large data. */
5075 if (size <= 0 || size > ix86_section_threshold)
5076 return true;
5077 }
5078
5079 return false;
5080 }
5081
5082 /* Switch to the appropriate section for output of DECL.
5083 DECL is either a `VAR_DECL' node or a constant of some sort.
5084 RELOC indicates whether forming the initial value of DECL requires
5085 link-time relocations. */
5086
5087 ATTRIBUTE_UNUSED static section *
5088 x86_64_elf_select_section (tree decl, int reloc,
5089 unsigned HOST_WIDE_INT align)
5090 {
5091 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5092 && ix86_in_large_data_p (decl))
5093 {
5094 const char *sname = NULL;
5095 unsigned int flags = SECTION_WRITE;
5096 switch (categorize_decl_for_section (decl, reloc))
5097 {
5098 case SECCAT_DATA:
5099 sname = ".ldata";
5100 break;
5101 case SECCAT_DATA_REL:
5102 sname = ".ldata.rel";
5103 break;
5104 case SECCAT_DATA_REL_LOCAL:
5105 sname = ".ldata.rel.local";
5106 break;
5107 case SECCAT_DATA_REL_RO:
5108 sname = ".ldata.rel.ro";
5109 break;
5110 case SECCAT_DATA_REL_RO_LOCAL:
5111 sname = ".ldata.rel.ro.local";
5112 break;
5113 case SECCAT_BSS:
5114 sname = ".lbss";
5115 flags |= SECTION_BSS;
5116 break;
5117 case SECCAT_RODATA:
5118 case SECCAT_RODATA_MERGE_STR:
5119 case SECCAT_RODATA_MERGE_STR_INIT:
5120 case SECCAT_RODATA_MERGE_CONST:
5121 sname = ".lrodata";
5122 flags = 0;
5123 break;
5124 case SECCAT_SRODATA:
5125 case SECCAT_SDATA:
5126 case SECCAT_SBSS:
5127 gcc_unreachable ();
5128 case SECCAT_TEXT:
5129 case SECCAT_TDATA:
5130 case SECCAT_TBSS:
5131 /* We don't split these for the medium model. Place them into
5132 default sections and hope for the best. */
5133 break;
5134 }
5135 if (sname)
5136 {
5137 /* We might get called with string constants, but get_named_section
5138 doesn't like them as they are not DECLs. Also, we need to set
5139 flags in that case. */
5140 if (!DECL_P (decl))
5141 return get_section (sname, flags, NULL);
5142 return get_named_section (decl, sname, reloc);
5143 }
5144 }
5145 return default_elf_select_section (decl, reloc, align);
5146 }
5147
5148 /* Select a set of attributes for section NAME based on the properties
5149 of DECL and whether or not RELOC indicates that DECL's initializer
5150 might contain runtime relocations. */
5151
5152 static unsigned int ATTRIBUTE_UNUSED
5153 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5154 {
5155 unsigned int flags = default_section_type_flags (decl, name, reloc);
5156
5157 if (decl == NULL_TREE
5158 && (strcmp (name, ".ldata.rel.ro") == 0
5159 || strcmp (name, ".ldata.rel.ro.local") == 0))
5160 flags |= SECTION_RELRO;
5161
5162 if (strcmp (name, ".lbss") == 0
5163 || strncmp (name, ".lbss.", 5) == 0
5164 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5165 flags |= SECTION_BSS;
5166
5167 return flags;
5168 }
5169
5170 /* Build up a unique section name, expressed as a
5171 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5172 RELOC indicates whether the initial value of EXP requires
5173 link-time relocations. */
5174
5175 static void ATTRIBUTE_UNUSED
5176 x86_64_elf_unique_section (tree decl, int reloc)
5177 {
5178 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5179 && ix86_in_large_data_p (decl))
5180 {
5181 const char *prefix = NULL;
5182 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5183 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5184
5185 switch (categorize_decl_for_section (decl, reloc))
5186 {
5187 case SECCAT_DATA:
5188 case SECCAT_DATA_REL:
5189 case SECCAT_DATA_REL_LOCAL:
5190 case SECCAT_DATA_REL_RO:
5191 case SECCAT_DATA_REL_RO_LOCAL:
5192 prefix = one_only ? ".ld" : ".ldata";
5193 break;
5194 case SECCAT_BSS:
5195 prefix = one_only ? ".lb" : ".lbss";
5196 break;
5197 case SECCAT_RODATA:
5198 case SECCAT_RODATA_MERGE_STR:
5199 case SECCAT_RODATA_MERGE_STR_INIT:
5200 case SECCAT_RODATA_MERGE_CONST:
5201 prefix = one_only ? ".lr" : ".lrodata";
5202 break;
5203 case SECCAT_SRODATA:
5204 case SECCAT_SDATA:
5205 case SECCAT_SBSS:
5206 gcc_unreachable ();
5207 case SECCAT_TEXT:
5208 case SECCAT_TDATA:
5209 case SECCAT_TBSS:
5210 /* We don't split these for the medium model. Place them into
5211 default sections and hope for the best. */
5212 break;
5213 }
5214 if (prefix)
5215 {
5216 const char *name, *linkonce;
5217 char *string;
5218
5219 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5220 name = targetm.strip_name_encoding (name);
5221
5222 /* If we're using one_only, then there needs to be a .gnu.linkonce
5223 prefix to the section name. */
5224 linkonce = one_only ? ".gnu.linkonce" : "";
5225
5226 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5227
5228 set_decl_section_name (decl, string);
5229 return;
5230 }
5231 }
5232 default_unique_section (decl, reloc);
5233 }
5234
5235 #ifdef COMMON_ASM_OP
5236 /* This says how to output assembler code to declare an
5237 uninitialized external linkage data object.
5238
5239 For medium model x86-64 we need to use .largecomm opcode for
5240 large objects. */
5241 void
5242 x86_elf_aligned_common (FILE *file,
5243 const char *name, unsigned HOST_WIDE_INT size,
5244 int align)
5245 {
5246 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5247 && size > (unsigned int)ix86_section_threshold)
5248 fputs (".largecomm\t", file);
5249 else
5250 fputs (COMMON_ASM_OP, file);
5251 assemble_name (file, name);
5252 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5253 size, align / BITS_PER_UNIT);
5254 }
5255 #endif
5256
5257 /* Utility function for targets to use in implementing
5258 ASM_OUTPUT_ALIGNED_BSS. */
5259
5260 void
5261 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5262 unsigned HOST_WIDE_INT size, int align)
5263 {
5264 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5265 && size > (unsigned int)ix86_section_threshold)
5266 switch_to_section (get_named_section (decl, ".lbss", 0));
5267 else
5268 switch_to_section (bss_section);
5269 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5270 #ifdef ASM_DECLARE_OBJECT_NAME
5271 last_assemble_variable_decl = decl;
5272 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5273 #else
5274 /* Standard thing is just output label for the object. */
5275 ASM_OUTPUT_LABEL (file, name);
5276 #endif /* ASM_DECLARE_OBJECT_NAME */
5277 ASM_OUTPUT_SKIP (file, size ? size : 1);
5278 }
5279 \f
5280 /* Decide whether we must probe the stack before any space allocation
5281 on this target. It's essentially TARGET_STACK_PROBE except when
5282 -fstack-check causes the stack to be already probed differently. */
5283
5284 bool
5285 ix86_target_stack_probe (void)
5286 {
5287 /* Do not probe the stack twice if static stack checking is enabled. */
5288 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5289 return false;
5290
5291 return TARGET_STACK_PROBE;
5292 }
5293 \f
5294 /* Decide whether we can make a sibling call to a function. DECL is the
5295 declaration of the function being targeted by the call and EXP is the
5296 CALL_EXPR representing the call. */
5297
5298 static bool
5299 ix86_function_ok_for_sibcall (tree decl, tree exp)
5300 {
5301 tree type, decl_or_type;
5302 rtx a, b;
5303
5304 /* If we are generating position-independent code, we cannot sibcall
5305 optimize any indirect call, or a direct call to a global function,
5306 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5307 if (!TARGET_MACHO
5308 && !TARGET_64BIT
5309 && flag_pic
5310 && (!decl || !targetm.binds_local_p (decl)))
5311 return false;
5312
5313 /* If we need to align the outgoing stack, then sibcalling would
5314 unalign the stack, which may break the called function. */
5315 if (ix86_minimum_incoming_stack_boundary (true)
5316 < PREFERRED_STACK_BOUNDARY)
5317 return false;
5318
5319 if (decl)
5320 {
5321 decl_or_type = decl;
5322 type = TREE_TYPE (decl);
5323 }
5324 else
5325 {
5326 /* We're looking at the CALL_EXPR, we need the type of the function. */
5327 type = CALL_EXPR_FN (exp); /* pointer expression */
5328 type = TREE_TYPE (type); /* pointer type */
5329 type = TREE_TYPE (type); /* function type */
5330 decl_or_type = type;
5331 }
5332
5333 /* Check that the return value locations are the same. Like
5334 if we are returning floats on the 80387 register stack, we cannot
5335 make a sibcall from a function that doesn't return a float to a
5336 function that does or, conversely, from a function that does return
5337 a float to a function that doesn't; the necessary stack adjustment
5338 would not be executed. This is also the place we notice
5339 differences in the return value ABI. Note that it is ok for one
5340 of the functions to have void return type as long as the return
5341 value of the other is passed in a register. */
5342 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5343 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5344 cfun->decl, false);
5345 if (STACK_REG_P (a) || STACK_REG_P (b))
5346 {
5347 if (!rtx_equal_p (a, b))
5348 return false;
5349 }
5350 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5351 ;
5352 else if (!rtx_equal_p (a, b))
5353 return false;
5354
5355 if (TARGET_64BIT)
5356 {
5357 /* The SYSV ABI has more call-clobbered registers;
5358 disallow sibcalls from MS to SYSV. */
5359 if (cfun->machine->call_abi == MS_ABI
5360 && ix86_function_type_abi (type) == SYSV_ABI)
5361 return false;
5362 }
5363 else
5364 {
5365 /* If this call is indirect, we'll need to be able to use a
5366 call-clobbered register for the address of the target function.
5367 Make sure that all such registers are not used for passing
5368 parameters. Note that DLLIMPORT functions are indirect. */
5369 if (!decl
5370 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5371 {
5372 if (ix86_function_regparm (type, NULL) >= 3)
5373 {
5374 /* ??? Need to count the actual number of registers to be used,
5375 not the possible number of registers. Fix later. */
5376 return false;
5377 }
5378 }
5379 }
5380
5381 /* Otherwise okay. That also includes certain types of indirect calls. */
5382 return true;
5383 }
5384
5385 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5386 and "sseregparm" calling convention attributes;
5387 arguments as in struct attribute_spec.handler. */
5388
5389 static tree
5390 ix86_handle_cconv_attribute (tree *node, tree name,
5391 tree args,
5392 int,
5393 bool *no_add_attrs)
5394 {
5395 if (TREE_CODE (*node) != FUNCTION_TYPE
5396 && TREE_CODE (*node) != METHOD_TYPE
5397 && TREE_CODE (*node) != FIELD_DECL
5398 && TREE_CODE (*node) != TYPE_DECL)
5399 {
5400 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5401 name);
5402 *no_add_attrs = true;
5403 return NULL_TREE;
5404 }
5405
5406 /* Can combine regparm with all attributes but fastcall and thiscall. */
5407 if (is_attribute_p ("regparm", name))
5408 {
5409 tree cst;
5410
5411 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5412 {
5413 error ("fastcall and regparm attributes are not compatible");
5414 }
5415
5416 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5417 {
5418 error ("regparm and thiscall attributes are not compatible");
5419 }
5420
5421 cst = TREE_VALUE (args);
5422 if (TREE_CODE (cst) != INTEGER_CST)
5423 {
5424 warning (OPT_Wattributes,
5425 "%qE attribute requires an integer constant argument",
5426 name);
5427 *no_add_attrs = true;
5428 }
5429 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5430 {
5431 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5432 name, REGPARM_MAX);
5433 *no_add_attrs = true;
5434 }
5435
5436 return NULL_TREE;
5437 }
5438
5439 if (TARGET_64BIT)
5440 {
5441 /* Do not warn when emulating the MS ABI. */
5442 if ((TREE_CODE (*node) != FUNCTION_TYPE
5443 && TREE_CODE (*node) != METHOD_TYPE)
5444 || ix86_function_type_abi (*node) != MS_ABI)
5445 warning (OPT_Wattributes, "%qE attribute ignored",
5446 name);
5447 *no_add_attrs = true;
5448 return NULL_TREE;
5449 }
5450
5451 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5452 if (is_attribute_p ("fastcall", name))
5453 {
5454 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5455 {
5456 error ("fastcall and cdecl attributes are not compatible");
5457 }
5458 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5459 {
5460 error ("fastcall and stdcall attributes are not compatible");
5461 }
5462 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5463 {
5464 error ("fastcall and regparm attributes are not compatible");
5465 }
5466 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5467 {
5468 error ("fastcall and thiscall attributes are not compatible");
5469 }
5470 }
5471
5472 /* Can combine stdcall with fastcall (redundant), regparm and
5473 sseregparm. */
5474 else if (is_attribute_p ("stdcall", name))
5475 {
5476 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5477 {
5478 error ("stdcall and cdecl attributes are not compatible");
5479 }
5480 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5481 {
5482 error ("stdcall and fastcall attributes are not compatible");
5483 }
5484 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5485 {
5486 error ("stdcall and thiscall attributes are not compatible");
5487 }
5488 }
5489
5490 /* Can combine cdecl with regparm and sseregparm. */
5491 else if (is_attribute_p ("cdecl", name))
5492 {
5493 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5494 {
5495 error ("stdcall and cdecl attributes are not compatible");
5496 }
5497 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5498 {
5499 error ("fastcall and cdecl attributes are not compatible");
5500 }
5501 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5502 {
5503 error ("cdecl and thiscall attributes are not compatible");
5504 }
5505 }
5506 else if (is_attribute_p ("thiscall", name))
5507 {
5508 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5509 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5510 name);
5511 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5512 {
5513 error ("stdcall and thiscall attributes are not compatible");
5514 }
5515 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5516 {
5517 error ("fastcall and thiscall attributes are not compatible");
5518 }
5519 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5520 {
5521 error ("cdecl and thiscall attributes are not compatible");
5522 }
5523 }
5524
5525 /* Can combine sseregparm with all attributes. */
5526
5527 return NULL_TREE;
5528 }
5529
5530 /* The transactional memory builtins are implicitly regparm or fastcall
5531 depending on the ABI. Override the generic do-nothing attribute that
5532 these builtins were declared with, and replace it with one of the two
5533 attributes that we expect elsewhere. */
5534
5535 static tree
5536 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5537 int flags, bool *no_add_attrs)
5538 {
5539 tree alt;
5540
5541 /* In no case do we want to add the placeholder attribute. */
5542 *no_add_attrs = true;
5543
5544 /* The 64-bit ABI is unchanged for transactional memory. */
5545 if (TARGET_64BIT)
5546 return NULL_TREE;
5547
5548 /* ??? Is there a better way to validate 32-bit windows? We have
5549 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5550 if (CHECK_STACK_LIMIT > 0)
5551 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5552 else
5553 {
5554 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5555 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5556 }
5557 decl_attributes (node, alt, flags);
5558
5559 return NULL_TREE;
5560 }
5561
5562 /* This function determines from TYPE the calling-convention. */
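/* The result is a bitmask of IX86_CALLCVT_* values: exactly one base
   convention (cdecl, stdcall, fastcall or thiscall) is set, possibly
   combined with IX86_CALLCVT_REGPARM and/or IX86_CALLCVT_SSEREGPARM.  */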
5563
5564 unsigned int
5565 ix86_get_callcvt (const_tree type)
5566 {
5567 unsigned int ret = 0;
5568 bool is_stdarg;
5569 tree attrs;
5570
5571 if (TARGET_64BIT)
5572 return IX86_CALLCVT_CDECL;
5573
5574 attrs = TYPE_ATTRIBUTES (type);
5575 if (attrs != NULL_TREE)
5576 {
5577 if (lookup_attribute ("cdecl", attrs))
5578 ret |= IX86_CALLCVT_CDECL;
5579 else if (lookup_attribute ("stdcall", attrs))
5580 ret |= IX86_CALLCVT_STDCALL;
5581 else if (lookup_attribute ("fastcall", attrs))
5582 ret |= IX86_CALLCVT_FASTCALL;
5583 else if (lookup_attribute ("thiscall", attrs))
5584 ret |= IX86_CALLCVT_THISCALL;
5585
5586 /* Regparm isn't allowed for thiscall and fastcall. */
5587 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5588 {
5589 if (lookup_attribute ("regparm", attrs))
5590 ret |= IX86_CALLCVT_REGPARM;
5591 if (lookup_attribute ("sseregparm", attrs))
5592 ret |= IX86_CALLCVT_SSEREGPARM;
5593 }
5594
5595 if (IX86_BASE_CALLCVT(ret) != 0)
5596 return ret;
5597 }
5598
5599 is_stdarg = stdarg_p (type);
5600 if (TARGET_RTD && !is_stdarg)
5601 return IX86_CALLCVT_STDCALL | ret;
5602
5603 if (ret != 0
5604 || is_stdarg
5605 || TREE_CODE (type) != METHOD_TYPE
5606 || ix86_function_type_abi (type) != MS_ABI)
5607 return IX86_CALLCVT_CDECL | ret;
5608
5609 return IX86_CALLCVT_THISCALL;
5610 }
5611
5612 /* Return 0 if the attributes for two types are incompatible, 1 if they
5613 are compatible, and 2 if they are nearly compatible (which causes a
5614 warning to be generated). */
5615
5616 static int
5617 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5618 {
5619 unsigned int ccvt1, ccvt2;
5620
5621 if (TREE_CODE (type1) != FUNCTION_TYPE
5622 && TREE_CODE (type1) != METHOD_TYPE)
5623 return 1;
5624
5625 ccvt1 = ix86_get_callcvt (type1);
5626 ccvt2 = ix86_get_callcvt (type2);
5627 if (ccvt1 != ccvt2)
5628 return 0;
5629 if (ix86_function_regparm (type1, NULL)
5630 != ix86_function_regparm (type2, NULL))
5631 return 0;
5632
5633 return 1;
5634 }
5635 \f
5636 /* Return the regparm value for a function with the indicated TYPE and DECL.
5637 DECL may be NULL when calling function indirectly
5638 or considering a libcall. */
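/* For example, a function declared __attribute__((regparm (3))) receives
   its first three integer arguments in EAX, EDX and ECX instead of on the
   stack; fastcall implies regparm (2) and thiscall regparm (1), as handled
   below.  */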
5639
5640 static int
5641 ix86_function_regparm (const_tree type, const_tree decl)
5642 {
5643 tree attr;
5644 int regparm;
5645 unsigned int ccvt;
5646
5647 if (TARGET_64BIT)
5648 return (ix86_function_type_abi (type) == SYSV_ABI
5649 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5650 ccvt = ix86_get_callcvt (type);
5651 regparm = ix86_regparm;
5652
5653 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5654 {
5655 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5656 if (attr)
5657 {
5658 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5659 return regparm;
5660 }
5661 }
5662 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5663 return 2;
5664 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5665 return 1;
5666
5667 /* Use register calling convention for local functions when possible. */
5668 if (decl
5669 && TREE_CODE (decl) == FUNCTION_DECL
5670 /* Caller and callee must agree on the calling convention, so
5671 checking just the global optimize flag here would mean that with
5672 __attribute__((optimize (...))) the caller could use the regparm
5673 convention and the callee not, or vice versa. Instead look at
5674 whether the callee itself is optimized. */
5675 && opt_for_fn (decl, optimize)
5676 && !(profile_flag && !flag_fentry))
5677 {
5678 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5679 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5680 if (i && i->local && i->can_change_signature)
5681 {
5682 int local_regparm, globals = 0, regno;
5683
5684 /* Make sure no regparm register is taken by a
5685 fixed register variable. */
5686 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5687 if (fixed_regs[local_regparm])
5688 break;
5689
5690 /* We don't want to use regparm(3) for nested functions as
5691 these use a static chain pointer in the third argument. */
5692 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5693 local_regparm = 2;
5694
5695 /* In 32-bit mode save a register for the split stack. */
5696 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5697 local_regparm = 2;
5698
5699 /* Each fixed register usage increases register pressure,
5700 so fewer registers should be used for argument passing.
5701 This behavior can be overridden by an explicit
5702 regparm value. */
5703 for (regno = AX_REG; regno <= DI_REG; regno++)
5704 if (fixed_regs[regno])
5705 globals++;
5706
5707 local_regparm
5708 = globals < local_regparm ? local_regparm - globals : 0;
5709
5710 if (local_regparm > regparm)
5711 regparm = local_regparm;
5712 }
5713 }
5714
5715 return regparm;
5716 }
5717
5718 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5719 DFmode (2) arguments in SSE registers for a function with the
5720 indicated TYPE and DECL. DECL may be NULL when calling function
5721 indirectly or considering a libcall. Otherwise return 0. */
5722
5723 static int
5724 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5725 {
5726 gcc_assert (!TARGET_64BIT);
5727
5728 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5729 by the sseregparm attribute. */
5730 if (TARGET_SSEREGPARM
5731 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5732 {
5733 if (!TARGET_SSE)
5734 {
5735 if (warn)
5736 {
5737 if (decl)
5738 error ("calling %qD with attribute sseregparm without "
5739 "SSE/SSE2 enabled", decl);
5740 else
5741 error ("calling %qT with attribute sseregparm without "
5742 "SSE/SSE2 enabled", type);
5743 }
5744 return 0;
5745 }
5746
5747 return 2;
5748 }
5749
5750 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5751 (and DFmode for SSE2) arguments in SSE registers. */
5752 if (decl && TARGET_SSE_MATH && optimize
5753 && !(profile_flag && !flag_fentry))
5754 {
5755 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5756 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5757 if (i && i->local && i->can_change_signature)
5758 return TARGET_SSE2 ? 2 : 1;
5759 }
5760
5761 return 0;
5762 }
5763
5764 /* Return true if EAX is live at the start of the function. Used by
5765 ix86_expand_prologue to determine if we need special help before
5766 calling allocate_stack_worker. */
5767
5768 static bool
5769 ix86_eax_live_at_start_p (void)
5770 {
5771 /* Cheat. Don't bother working forward from ix86_function_regparm
5772 to the function type to whether an actual argument is located in
5773 eax. Instead just look at cfg info, which is still close enough
5774 to correct at this point. This gives false positives for broken
5775 functions that might use uninitialized data that happens to be
5776 allocated in eax, but who cares? */
5777 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5778 }
5779
5780 static bool
5781 ix86_keep_aggregate_return_pointer (tree fntype)
5782 {
5783 tree attr;
5784
5785 if (!TARGET_64BIT)
5786 {
5787 attr = lookup_attribute ("callee_pop_aggregate_return",
5788 TYPE_ATTRIBUTES (fntype));
5789 if (attr)
5790 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5791
5792 /* For 32-bit MS-ABI the default is to keep aggregate
5793 return pointer. */
5794 if (ix86_function_type_abi (fntype) == MS_ABI)
5795 return true;
5796 }
5797 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5798 }
5799
5800 /* Value is the number of bytes of arguments automatically
5801 popped when returning from a subroutine call.
5802 FUNDECL is the declaration node of the function (as a tree),
5803 FUNTYPE is the data type of the function (as a tree),
5804 or for a library call it is an identifier node for the subroutine name.
5805 SIZE is the number of bytes of arguments passed on the stack.
5806
5807 On the 80386, the RTD insn may be used to pop them if the number
5808 of args is fixed, but if the number is variable then the caller
5809 must pop them all. RTD can't be used for library calls now
5810 because the library is compiled with the Unix compiler.
5811 Use of RTD is a selectable option, since it is incompatible with
5812 standard Unix calling sequences. If the option is not selected,
5813 the caller must always pop the args.
5814
5815 The attribute stdcall is equivalent to RTD on a per module basis. */
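/* For example, a non-variadic stdcall function taking two ints returns
   with "ret $8", popping its 8 bytes of stack arguments, whereas a cdecl
   function returns with a plain "ret" and leaves the cleanup to the
   caller.  */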
5816
5817 static int
5818 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5819 {
5820 unsigned int ccvt;
5821
5822 /* None of the 64-bit ABIs pop arguments. */
5823 if (TARGET_64BIT)
5824 return 0;
5825
5826 ccvt = ix86_get_callcvt (funtype);
5827
5828 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5829 | IX86_CALLCVT_THISCALL)) != 0
5830 && ! stdarg_p (funtype))
5831 return size;
5832
5833 /* Lose any fake structure return argument if it is passed on the stack. */
5834 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5835 && !ix86_keep_aggregate_return_pointer (funtype))
5836 {
5837 int nregs = ix86_function_regparm (funtype, fundecl);
5838 if (nregs == 0)
5839 return GET_MODE_SIZE (Pmode);
5840 }
5841
5842 return 0;
5843 }
5844
5845 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5846
5847 static bool
5848 ix86_legitimate_combined_insn (rtx_insn *insn)
5849 {
5850 /* Check operand constraints in case hard registers were propagated
5851 into insn pattern. This check prevents combine pass from
5852 generating insn patterns with invalid hard register operands.
5853 These invalid insns can eventually confuse reload to error out
5854 with a spill failure. See also PRs 46829 and 46843. */
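/* The assignment inside the condition below is intentional: it both checks
   that the combined pattern is recognizable and caches the recog result in
   INSN_CODE so it need not be recomputed later.  */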
5855 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5856 {
5857 int i;
5858
5859 extract_insn (insn);
5860 preprocess_constraints (insn);
5861
5862 int n_operands = recog_data.n_operands;
5863 int n_alternatives = recog_data.n_alternatives;
5864 for (i = 0; i < n_operands; i++)
5865 {
5866 rtx op = recog_data.operand[i];
5867 enum machine_mode mode = GET_MODE (op);
5868 const operand_alternative *op_alt;
5869 int offset = 0;
5870 bool win;
5871 int j;
5872
5873 /* For pre-AVX disallow unaligned loads/stores where the
5874 instructions don't support it. */
5875 if (!TARGET_AVX
5876 && VECTOR_MODE_P (GET_MODE (op))
5877 && misaligned_operand (op, GET_MODE (op)))
5878 {
5879 int min_align = get_attr_ssememalign (insn);
5880 if (min_align == 0)
5881 return false;
5882 }
5883
5884 /* A unary operator may be accepted by the predicate, but it
5885 is irrelevant for matching constraints. */
5886 if (UNARY_P (op))
5887 op = XEXP (op, 0);
5888
5889 if (GET_CODE (op) == SUBREG)
5890 {
5891 if (REG_P (SUBREG_REG (op))
5892 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5893 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5894 GET_MODE (SUBREG_REG (op)),
5895 SUBREG_BYTE (op),
5896 GET_MODE (op));
5897 op = SUBREG_REG (op);
5898 }
5899
5900 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5901 continue;
5902
5903 op_alt = recog_op_alt;
5904
5905 /* Operand has no constraints, anything is OK. */
5906 win = !n_alternatives;
5907
5908 alternative_mask enabled = recog_data.enabled_alternatives;
5909 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5910 {
5911 if (!TEST_BIT (enabled, j))
5912 continue;
5913 if (op_alt[i].anything_ok
5914 || (op_alt[i].matches != -1
5915 && operands_match_p
5916 (recog_data.operand[i],
5917 recog_data.operand[op_alt[i].matches]))
5918 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5919 {
5920 win = true;
5921 break;
5922 }
5923 }
5924
5925 if (!win)
5926 return false;
5927 }
5928 }
5929
5930 return true;
5931 }
5932 \f
5933 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
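/* AddressSanitizer maps application memory to shadow memory roughly as
   shadow = (addr >> 3) + offset; the value returned here is that offset,
   chosen per ABI so the shadow region stays clear of normal
   allocations.  */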
5934
5935 static unsigned HOST_WIDE_INT
5936 ix86_asan_shadow_offset (void)
5937 {
5938 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5939 : HOST_WIDE_INT_C (0x7fff8000))
5940 : (HOST_WIDE_INT_1 << 29);
5941 }
5942 \f
5943 /* Argument support functions. */
5944
5945 /* Return true when register may be used to pass function parameters. */
5946 bool
5947 ix86_function_arg_regno_p (int regno)
5948 {
5949 int i;
5950 const int *parm_regs;
5951
5952 if (!TARGET_64BIT)
5953 {
5954 if (TARGET_MACHO)
5955 return (regno < REGPARM_MAX
5956 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5957 else
5958 return (regno < REGPARM_MAX
5959 || (TARGET_MMX && MMX_REGNO_P (regno)
5960 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5961 || (TARGET_SSE && SSE_REGNO_P (regno)
5962 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5963 }
5964
5965 if (TARGET_SSE && SSE_REGNO_P (regno)
5966 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5967 return true;
5968
5969 /* TODO: The function should depend on current function ABI but
5970 builtins.c would need updating then. Therefore we use the
5971 default ABI. */
5972
5973 /* RAX is used as hidden argument to va_arg functions. */
5974 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5975 return true;
5976
5977 if (ix86_abi == MS_ABI)
5978 parm_regs = x86_64_ms_abi_int_parameter_registers;
5979 else
5980 parm_regs = x86_64_int_parameter_registers;
5981 for (i = 0; i < (ix86_abi == MS_ABI
5982 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5983 if (regno == parm_regs[i])
5984 return true;
5985 return false;
5986 }
5987
5988 /* Return true if we do not know how to pass TYPE solely in registers. */
5989
5990 static bool
5991 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5992 {
5993 if (must_pass_in_stack_var_size_or_pad (mode, type))
5994 return true;
5995
5996 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5997 The layout_type routine is crafty and tries to trick us into passing
5998 currently unsupported vector types on the stack by using TImode. */
5999 return (!TARGET_64BIT && mode == TImode
6000 && type && TREE_CODE (type) != VECTOR_TYPE);
6001 }
6002
6003 /* Return the size, in bytes, of the area reserved for arguments passed
6004 in registers for the function represented by FNDECL, depending on the
6005 ABI it uses. */
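/* For the 64-bit MS ABI this is the 32-byte "home" (shadow) area that the
   caller must reserve for the four register arguments; the other ABIs
   reserve nothing.  */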
6006 int
6007 ix86_reg_parm_stack_space (const_tree fndecl)
6008 {
6009 enum calling_abi call_abi = SYSV_ABI;
6010 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6011 call_abi = ix86_function_abi (fndecl);
6012 else
6013 call_abi = ix86_function_type_abi (fndecl);
6014 if (TARGET_64BIT && call_abi == MS_ABI)
6015 return 32;
6016 return 0;
6017 }
6018
6019 /* Return SYSV_ABI or MS_ABI depending on FNTYPE, specifying the
6020 call ABI used. */
6021 enum calling_abi
6022 ix86_function_type_abi (const_tree fntype)
6023 {
6024 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6025 {
6026 enum calling_abi abi = ix86_abi;
6027 if (abi == SYSV_ABI)
6028 {
6029 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6030 abi = MS_ABI;
6031 }
6032 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6033 abi = SYSV_ABI;
6034 return abi;
6035 }
6036 return ix86_abi;
6037 }
6038
6039 /* We add this as a workaround in order to use libc_has_function
6040 hook in i386.md. */
6041 bool
6042 ix86_libc_has_function (enum function_class fn_class)
6043 {
6044 return targetm.libc_has_function (fn_class);
6045 }
6046
6047 static bool
6048 ix86_function_ms_hook_prologue (const_tree fn)
6049 {
6050 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6051 {
6052 if (decl_function_context (fn) != NULL_TREE)
6053 error_at (DECL_SOURCE_LOCATION (fn),
6054 "ms_hook_prologue is not compatible with nested functions");
6055 else
6056 return true;
6057 }
6058 return false;
6059 }
6060
6061 static enum calling_abi
6062 ix86_function_abi (const_tree fndecl)
6063 {
6064 if (! fndecl)
6065 return ix86_abi;
6066 return ix86_function_type_abi (TREE_TYPE (fndecl));
6067 }
6068
6069 /* Return SYSV_ABI or MS_ABI depending on cfun, specifying the
6070 call ABI used. */
6071 enum calling_abi
6072 ix86_cfun_abi (void)
6073 {
6074 if (! cfun)
6075 return ix86_abi;
6076 return cfun->machine->call_abi;
6077 }
6078
6079 /* Write the extra assembler code needed to declare a function properly. */
6080
6081 void
6082 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6083 tree decl)
6084 {
6085 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6086
6087 if (is_ms_hook)
6088 {
6089 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6090 unsigned int filler_cc = 0xcccccccc;
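/* 0xCC is the int3 opcode; the padding emitted before the function label
   gives hot-patching tools room to plant a jump there at run time.  */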
6091
6092 for (i = 0; i < filler_count; i += 4)
6093 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6094 }
6095
6096 #ifdef SUBTARGET_ASM_UNWIND_INIT
6097 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6098 #endif
6099
6100 ASM_OUTPUT_LABEL (asm_out_file, fname);
6101
6102 /* Output magic byte marker, if hot-patch attribute is set. */
6103 if (is_ms_hook)
6104 {
6105 if (TARGET_64BIT)
6106 {
6107 /* leaq [%rsp + 0], %rsp */
6108 asm_fprintf (asm_out_file, ASM_BYTE
6109 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6110 }
6111 else
6112 {
6113 /* movl.s %edi, %edi
6114 push %ebp
6115 movl.s %esp, %ebp */
6116 asm_fprintf (asm_out_file, ASM_BYTE
6117 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6118 }
6119 }
6120 }
6121
6122 /* regclass.c */
6123 extern void init_regs (void);
6124
6125 /* Implementation of the call ABI switching target hook. The call-used
6126 register sets specific to FNDECL are selected here. See also
6127 ix86_conditional_register_usage for more details. */
6128 void
6129 ix86_call_abi_override (const_tree fndecl)
6130 {
6131 if (fndecl == NULL_TREE)
6132 cfun->machine->call_abi = ix86_abi;
6133 else
6134 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6135 }
6136
6137 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6138 expensive re-initialization of init_regs each time we switch function context,
6139 since this is needed only during RTL expansion. */
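/* The check below works because SI_REG is call-used under the SYSV ABI but
   call-saved under the MS ABI, so its current setting shows which ABI the
   register tables were last initialized for.  */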
6140 static void
6141 ix86_maybe_switch_abi (void)
6142 {
6143 if (TARGET_64BIT
6144 && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6145 reinit_regs ();
6146 }
6147
6148 /* Return true if a pseudo register should be created and used to hold
6149 the GOT address for PIC code. */
6150 static bool
6151 ix86_use_pseudo_pic_reg (void)
6152 {
6153 if ((TARGET_64BIT
6154 && (ix86_cmodel == CM_SMALL_PIC
6155 || TARGET_PECOFF))
6156 || !flag_pic)
6157 return false;
6158 return true;
6159 }
6160
6161 /* Create and initialize PIC register if required. */
6162 static void
6163 ix86_init_pic_reg (void)
6164 {
6165 edge entry_edge;
6166 rtx_insn *seq;
6167
6168 if (!ix86_use_pseudo_pic_reg ())
6169 return;
6170
6171 start_sequence ();
6172
6173 if (TARGET_64BIT)
6174 {
6175 if (ix86_cmodel == CM_LARGE_PIC)
6176 {
6177 rtx_code_label *label;
6178 rtx tmp_reg;
6179
6180 gcc_assert (Pmode == DImode);
6181 label = gen_label_rtx ();
6182 emit_label (label);
6183 LABEL_PRESERVE_P (label) = 1;
6184 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
6185 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6186 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6187 label));
6188 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6189 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6190 pic_offset_table_rtx, tmp_reg));
6191 }
6192 else
6193 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6194 }
6195 else
6196 {
6197 rtx insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6198 RTX_FRAME_RELATED_P (insn) = 1;
6199 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6200 }
6201
6202 seq = get_insns ();
6203 end_sequence ();
6204
6205 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6206 insert_insn_on_edge (seq, entry_edge);
6207 commit_one_edge_insertion (entry_edge);
6208 }
6209
6210 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6211 for a call to a function whose data type is FNTYPE.
6212 For a library call, FNTYPE is 0. */
6213
6214 void
6215 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6216 tree fntype, /* tree ptr for function decl */
6217 rtx libname, /* SYMBOL_REF of library name or 0 */
6218 tree fndecl,
6219 int caller)
6220 {
6221 struct cgraph_local_info *i;
6222
6223 memset (cum, 0, sizeof (*cum));
6224
6225 if (fndecl)
6226 {
6227 i = cgraph_node::local_info (fndecl);
6228 cum->call_abi = ix86_function_abi (fndecl);
6229 }
6230 else
6231 {
6232 i = NULL;
6233 cum->call_abi = ix86_function_type_abi (fntype);
6234 }
6235
6236 cum->caller = caller;
6237
6238 /* Set up the number of registers to use for passing arguments. */
6239 cum->nregs = ix86_regparm;
6240 if (TARGET_64BIT)
6241 {
6242 cum->nregs = (cum->call_abi == SYSV_ABI
6243 ? X86_64_REGPARM_MAX
6244 : X86_64_MS_REGPARM_MAX);
6245 }
6246 if (TARGET_SSE)
6247 {
6248 cum->sse_nregs = SSE_REGPARM_MAX;
6249 if (TARGET_64BIT)
6250 {
6251 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6252 ? X86_64_SSE_REGPARM_MAX
6253 : X86_64_MS_SSE_REGPARM_MAX);
6254 }
6255 }
6256 if (TARGET_MMX)
6257 cum->mmx_nregs = MMX_REGPARM_MAX;
6258 cum->warn_avx512f = true;
6259 cum->warn_avx = true;
6260 cum->warn_sse = true;
6261 cum->warn_mmx = true;
6262
6263 /* Because the type might mismatch between caller and callee, we need to
6264 use the actual type of the function for local calls.
6265 FIXME: cgraph_analyze can be told to actually record if the function uses
6266 va_start, so for local functions maybe_vaarg can be made more aggressive,
6267 helping K&R code.
6268 FIXME: once the type system is fixed, we won't need this code anymore. */
6269 if (i && i->local && i->can_change_signature)
6270 fntype = TREE_TYPE (fndecl);
6271 cum->maybe_vaarg = (fntype
6272 ? (!prototype_p (fntype) || stdarg_p (fntype))
6273 : !libname);
6274
6275 if (!TARGET_64BIT)
6276 {
6277 /* If there are variable arguments, then we won't pass anything
6278 in registers in 32-bit mode. */
6279 if (stdarg_p (fntype))
6280 {
6281 cum->nregs = 0;
6282 cum->sse_nregs = 0;
6283 cum->mmx_nregs = 0;
6284 cum->warn_avx512f = false;
6285 cum->warn_avx = false;
6286 cum->warn_sse = false;
6287 cum->warn_mmx = false;
6288 return;
6289 }
6290
6291 /* Use ecx and edx registers if function has fastcall attribute,
6292 else look for regparm information. */
6293 if (fntype)
6294 {
6295 unsigned int ccvt = ix86_get_callcvt (fntype);
6296 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6297 {
6298 cum->nregs = 1;
6299 cum->fastcall = 1; /* Same first register as in fastcall. */
6300 }
6301 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6302 {
6303 cum->nregs = 2;
6304 cum->fastcall = 1;
6305 }
6306 else
6307 cum->nregs = ix86_function_regparm (fntype, fndecl);
6308 }
6309
6310 /* Set up the number of SSE registers used for passing SFmode
6311 and DFmode arguments. Warn for mismatching ABI. */
6312 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6313 }
6314 }
6315
6316 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6317 But in the case of vector types, it is some vector mode.
6318
6319 When we have only some of our vector isa extensions enabled, then there
6320 are some modes for which vector_mode_supported_p is false. For these
6321 modes, the generic vector support in gcc will choose some non-vector mode
6322 in order to implement the type. By computing the natural mode, we'll
6323 select the proper ABI location for the operand and not depend on whatever
6324 the middle-end decides to do with these vector types.
6325
6326 The middle-end can't deal with vector types larger than 16 bytes. In this
6327 case, we return the original mode and warn about the ABI change if CUM isn't
6328 NULL.
6329
6330 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6331 available for the function return value. */
6332
6333 static enum machine_mode
6334 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6335 bool in_return)
6336 {
6337 enum machine_mode mode = TYPE_MODE (type);
6338
6339 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6340 {
6341 HOST_WIDE_INT size = int_size_in_bytes (type);
6342 if ((size == 8 || size == 16 || size == 32 || size == 64)
6343 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6344 && TYPE_VECTOR_SUBPARTS (type) > 1)
6345 {
6346 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6347
6348 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6349 mode = MIN_MODE_VECTOR_FLOAT;
6350 else
6351 mode = MIN_MODE_VECTOR_INT;
6352
6353 /* Get the mode which has this inner mode and number of units. */
6354 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6355 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6356 && GET_MODE_INNER (mode) == innermode)
6357 {
6358 if (size == 64 && !TARGET_AVX512F)
6359 {
6360 static bool warnedavx512f;
6361 static bool warnedavx512f_ret;
6362
6363 if (cum && cum->warn_avx512f && !warnedavx512f)
6364 {
6365 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6366 "without AVX512F enabled changes the ABI"))
6367 warnedavx512f = true;
6368 }
6369 else if (in_return && !warnedavx512f_ret)
6370 {
6371 if (warning (OPT_Wpsabi, "AVX512F vector return "
6372 "without AVX512F enabled changes the ABI"))
6373 warnedavx512f_ret = true;
6374 }
6375
6376 return TYPE_MODE (type);
6377 }
6378 else if (size == 32 && !TARGET_AVX)
6379 {
6380 static bool warnedavx;
6381 static bool warnedavx_ret;
6382
6383 if (cum && cum->warn_avx && !warnedavx)
6384 {
6385 if (warning (OPT_Wpsabi, "AVX vector argument "
6386 "without AVX enabled changes the ABI"))
6387 warnedavx = true;
6388 }
6389 else if (in_return && !warnedavx_ret)
6390 {
6391 if (warning (OPT_Wpsabi, "AVX vector return "
6392 "without AVX enabled changes the ABI"))
6393 warnedavx_ret = true;
6394 }
6395
6396 return TYPE_MODE (type);
6397 }
6398 else if (((size == 8 && TARGET_64BIT) || size == 16)
6399 && !TARGET_SSE)
6400 {
6401 static bool warnedsse;
6402 static bool warnedsse_ret;
6403
6404 if (cum && cum->warn_sse && !warnedsse)
6405 {
6406 if (warning (OPT_Wpsabi, "SSE vector argument "
6407 "without SSE enabled changes the ABI"))
6408 warnedsse = true;
6409 }
6410 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6411 {
6412 if (warning (OPT_Wpsabi, "SSE vector return "
6413 "without SSE enabled changes the ABI"))
6414 warnedsse_ret = true;
6415 }
6416 }
6417 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6418 {
6419 static bool warnedmmx;
6420 static bool warnedmmx_ret;
6421
6422 if (cum && cum->warn_mmx && !warnedmmx)
6423 {
6424 if (warning (OPT_Wpsabi, "MMX vector argument "
6425 "without MMX enabled changes the ABI"))
6426 warnedmmx = true;
6427 }
6428 else if (in_return && !warnedmmx_ret)
6429 {
6430 if (warning (OPT_Wpsabi, "MMX vector return "
6431 "without MMX enabled changes the ABI"))
6432 warnedmmx_ret = true;
6433 }
6434 }
6435 return mode;
6436 }
6437
6438 gcc_unreachable ();
6439 }
6440 }
6441
6442 return mode;
6443 }
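/* As an illustration of type_natural_mode above, consider the
   hypothetical type

     typedef int v8si __attribute__ ((vector_size (32)));

   With AVX enabled the type already has V8SImode and that mode is used
   for the ABI decision.  Without AVX the type typically ends up with a
   non-vector mode; the loop above still finds V8SImode, but because
   !TARGET_AVX the function warns with -Wpsabi that the ABI changes and
   returns the original TYPE_MODE, so the argument is passed in memory
   rather than in a YMM register.  */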
6444
6445 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6446 this may not agree with the mode that the type system has chosen for the
6447 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6448 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6449
6450 static rtx
6451 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6452 unsigned int regno)
6453 {
6454 rtx tmp;
6455
6456 if (orig_mode != BLKmode)
6457 tmp = gen_rtx_REG (orig_mode, regno);
6458 else
6459 {
6460 tmp = gen_rtx_REG (mode, regno);
6461 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6462 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6463 }
6464
6465 return tmp;
6466 }
6467
6468 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6469 of this code is to classify each 8bytes of incoming argument by the register
6470 class and assign registers accordingly. */
6471
6472 /* Return the union class of CLASS1 and CLASS2.
6473 See the x86-64 PS ABI for details. */
6474
6475 static enum x86_64_reg_class
6476 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6477 {
6478 /* Rule #1: If both classes are equal, this is the resulting class. */
6479 if (class1 == class2)
6480 return class1;
6481
6482 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6483 the other class. */
6484 if (class1 == X86_64_NO_CLASS)
6485 return class2;
6486 if (class2 == X86_64_NO_CLASS)
6487 return class1;
6488
6489 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6490 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6491 return X86_64_MEMORY_CLASS;
6492
6493 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6494 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6495 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6496 return X86_64_INTEGERSI_CLASS;
6497 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6498 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6499 return X86_64_INTEGER_CLASS;
6500
6501 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6502 MEMORY is used. */
6503 if (class1 == X86_64_X87_CLASS
6504 || class1 == X86_64_X87UP_CLASS
6505 || class1 == X86_64_COMPLEX_X87_CLASS
6506 || class2 == X86_64_X87_CLASS
6507 || class2 == X86_64_X87UP_CLASS
6508 || class2 == X86_64_COMPLEX_X87_CLASS)
6509 return X86_64_MEMORY_CLASS;
6510
6511 /* Rule #6: Otherwise class SSE is used. */
6512 return X86_64_SSE_CLASS;
6513 }
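/* A small worked example of the rules above, using a hypothetical type:

     union u { float f; int i; };

   The float member classifies as X86_64_SSESF_CLASS and the int member
   as X86_64_INTEGERSI_CLASS; rule #4 merges them into
   X86_64_INTEGERSI_CLASS, so the union as a whole travels in (the low
   half of) a general purpose register rather than an SSE register.  */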
6514
6515 /* Classify the argument of type TYPE and mode MODE.
6516 CLASSES will be filled by the register class used to pass each word
6517 of the operand. The number of words is returned. In case the parameter
6518 should be passed in memory, 0 is returned. As a special case for zero
6519 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6520
6521 BIT_OFFSET is used internally for handling records and specifies the
6522 offset in bits modulo 512 to avoid overflow cases.
6523
6524 See the x86-64 PS ABI for details.
6525 */
6526
6527 static int
6528 classify_argument (enum machine_mode mode, const_tree type,
6529 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6530 {
6531 HOST_WIDE_INT bytes =
6532 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6533 int words
6534 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6535
6536 /* Variable sized entities are always passed/returned in memory. */
6537 if (bytes < 0)
6538 return 0;
6539
6540 if (mode != VOIDmode
6541 && targetm.calls.must_pass_in_stack (mode, type))
6542 return 0;
6543
6544 if (type && AGGREGATE_TYPE_P (type))
6545 {
6546 int i;
6547 tree field;
6548 enum x86_64_reg_class subclasses[MAX_CLASSES];
6549
6550 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6551 if (bytes > 64)
6552 return 0;
6553
6554 for (i = 0; i < words; i++)
6555 classes[i] = X86_64_NO_CLASS;
6556
6557 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6558 signal the memory class, so handle this as a special case. */
6559 if (!words)
6560 {
6561 classes[0] = X86_64_NO_CLASS;
6562 return 1;
6563 }
6564
6565 /* Classify each field of record and merge classes. */
6566 switch (TREE_CODE (type))
6567 {
6568 case RECORD_TYPE:
6569 /* And now merge the fields of structure. */
6570 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6571 {
6572 if (TREE_CODE (field) == FIELD_DECL)
6573 {
6574 int num;
6575
6576 if (TREE_TYPE (field) == error_mark_node)
6577 continue;
6578
6579 /* Bitfields are always classified as integer. Handle them
6580 early, since later code would consider them to be
6581 misaligned integers. */
6582 if (DECL_BIT_FIELD (field))
6583 {
6584 for (i = (int_bit_position (field)
6585 + (bit_offset % 64)) / 8 / 8;
6586 i < ((int_bit_position (field) + (bit_offset % 64))
6587 + tree_to_shwi (DECL_SIZE (field))
6588 + 63) / 8 / 8; i++)
6589 classes[i] =
6590 merge_classes (X86_64_INTEGER_CLASS,
6591 classes[i]);
6592 }
6593 else
6594 {
6595 int pos;
6596
6597 type = TREE_TYPE (field);
6598
6599 /* Flexible array member is ignored. */
6600 if (TYPE_MODE (type) == BLKmode
6601 && TREE_CODE (type) == ARRAY_TYPE
6602 && TYPE_SIZE (type) == NULL_TREE
6603 && TYPE_DOMAIN (type) != NULL_TREE
6604 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6605 == NULL_TREE))
6606 {
6607 static bool warned;
6608
6609 if (!warned && warn_psabi)
6610 {
6611 warned = true;
6612 inform (input_location,
6613 "the ABI of passing struct with"
6614 " a flexible array member has"
6615 " changed in GCC 4.4");
6616 }
6617 continue;
6618 }
6619 num = classify_argument (TYPE_MODE (type), type,
6620 subclasses,
6621 (int_bit_position (field)
6622 + bit_offset) % 512);
6623 if (!num)
6624 return 0;
6625 pos = (int_bit_position (field)
6626 + (bit_offset % 64)) / 8 / 8;
6627 for (i = 0; i < num && (i + pos) < words; i++)
6628 classes[i + pos] =
6629 merge_classes (subclasses[i], classes[i + pos]);
6630 }
6631 }
6632 }
6633 break;
6634
6635 case ARRAY_TYPE:
6636 /* Arrays are handled as small records. */
6637 {
6638 int num;
6639 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6640 TREE_TYPE (type), subclasses, bit_offset);
6641 if (!num)
6642 return 0;
6643
6644 /* The partial classes are now full classes. */
6645 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6646 subclasses[0] = X86_64_SSE_CLASS;
6647 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6648 && !((bit_offset % 64) == 0 && bytes == 4))
6649 subclasses[0] = X86_64_INTEGER_CLASS;
6650
6651 for (i = 0; i < words; i++)
6652 classes[i] = subclasses[i % num];
6653
6654 break;
6655 }
6656 case UNION_TYPE:
6657 case QUAL_UNION_TYPE:
6658 /* Unions are similar to RECORD_TYPE but offset is always 0.
6659 */
6660 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6661 {
6662 if (TREE_CODE (field) == FIELD_DECL)
6663 {
6664 int num;
6665
6666 if (TREE_TYPE (field) == error_mark_node)
6667 continue;
6668
6669 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6670 TREE_TYPE (field), subclasses,
6671 bit_offset);
6672 if (!num)
6673 return 0;
6674 for (i = 0; i < num && i < words; i++)
6675 classes[i] = merge_classes (subclasses[i], classes[i]);
6676 }
6677 }
6678 break;
6679
6680 default:
6681 gcc_unreachable ();
6682 }
6683
6684 if (words > 2)
6685 {
6686 /* When the size is larger than 16 bytes, if the first class isn't
6687 X86_64_SSE_CLASS or any of the others isn't
6688 X86_64_SSEUP_CLASS, everything should be passed in
6689 memory. */
6690 if (classes[0] != X86_64_SSE_CLASS)
6691 return 0;
6692
6693 for (i = 1; i < words; i++)
6694 if (classes[i] != X86_64_SSEUP_CLASS)
6695 return 0;
6696 }
6697
6698 /* Final merger cleanup. */
6699 for (i = 0; i < words; i++)
6700 {
6701 /* If one class is MEMORY, everything should be passed in
6702 memory. */
6703 if (classes[i] == X86_64_MEMORY_CLASS)
6704 return 0;
6705
6706 /* X86_64_SSEUP_CLASS should always be preceded by
6707 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6708 if (classes[i] == X86_64_SSEUP_CLASS
6709 && classes[i - 1] != X86_64_SSE_CLASS
6710 && classes[i - 1] != X86_64_SSEUP_CLASS)
6711 {
6712 /* The first one should never be X86_64_SSEUP_CLASS. */
6713 gcc_assert (i != 0);
6714 classes[i] = X86_64_SSE_CLASS;
6715 }
6716
6717 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6718 everything should be passed in memory. */
6719 if (classes[i] == X86_64_X87UP_CLASS
6720 && (classes[i - 1] != X86_64_X87_CLASS))
6721 {
6722 static bool warned;
6723
6724 /* The first one should never be X86_64_X87UP_CLASS. */
6725 gcc_assert (i != 0);
6726 if (!warned && warn_psabi)
6727 {
6728 warned = true;
6729 inform (input_location,
6730 "the ABI of passing union with long double"
6731 " has changed in GCC 4.4");
6732 }
6733 return 0;
6734 }
6735 }
6736 return words;
6737 }
6738
6739 /* Compute the alignment needed. We align all types to natural boundaries, with
6740 the exception of XFmode, which is aligned to 64 bits. */
6741 if (mode != VOIDmode && mode != BLKmode)
6742 {
6743 int mode_alignment = GET_MODE_BITSIZE (mode);
6744
6745 if (mode == XFmode)
6746 mode_alignment = 128;
6747 else if (mode == XCmode)
6748 mode_alignment = 256;
6749 if (COMPLEX_MODE_P (mode))
6750 mode_alignment /= 2;
6751 /* Misaligned fields are always returned in memory. */
6752 if (bit_offset % mode_alignment)
6753 return 0;
6754 }
6755
6756 /* For V1xx modes, just use the base mode. */
6757 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6758 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6759 mode = GET_MODE_INNER (mode);
6760
6761 /* Classification of atomic types. */
6762 switch (mode)
6763 {
6764 case SDmode:
6765 case DDmode:
6766 classes[0] = X86_64_SSE_CLASS;
6767 return 1;
6768 case TDmode:
6769 classes[0] = X86_64_SSE_CLASS;
6770 classes[1] = X86_64_SSEUP_CLASS;
6771 return 2;
6772 case DImode:
6773 case SImode:
6774 case HImode:
6775 case QImode:
6776 case CSImode:
6777 case CHImode:
6778 case CQImode:
6779 {
6780 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6781
6782 /* Analyze last 128 bits only. */
6783 size = (size - 1) & 0x7f;
6784
6785 if (size < 32)
6786 {
6787 classes[0] = X86_64_INTEGERSI_CLASS;
6788 return 1;
6789 }
6790 else if (size < 64)
6791 {
6792 classes[0] = X86_64_INTEGER_CLASS;
6793 return 1;
6794 }
6795 else if (size < 64+32)
6796 {
6797 classes[0] = X86_64_INTEGER_CLASS;
6798 classes[1] = X86_64_INTEGERSI_CLASS;
6799 return 2;
6800 }
6801 else if (size < 64+64)
6802 {
6803 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6804 return 2;
6805 }
6806 else
6807 gcc_unreachable ();
6808 }
6809 case CDImode:
6810 case TImode:
6811 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6812 return 2;
6813 case COImode:
6814 case OImode:
6815 /* OImode shouldn't be used directly. */
6816 gcc_unreachable ();
6817 case CTImode:
6818 return 0;
6819 case SFmode:
6820 if (!(bit_offset % 64))
6821 classes[0] = X86_64_SSESF_CLASS;
6822 else
6823 classes[0] = X86_64_SSE_CLASS;
6824 return 1;
6825 case DFmode:
6826 classes[0] = X86_64_SSEDF_CLASS;
6827 return 1;
6828 case XFmode:
6829 classes[0] = X86_64_X87_CLASS;
6830 classes[1] = X86_64_X87UP_CLASS;
6831 return 2;
6832 case TFmode:
6833 classes[0] = X86_64_SSE_CLASS;
6834 classes[1] = X86_64_SSEUP_CLASS;
6835 return 2;
6836 case SCmode:
6837 classes[0] = X86_64_SSE_CLASS;
6838 if (!(bit_offset % 64))
6839 return 1;
6840 else
6841 {
6842 static bool warned;
6843
6844 if (!warned && warn_psabi)
6845 {
6846 warned = true;
6847 inform (input_location,
6848 "the ABI of passing structure with complex float"
6849 " member has changed in GCC 4.4");
6850 }
6851 classes[1] = X86_64_SSESF_CLASS;
6852 return 2;
6853 }
6854 case DCmode:
6855 classes[0] = X86_64_SSEDF_CLASS;
6856 classes[1] = X86_64_SSEDF_CLASS;
6857 return 2;
6858 case XCmode:
6859 classes[0] = X86_64_COMPLEX_X87_CLASS;
6860 return 1;
6861 case TCmode:
6862 /* This mode is larger than 16 bytes. */
6863 return 0;
6864 case V8SFmode:
6865 case V8SImode:
6866 case V32QImode:
6867 case V16HImode:
6868 case V4DFmode:
6869 case V4DImode:
6870 classes[0] = X86_64_SSE_CLASS;
6871 classes[1] = X86_64_SSEUP_CLASS;
6872 classes[2] = X86_64_SSEUP_CLASS;
6873 classes[3] = X86_64_SSEUP_CLASS;
6874 return 4;
6875 case V8DFmode:
6876 case V16SFmode:
6877 case V8DImode:
6878 case V16SImode:
6879 case V32HImode:
6880 case V64QImode:
6881 classes[0] = X86_64_SSE_CLASS;
6882 classes[1] = X86_64_SSEUP_CLASS;
6883 classes[2] = X86_64_SSEUP_CLASS;
6884 classes[3] = X86_64_SSEUP_CLASS;
6885 classes[4] = X86_64_SSEUP_CLASS;
6886 classes[5] = X86_64_SSEUP_CLASS;
6887 classes[6] = X86_64_SSEUP_CLASS;
6888 classes[7] = X86_64_SSEUP_CLASS;
6889 return 8;
6890 case V4SFmode:
6891 case V4SImode:
6892 case V16QImode:
6893 case V8HImode:
6894 case V2DFmode:
6895 case V2DImode:
6896 classes[0] = X86_64_SSE_CLASS;
6897 classes[1] = X86_64_SSEUP_CLASS;
6898 return 2;
6899 case V1TImode:
6900 case V1DImode:
6901 case V2SFmode:
6902 case V2SImode:
6903 case V4HImode:
6904 case V8QImode:
6905 classes[0] = X86_64_SSE_CLASS;
6906 return 1;
6907 case BLKmode:
6908 case VOIDmode:
6909 return 0;
6910 default:
6911 gcc_assert (VECTOR_MODE_P (mode));
6912
6913 if (bytes > 16)
6914 return 0;
6915
6916 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6917
6918 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6919 classes[0] = X86_64_INTEGERSI_CLASS;
6920 else
6921 classes[0] = X86_64_INTEGER_CLASS;
6922 classes[1] = X86_64_INTEGER_CLASS;
6923 return 1 + (bytes > 8);
6924 }
6925 }
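/* As an illustration of the classification above, take the hypothetical

     struct s { double d; long l; };

   which occupies two eightbytes.  The double classifies as
   X86_64_SSEDF_CLASS and the long as X86_64_INTEGER_CLASS, so
   classify_argument returns 2 with classes = { SSEDF, INTEGER }: one SSE
   register and one integer register are needed to pass the struct.  */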
6926
6927 /* Examine the argument and set the number of registers required in each
6928 class. Return true iff the parameter should be passed in memory. */
6929
6930 static bool
6931 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6932 int *int_nregs, int *sse_nregs)
6933 {
6934 enum x86_64_reg_class regclass[MAX_CLASSES];
6935 int n = classify_argument (mode, type, regclass, 0);
6936
6937 *int_nregs = 0;
6938 *sse_nregs = 0;
6939
6940 if (!n)
6941 return true;
6942 for (n--; n >= 0; n--)
6943 switch (regclass[n])
6944 {
6945 case X86_64_INTEGER_CLASS:
6946 case X86_64_INTEGERSI_CLASS:
6947 (*int_nregs)++;
6948 break;
6949 case X86_64_SSE_CLASS:
6950 case X86_64_SSESF_CLASS:
6951 case X86_64_SSEDF_CLASS:
6952 (*sse_nregs)++;
6953 break;
6954 case X86_64_NO_CLASS:
6955 case X86_64_SSEUP_CLASS:
6956 break;
6957 case X86_64_X87_CLASS:
6958 case X86_64_X87UP_CLASS:
6959 case X86_64_COMPLEX_X87_CLASS:
6960 if (!in_return)
6961 return true;
6962 break;
6963 case X86_64_MEMORY_CLASS:
6964 gcc_unreachable ();
6965 }
6966
6967 return false;
6968 }
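/* Continuing the struct s { double d; long l; } example from above,
   examine_argument sets *int_nregs = 1 and *sse_nregs = 1 and returns
   false, i.e. the struct is not forced to memory; whether registers are
   actually still available is checked by the callers against CUM.  */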
6969
6970 /* Construct container for the argument used by GCC interface. See
6971 FUNCTION_ARG for the detailed description. */
6972
6973 static rtx
6974 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6975 const_tree type, int in_return, int nintregs, int nsseregs,
6976 const int *intreg, int sse_regno)
6977 {
6978 /* The following variables hold the static issued_error state. */
6979 static bool issued_sse_arg_error;
6980 static bool issued_sse_ret_error;
6981 static bool issued_x87_ret_error;
6982
6983 enum machine_mode tmpmode;
6984 int bytes =
6985 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6986 enum x86_64_reg_class regclass[MAX_CLASSES];
6987 int n;
6988 int i;
6989 int nexps = 0;
6990 int needed_sseregs, needed_intregs;
6991 rtx exp[MAX_CLASSES];
6992 rtx ret;
6993
6994 n = classify_argument (mode, type, regclass, 0);
6995 if (!n)
6996 return NULL;
6997 if (examine_argument (mode, type, in_return, &needed_intregs,
6998 &needed_sseregs))
6999 return NULL;
7000 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7001 return NULL;
7002
7003 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7004 some less clueful developer tries to use floating-point anyway. */
7005 if (needed_sseregs && !TARGET_SSE)
7006 {
7007 if (in_return)
7008 {
7009 if (!issued_sse_ret_error)
7010 {
7011 error ("SSE register return with SSE disabled");
7012 issued_sse_ret_error = true;
7013 }
7014 }
7015 else if (!issued_sse_arg_error)
7016 {
7017 error ("SSE register argument with SSE disabled");
7018 issued_sse_arg_error = true;
7019 }
7020 return NULL;
7021 }
7022
7023 /* Likewise, error if the ABI requires us to return values in the
7024 x87 registers and the user specified -mno-80387. */
7025 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7026 for (i = 0; i < n; i++)
7027 if (regclass[i] == X86_64_X87_CLASS
7028 || regclass[i] == X86_64_X87UP_CLASS
7029 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7030 {
7031 if (!issued_x87_ret_error)
7032 {
7033 error ("x87 register return with x87 disabled");
7034 issued_x87_ret_error = true;
7035 }
7036 return NULL;
7037 }
7038
7039 /* First construct the simple cases. Avoid SCmode, since we want to use
7040 a single register to pass this type. */
7041 if (n == 1 && mode != SCmode)
7042 switch (regclass[0])
7043 {
7044 case X86_64_INTEGER_CLASS:
7045 case X86_64_INTEGERSI_CLASS:
7046 return gen_rtx_REG (mode, intreg[0]);
7047 case X86_64_SSE_CLASS:
7048 case X86_64_SSESF_CLASS:
7049 case X86_64_SSEDF_CLASS:
7050 if (mode != BLKmode)
7051 return gen_reg_or_parallel (mode, orig_mode,
7052 SSE_REGNO (sse_regno));
7053 break;
7054 case X86_64_X87_CLASS:
7055 case X86_64_COMPLEX_X87_CLASS:
7056 return gen_rtx_REG (mode, FIRST_STACK_REG);
7057 case X86_64_NO_CLASS:
7058 /* Zero sized array, struct or class. */
7059 return NULL;
7060 default:
7061 gcc_unreachable ();
7062 }
7063 if (n == 2
7064 && regclass[0] == X86_64_SSE_CLASS
7065 && regclass[1] == X86_64_SSEUP_CLASS
7066 && mode != BLKmode)
7067 return gen_reg_or_parallel (mode, orig_mode,
7068 SSE_REGNO (sse_regno));
7069 if (n == 4
7070 && regclass[0] == X86_64_SSE_CLASS
7071 && regclass[1] == X86_64_SSEUP_CLASS
7072 && regclass[2] == X86_64_SSEUP_CLASS
7073 && regclass[3] == X86_64_SSEUP_CLASS
7074 && mode != BLKmode)
7075 return gen_reg_or_parallel (mode, orig_mode,
7076 SSE_REGNO (sse_regno));
7077 if (n == 8
7078 && regclass[0] == X86_64_SSE_CLASS
7079 && regclass[1] == X86_64_SSEUP_CLASS
7080 && regclass[2] == X86_64_SSEUP_CLASS
7081 && regclass[3] == X86_64_SSEUP_CLASS
7082 && regclass[4] == X86_64_SSEUP_CLASS
7083 && regclass[5] == X86_64_SSEUP_CLASS
7084 && regclass[6] == X86_64_SSEUP_CLASS
7085 && regclass[7] == X86_64_SSEUP_CLASS
7086 && mode != BLKmode)
7087 return gen_reg_or_parallel (mode, orig_mode,
7088 SSE_REGNO (sse_regno));
7089 if (n == 2
7090 && regclass[0] == X86_64_X87_CLASS
7091 && regclass[1] == X86_64_X87UP_CLASS)
7092 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7093
7094 if (n == 2
7095 && regclass[0] == X86_64_INTEGER_CLASS
7096 && regclass[1] == X86_64_INTEGER_CLASS
7097 && (mode == CDImode || mode == TImode)
7098 && intreg[0] + 1 == intreg[1])
7099 return gen_rtx_REG (mode, intreg[0]);
7100
7101 /* Otherwise figure out the entries of the PARALLEL. */
7102 for (i = 0; i < n; i++)
7103 {
7104 int pos;
7105
7106 switch (regclass[i])
7107 {
7108 case X86_64_NO_CLASS:
7109 break;
7110 case X86_64_INTEGER_CLASS:
7111 case X86_64_INTEGERSI_CLASS:
7112 /* Merge TImodes on aligned occasions here too. */
7113 if (i * 8 + 8 > bytes)
7114 tmpmode
7115 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7116 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7117 tmpmode = SImode;
7118 else
7119 tmpmode = DImode;
7120 /* We've requested 24 bytes for which we
7121 don't have a mode. Use DImode. */
7122 if (tmpmode == BLKmode)
7123 tmpmode = DImode;
7124 exp [nexps++]
7125 = gen_rtx_EXPR_LIST (VOIDmode,
7126 gen_rtx_REG (tmpmode, *intreg),
7127 GEN_INT (i*8));
7128 intreg++;
7129 break;
7130 case X86_64_SSESF_CLASS:
7131 exp [nexps++]
7132 = gen_rtx_EXPR_LIST (VOIDmode,
7133 gen_rtx_REG (SFmode,
7134 SSE_REGNO (sse_regno)),
7135 GEN_INT (i*8));
7136 sse_regno++;
7137 break;
7138 case X86_64_SSEDF_CLASS:
7139 exp [nexps++]
7140 = gen_rtx_EXPR_LIST (VOIDmode,
7141 gen_rtx_REG (DFmode,
7142 SSE_REGNO (sse_regno)),
7143 GEN_INT (i*8));
7144 sse_regno++;
7145 break;
7146 case X86_64_SSE_CLASS:
7147 pos = i;
7148 switch (n)
7149 {
7150 case 1:
7151 tmpmode = DImode;
7152 break;
7153 case 2:
7154 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7155 {
7156 tmpmode = TImode;
7157 i++;
7158 }
7159 else
7160 tmpmode = DImode;
7161 break;
7162 case 4:
7163 gcc_assert (i == 0
7164 && regclass[1] == X86_64_SSEUP_CLASS
7165 && regclass[2] == X86_64_SSEUP_CLASS
7166 && regclass[3] == X86_64_SSEUP_CLASS);
7167 tmpmode = OImode;
7168 i += 3;
7169 break;
7170 case 8:
7171 gcc_assert (i == 0
7172 && regclass[1] == X86_64_SSEUP_CLASS
7173 && regclass[2] == X86_64_SSEUP_CLASS
7174 && regclass[3] == X86_64_SSEUP_CLASS
7175 && regclass[4] == X86_64_SSEUP_CLASS
7176 && regclass[5] == X86_64_SSEUP_CLASS
7177 && regclass[6] == X86_64_SSEUP_CLASS
7178 && regclass[7] == X86_64_SSEUP_CLASS);
7179 tmpmode = XImode;
7180 i += 7;
7181 break;
7182 default:
7183 gcc_unreachable ();
7184 }
7185 exp [nexps++]
7186 = gen_rtx_EXPR_LIST (VOIDmode,
7187 gen_rtx_REG (tmpmode,
7188 SSE_REGNO (sse_regno)),
7189 GEN_INT (pos*8));
7190 sse_regno++;
7191 break;
7192 default:
7193 gcc_unreachable ();
7194 }
7195 }
7196
7197 /* Empty aligned struct, union or class. */
7198 if (nexps == 0)
7199 return NULL;
7200
7201 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7202 for (i = 0; i < nexps; i++)
7203 XVECEXP (ret, 0, i) = exp [i];
7204 return ret;
7205 }
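/* For the struct s { double d; long l; } example used above, and assuming
   it is the first argument of a SYSV function, construct_container builds
   roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the double travels in %xmm0 and the long in %rdi, with each
   expr_list recording the byte offset of its piece within the struct.  */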
7206
7207 /* Update the data in CUM to advance over an argument of mode MODE
7208 and data type TYPE. (TYPE is null for libcalls where that information
7209 may not be available.) */
7210
7211 static void
7212 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7213 const_tree type, HOST_WIDE_INT bytes,
7214 HOST_WIDE_INT words)
7215 {
7216 switch (mode)
7217 {
7218 default:
7219 break;
7220
7221 case BLKmode:
7222 if (bytes < 0)
7223 break;
7224 /* FALLTHRU */
7225
7226 case DImode:
7227 case SImode:
7228 case HImode:
7229 case QImode:
7230 cum->words += words;
7231 cum->nregs -= words;
7232 cum->regno += words;
7233
7234 if (cum->nregs <= 0)
7235 {
7236 cum->nregs = 0;
7237 cum->regno = 0;
7238 }
7239 break;
7240
7241 case OImode:
7242 /* OImode shouldn't be used directly. */
7243 gcc_unreachable ();
7244
7245 case DFmode:
7246 if (cum->float_in_sse < 2)
7247 break;
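/* FALLTHRU */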
7248 case SFmode:
7249 if (cum->float_in_sse < 1)
7250 break;
7251 /* FALLTHRU */
7252
7253 case V8SFmode:
7254 case V8SImode:
7255 case V64QImode:
7256 case V32HImode:
7257 case V16SImode:
7258 case V8DImode:
7259 case V16SFmode:
7260 case V8DFmode:
7261 case V32QImode:
7262 case V16HImode:
7263 case V4DFmode:
7264 case V4DImode:
7265 case TImode:
7266 case V16QImode:
7267 case V8HImode:
7268 case V4SImode:
7269 case V2DImode:
7270 case V4SFmode:
7271 case V2DFmode:
7272 if (!type || !AGGREGATE_TYPE_P (type))
7273 {
7274 cum->sse_words += words;
7275 cum->sse_nregs -= 1;
7276 cum->sse_regno += 1;
7277 if (cum->sse_nregs <= 0)
7278 {
7279 cum->sse_nregs = 0;
7280 cum->sse_regno = 0;
7281 }
7282 }
7283 break;
7284
7285 case V8QImode:
7286 case V4HImode:
7287 case V2SImode:
7288 case V2SFmode:
7289 case V1TImode:
7290 case V1DImode:
7291 if (!type || !AGGREGATE_TYPE_P (type))
7292 {
7293 cum->mmx_words += words;
7294 cum->mmx_nregs -= 1;
7295 cum->mmx_regno += 1;
7296 if (cum->mmx_nregs <= 0)
7297 {
7298 cum->mmx_nregs = 0;
7299 cum->mmx_regno = 0;
7300 }
7301 }
7302 break;
7303 }
7304 }
7305
7306 static void
7307 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7308 const_tree type, HOST_WIDE_INT words, bool named)
7309 {
7310 int int_nregs, sse_nregs;
7311
7312 /* Unnamed 512 and 256 bit vector mode parameters are passed on the stack. */
7313 if (!named && (VALID_AVX512F_REG_MODE (mode)
7314 || VALID_AVX256_REG_MODE (mode)))
7315 return;
7316
7317 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7318 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7319 {
7320 cum->nregs -= int_nregs;
7321 cum->sse_nregs -= sse_nregs;
7322 cum->regno += int_nregs;
7323 cum->sse_regno += sse_nregs;
7324 }
7325 else
7326 {
7327 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7328 cum->words = (cum->words + align - 1) & ~(align - 1);
7329 cum->words += words;
7330 }
7331 }
7332
7333 static void
7334 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7335 HOST_WIDE_INT words)
7336 {
7337 /* Otherwise, this should be passed indirect. */
7338 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7339
7340 cum->words += words;
7341 if (cum->nregs > 0)
7342 {
7343 cum->nregs -= 1;
7344 cum->regno += 1;
7345 }
7346 }
7347
7348 /* Update the data in CUM to advance over an argument of mode MODE and
7349 data type TYPE. (TYPE is null for libcalls where that information
7350 may not be available.) */
7351
7352 static void
7353 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7354 const_tree type, bool named)
7355 {
7356 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7357 HOST_WIDE_INT bytes, words;
7358
7359 if (mode == BLKmode)
7360 bytes = int_size_in_bytes (type);
7361 else
7362 bytes = GET_MODE_SIZE (mode);
7363 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7364
7365 if (type)
7366 mode = type_natural_mode (type, NULL, false);
7367
7368 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7369 function_arg_advance_ms_64 (cum, bytes, words);
7370 else if (TARGET_64BIT)
7371 function_arg_advance_64 (cum, mode, type, words, named);
7372 else
7373 function_arg_advance_32 (cum, mode, type, bytes, words);
7374 }
7375
7376 /* Define where to put the arguments to a function.
7377 Value is zero to push the argument on the stack,
7378 or a hard register in which to store the argument.
7379
7380 MODE is the argument's machine mode.
7381 TYPE is the data type of the argument (as a tree).
7382 This is null for libcalls where that information may
7383 not be available.
7384 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7385 the preceding args and about the function being called.
7386 NAMED is nonzero if this argument is a named parameter
7387 (otherwise it is an extra parameter matching an ellipsis). */
7388
7389 static rtx
7390 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7391 enum machine_mode orig_mode, const_tree type,
7392 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7393 {
7394 /* Avoid the AL settings for the Unix64 ABI. */
7395 if (mode == VOIDmode)
7396 return constm1_rtx;
7397
7398 switch (mode)
7399 {
7400 default:
7401 break;
7402
7403 case BLKmode:
7404 if (bytes < 0)
7405 break;
7406 /* FALLTHRU */
7407 case DImode:
7408 case SImode:
7409 case HImode:
7410 case QImode:
7411 if (words <= cum->nregs)
7412 {
7413 int regno = cum->regno;
7414
7415 /* Fastcall allocates the first two DWORD (SImode) or
7416 smaller arguments to ECX and EDX, unless the argument
7417 is an aggregate type. */
7418 if (cum->fastcall)
7419 {
7420 if (mode == BLKmode
7421 || mode == DImode
7422 || (type && AGGREGATE_TYPE_P (type)))
7423 break;
7424
7425 /* ECX, not EAX, is the first allocated register. */
7426 if (regno == AX_REG)
7427 regno = CX_REG;
7428 }
7429 return gen_rtx_REG (mode, regno);
7430 }
7431 break;
7432
7433 case DFmode:
7434 if (cum->float_in_sse < 2)
7435 break;
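/* FALLTHRU */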
7436 case SFmode:
7437 if (cum->float_in_sse < 1)
7438 break;
7439 /* FALLTHRU */
7440 case TImode:
7441 /* In 32bit, we pass TImode in xmm registers. */
7442 case V16QImode:
7443 case V8HImode:
7444 case V4SImode:
7445 case V2DImode:
7446 case V4SFmode:
7447 case V2DFmode:
7448 if (!type || !AGGREGATE_TYPE_P (type))
7449 {
7450 if (cum->sse_nregs)
7451 return gen_reg_or_parallel (mode, orig_mode,
7452 cum->sse_regno + FIRST_SSE_REG);
7453 }
7454 break;
7455
7456 case OImode:
7457 case XImode:
7458 /* OImode and XImode shouldn't be used directly. */
7459 gcc_unreachable ();
7460
7461 case V64QImode:
7462 case V32HImode:
7463 case V16SImode:
7464 case V8DImode:
7465 case V16SFmode:
7466 case V8DFmode:
7467 case V8SFmode:
7468 case V8SImode:
7469 case V32QImode:
7470 case V16HImode:
7471 case V4DFmode:
7472 case V4DImode:
7473 if (!type || !AGGREGATE_TYPE_P (type))
7474 {
7475 if (cum->sse_nregs)
7476 return gen_reg_or_parallel (mode, orig_mode,
7477 cum->sse_regno + FIRST_SSE_REG);
7478 }
7479 break;
7480
7481 case V8QImode:
7482 case V4HImode:
7483 case V2SImode:
7484 case V2SFmode:
7485 case V1TImode:
7486 case V1DImode:
7487 if (!type || !AGGREGATE_TYPE_P (type))
7488 {
7489 if (cum->mmx_nregs)
7490 return gen_reg_or_parallel (mode, orig_mode,
7491 cum->mmx_regno + FIRST_MMX_REG);
7492 }
7493 break;
7494 }
7495
7496 return NULL_RTX;
7497 }
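/* Illustrative examples of the 32-bit register passing handled above
   (the prototypes are hypothetical):

     __attribute__ ((fastcall)) void f (int a, int b, int c);
	a -> %ecx, b -> %edx, c -> stack

     __attribute__ ((regparm (3))) void g (int a, int b, int c);
	a -> %eax, b -> %edx, c -> %ecx

   Aggregates and DImode values are never allocated to ECX/EDX for
   fastcall; as the checks above show, they fall through to the stack.  */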
7498
7499 static rtx
7500 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7501 enum machine_mode orig_mode, const_tree type, bool named)
7502 {
7503 /* Handle a hidden AL argument containing number of registers
7504 for varargs x86-64 functions. */
7505 if (mode == VOIDmode)
7506 return GEN_INT (cum->maybe_vaarg
7507 ? (cum->sse_nregs < 0
7508 ? X86_64_SSE_REGPARM_MAX
7509 : cum->sse_regno)
7510 : -1);
7511
7512 switch (mode)
7513 {
7514 default:
7515 break;
7516
7517 case V8SFmode:
7518 case V8SImode:
7519 case V32QImode:
7520 case V16HImode:
7521 case V4DFmode:
7522 case V4DImode:
7523 case V16SFmode:
7524 case V16SImode:
7525 case V64QImode:
7526 case V32HImode:
7527 case V8DFmode:
7528 case V8DImode:
7529 /* Unnamed 256 and 512 bit vector mode parameters are passed on the stack. */
7530 if (!named)
7531 return NULL;
7532 break;
7533 }
7534
7535 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7536 cum->sse_nregs,
7537 &x86_64_int_parameter_registers [cum->regno],
7538 cum->sse_regno);
7539 }
7540
7541 static rtx
7542 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7543 enum machine_mode orig_mode, bool named,
7544 HOST_WIDE_INT bytes)
7545 {
7546 unsigned int regno;
7547
7548 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7549 We use the value -2 to specify that the current function call is MS ABI. */
7550 if (mode == VOIDmode)
7551 return GEN_INT (-2);
7552
7553 /* If we've run out of registers, it goes on the stack. */
7554 if (cum->nregs == 0)
7555 return NULL_RTX;
7556
7557 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7558
7559 /* Only floating point modes are passed in anything but integer regs. */
7560 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7561 {
7562 if (named)
7563 regno = cum->regno + FIRST_SSE_REG;
7564 else
7565 {
7566 rtx t1, t2;
7567
7568 /* Unnamed floating parameters are passed in both the
7569 SSE and integer registers. */
7570 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7571 t2 = gen_rtx_REG (mode, regno);
7572 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7573 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7574 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7575 }
7576 }
7577 /* Handle aggregate types passed in a register. */
7578 if (orig_mode == BLKmode)
7579 {
7580 if (bytes > 0 && bytes <= 8)
7581 mode = (bytes > 4 ? DImode : SImode);
7582 if (mode == BLKmode)
7583 mode = DImode;
7584 }
7585
7586 return gen_reg_or_parallel (mode, orig_mode, regno);
7587 }
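/* A sketch of the MS 64-bit convention implemented above, using a
   hypothetical prototype:

     void f (int a, double b, void *c, long long d);

   The four argument slots map to %rcx, %xmm1, %r8 and %r9: each parameter
   owns one slot, and a floating point parameter uses the XMM register of
   its slot while the others use the integer register.  An unnamed double
   (varargs) is passed in both the XMM and the integer register of its
   slot, which is the PARALLEL built above.  Aggregates whose size is not
   1, 2, 4 or 8 bytes are passed by reference (see ix86_pass_by_reference
   below).  */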
7588
7589 /* Return where to put the arguments to a function.
7590 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7591
7592 MODE is the argument's machine mode. TYPE is the data type of the
7593 argument. It is null for libcalls where that information may not be
7594 available. CUM gives information about the preceding args and about
7595 the function being called. NAMED is nonzero if this argument is a
7596 named parameter (otherwise it is an extra parameter matching an
7597 ellipsis). */
7598
7599 static rtx
7600 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7601 const_tree type, bool named)
7602 {
7603 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7604 enum machine_mode mode = omode;
7605 HOST_WIDE_INT bytes, words;
7606 rtx arg;
7607
7608 if (mode == BLKmode)
7609 bytes = int_size_in_bytes (type);
7610 else
7611 bytes = GET_MODE_SIZE (mode);
7612 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7613
7614 /* To simplify the code below, represent vector types with a vector mode
7615 even if MMX/SSE are not active. */
7616 if (type && TREE_CODE (type) == VECTOR_TYPE)
7617 mode = type_natural_mode (type, cum, false);
7618
7619 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7620 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7621 else if (TARGET_64BIT)
7622 arg = function_arg_64 (cum, mode, omode, type, named);
7623 else
7624 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7625
7626 return arg;
7627 }
7628
7629 /* A C expression that indicates when an argument must be passed by
7630 reference. If nonzero for an argument, a copy of that argument is
7631 made in memory and a pointer to the argument is passed instead of
7632 the argument itself. The pointer is passed in whatever way is
7633 appropriate for passing a pointer to that type. */
7634
7635 static bool
7636 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7637 const_tree type, bool)
7638 {
7639 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7640
7641 /* See Windows x64 Software Convention. */
7642 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7643 {
7644 int msize = (int) GET_MODE_SIZE (mode);
7645 if (type)
7646 {
7647 /* Arrays are passed by reference. */
7648 if (TREE_CODE (type) == ARRAY_TYPE)
7649 return true;
7650
7651 if (AGGREGATE_TYPE_P (type))
7652 {
7653 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7654 are passed by reference. */
7655 msize = int_size_in_bytes (type);
7656 }
7657 }
7658
7659 /* __m128 is passed by reference. */
7660 switch (msize) {
7661 case 1: case 2: case 4: case 8:
7662 break;
7663 default:
7664 return true;
7665 }
7666 }
7667 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7668 return true;
7669
7670 return false;
7671 }
7672
7673 /* Return true when TYPE should be 128bit aligned for 32bit argument
7674 passing ABI. XXX: This function is obsolete and is only used for
7675 checking psABI compatibility with previous versions of GCC. */
7676
7677 static bool
7678 ix86_compat_aligned_value_p (const_tree type)
7679 {
7680 enum machine_mode mode = TYPE_MODE (type);
7681 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7682 || mode == TDmode
7683 || mode == TFmode
7684 || mode == TCmode)
7685 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7686 return true;
7687 if (TYPE_ALIGN (type) < 128)
7688 return false;
7689
7690 if (AGGREGATE_TYPE_P (type))
7691 {
7692 /* Walk the aggregates recursively. */
7693 switch (TREE_CODE (type))
7694 {
7695 case RECORD_TYPE:
7696 case UNION_TYPE:
7697 case QUAL_UNION_TYPE:
7698 {
7699 tree field;
7700
7701 /* Walk all the structure fields. */
7702 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7703 {
7704 if (TREE_CODE (field) == FIELD_DECL
7705 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7706 return true;
7707 }
7708 break;
7709 }
7710
7711 case ARRAY_TYPE:
7712 /* Just for use if some language passes arrays by value. */
7713 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7714 return true;
7715 break;
7716
7717 default:
7718 gcc_unreachable ();
7719 }
7720 }
7721 return false;
7722 }
7723
7724 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7725 XXX: This function is obsolete and is only used for checking psABI
7726 compatibility with previous versions of GCC. */
7727
7728 static unsigned int
7729 ix86_compat_function_arg_boundary (enum machine_mode mode,
7730 const_tree type, unsigned int align)
7731 {
7732 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7733 natural boundaries. */
7734 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7735 {
7736 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7737 make an exception for SSE modes since these require 128bit
7738 alignment.
7739
7740 The handling here differs from field_alignment. ICC aligns MMX
7741 arguments to 4 byte boundaries, while structure fields are aligned
7742 to 8 byte boundaries. */
7743 if (!type)
7744 {
7745 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7746 align = PARM_BOUNDARY;
7747 }
7748 else
7749 {
7750 if (!ix86_compat_aligned_value_p (type))
7751 align = PARM_BOUNDARY;
7752 }
7753 }
7754 if (align > BIGGEST_ALIGNMENT)
7755 align = BIGGEST_ALIGNMENT;
7756 return align;
7757 }
7758
7759 /* Return true when TYPE should be 128bit aligned for 32bit argument
7760 passing ABI. */
7761
7762 static bool
7763 ix86_contains_aligned_value_p (const_tree type)
7764 {
7765 enum machine_mode mode = TYPE_MODE (type);
7766
7767 if (mode == XFmode || mode == XCmode)
7768 return false;
7769
7770 if (TYPE_ALIGN (type) < 128)
7771 return false;
7772
7773 if (AGGREGATE_TYPE_P (type))
7774 {
7775 /* Walk the aggregates recursively. */
7776 switch (TREE_CODE (type))
7777 {
7778 case RECORD_TYPE:
7779 case UNION_TYPE:
7780 case QUAL_UNION_TYPE:
7781 {
7782 tree field;
7783
7784 /* Walk all the structure fields. */
7785 for (field = TYPE_FIELDS (type);
7786 field;
7787 field = DECL_CHAIN (field))
7788 {
7789 if (TREE_CODE (field) == FIELD_DECL
7790 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7791 return true;
7792 }
7793 break;
7794 }
7795
7796 case ARRAY_TYPE:
7797 /* Just for use if some language passes arrays by value. */
7798 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7799 return true;
7800 break;
7801
7802 default:
7803 gcc_unreachable ();
7804 }
7805 }
7806 else
7807 return TYPE_ALIGN (type) >= 128;
7808
7809 return false;
7810 }
7811
7812 /* Gives the alignment boundary, in bits, of an argument with the
7813 specified mode and type. */
7814
7815 static unsigned int
7816 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7817 {
7818 unsigned int align;
7819 if (type)
7820 {
7821 /* Since the main variant type is used for the call, convert TYPE
7822 to its main variant. */
7823 type = TYPE_MAIN_VARIANT (type);
7824 align = TYPE_ALIGN (type);
7825 }
7826 else
7827 align = GET_MODE_ALIGNMENT (mode);
7828 if (align < PARM_BOUNDARY)
7829 align = PARM_BOUNDARY;
7830 else
7831 {
7832 static bool warned;
7833 unsigned int saved_align = align;
7834
7835 if (!TARGET_64BIT)
7836 {
7837 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7838 if (!type)
7839 {
7840 if (mode == XFmode || mode == XCmode)
7841 align = PARM_BOUNDARY;
7842 }
7843 else if (!ix86_contains_aligned_value_p (type))
7844 align = PARM_BOUNDARY;
7845
7846 if (align < 128)
7847 align = PARM_BOUNDARY;
7848 }
7849
7850 if (warn_psabi
7851 && !warned
7852 && align != ix86_compat_function_arg_boundary (mode, type,
7853 saved_align))
7854 {
7855 warned = true;
7856 inform (input_location,
7857 "The ABI for passing parameters with %d-byte"
7858 " alignment has changed in GCC 4.6",
7859 align / BITS_PER_UNIT);
7860 }
7861 }
7862
7863 return align;
7864 }
7865
7866 /* Return true if N is a possible register number of function value. */
7867
7868 static bool
7869 ix86_function_value_regno_p (const unsigned int regno)
7870 {
7871 switch (regno)
7872 {
7873 case AX_REG:
7874 return true;
7875 case DX_REG:
7876 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7877 case DI_REG:
7878 case SI_REG:
7879 return TARGET_64BIT && ix86_abi != MS_ABI;
7880
7881 /* Complex values are returned in %st(0)/%st(1) pair. */
7882 case ST0_REG:
7883 case ST1_REG:
7884 /* TODO: The function should depend on current function ABI but
7885 builtins.c would need updating then. Therefore we use the
7886 default ABI. */
7887 if (TARGET_64BIT && ix86_abi == MS_ABI)
7888 return false;
7889 return TARGET_FLOAT_RETURNS_IN_80387;
7890
7891 /* Complex values are returned in %xmm0/%xmm1 pair. */
7892 case XMM0_REG:
7893 case XMM1_REG:
7894 return TARGET_SSE;
7895
7896 case MM0_REG:
7897 if (TARGET_MACHO || TARGET_64BIT)
7898 return false;
7899 return TARGET_MMX;
7900 }
7901
7902 return false;
7903 }
7904
7905 /* Define how to find the value returned by a function.
7906 VALTYPE is the data type of the value (as a tree).
7907 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7908 otherwise, FUNC is 0. */
7909
7910 static rtx
7911 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7912 const_tree fntype, const_tree fn)
7913 {
7914 unsigned int regno;
7915
7916 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7917 we normally prevent this case when mmx is not available. However
7918 some ABIs may require the result to be returned like DImode. */
7919 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7920 regno = FIRST_MMX_REG;
7921
7922 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7923 we prevent this case when sse is not available. However some ABIs
7924 may require the result to be returned like integer TImode. */
7925 else if (mode == TImode
7926 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7927 regno = FIRST_SSE_REG;
7928
7929 /* 32-byte vector modes in %ymm0. */
7930 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7931 regno = FIRST_SSE_REG;
7932
7933 /* 64-byte vector modes in %zmm0. */
7934 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7935 regno = FIRST_SSE_REG;
7936
7937 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7938 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7939 regno = FIRST_FLOAT_REG;
7940 else
7941 /* Most things go in %eax. */
7942 regno = AX_REG;
7943
7944 /* Override FP return register with %xmm0 for local functions when
7945 SSE math is enabled or for functions with sseregparm attribute. */
7946 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7947 {
7948 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7949 if ((sse_level >= 1 && mode == SFmode)
7950 || (sse_level == 2 && mode == DFmode))
7951 regno = FIRST_SSE_REG;
7952 }
7953
7954 /* OImode shouldn't be used directly. */
7955 gcc_assert (mode != OImode);
7956
7957 return gen_rtx_REG (orig_mode, regno);
7958 }
7959
7960 static rtx
7961 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7962 const_tree valtype)
7963 {
7964 rtx ret;
7965
7966 /* Handle libcalls, which don't provide a type node. */
7967 if (valtype == NULL)
7968 {
7969 unsigned int regno;
7970
7971 switch (mode)
7972 {
7973 case SFmode:
7974 case SCmode:
7975 case DFmode:
7976 case DCmode:
7977 case TFmode:
7978 case SDmode:
7979 case DDmode:
7980 case TDmode:
7981 regno = FIRST_SSE_REG;
7982 break;
7983 case XFmode:
7984 case XCmode:
7985 regno = FIRST_FLOAT_REG;
7986 break;
7987 case TCmode:
7988 return NULL;
7989 default:
7990 regno = AX_REG;
7991 }
7992
7993 return gen_rtx_REG (mode, regno);
7994 }
7995 else if (POINTER_TYPE_P (valtype))
7996 {
7997 /* Pointers are always returned in word_mode. */
7998 mode = word_mode;
7999 }
8000
8001 ret = construct_container (mode, orig_mode, valtype, 1,
8002 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8003 x86_64_int_return_registers, 0);
8004
8005 /* For zero-sized structures, construct_container returns NULL, but we
8006 need to keep the rest of the compiler happy by returning a meaningful value. */
8007 if (!ret)
8008 ret = gen_rtx_REG (orig_mode, AX_REG);
8009
8010 return ret;
8011 }
8012
8013 static rtx
8014 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
8015 const_tree valtype)
8016 {
8017 unsigned int regno = AX_REG;
8018
8019 if (TARGET_SSE)
8020 {
8021 switch (GET_MODE_SIZE (mode))
8022 {
8023 case 16:
8024 if (valtype != NULL_TREE
8025 && !VECTOR_INTEGER_TYPE_P (valtype)
8027 && !INTEGRAL_TYPE_P (valtype)
8028 && !VECTOR_FLOAT_TYPE_P (valtype))
8029 break;
8030 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8031 && !COMPLEX_MODE_P (mode))
8032 regno = FIRST_SSE_REG;
8033 break;
8034 case 8:
8035 case 4:
8036 if (mode == SFmode || mode == DFmode)
8037 regno = FIRST_SSE_REG;
8038 break;
8039 default:
8040 break;
8041 }
8042 }
8043 return gen_rtx_REG (orig_mode, regno);
8044 }
8045
8046 static rtx
8047 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8048 enum machine_mode orig_mode, enum machine_mode mode)
8049 {
8050 const_tree fn, fntype;
8051
8052 fn = NULL_TREE;
8053 if (fntype_or_decl && DECL_P (fntype_or_decl))
8054 fn = fntype_or_decl;
8055 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8056
8057 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8058 return function_value_ms_64 (orig_mode, mode, valtype);
8059 else if (TARGET_64BIT)
8060 return function_value_64 (orig_mode, mode, valtype);
8061 else
8062 return function_value_32 (orig_mode, mode, fntype, fn);
8063 }
8064
8065 static rtx
8066 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8067 {
8068 enum machine_mode mode, orig_mode;
8069
8070 orig_mode = TYPE_MODE (valtype);
8071 mode = type_natural_mode (valtype, NULL, true);
8072 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8073 }
8074
8075 /* Pointer function arguments and return values are promoted to
8076 word_mode. */
8077
8078 static enum machine_mode
8079 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8080 int *punsignedp, const_tree fntype,
8081 int for_return)
8082 {
8083 if (type != NULL_TREE && POINTER_TYPE_P (type))
8084 {
8085 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8086 return word_mode;
8087 }
8088 return default_promote_function_mode (type, mode, punsignedp, fntype,
8089 for_return);
8090 }
8091
8092 /* Return true if a structure, union or array with MODE containing FIELD
8093 should be accessed using BLKmode. */
8094
8095 static bool
8096 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8097 {
8098 /* Union with XFmode must be in BLKmode. */
8099 return (mode == XFmode
8100 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8101 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8102 }
8103
8104 rtx
8105 ix86_libcall_value (enum machine_mode mode)
8106 {
8107 return ix86_function_value_1 (NULL, NULL, mode, mode);
8108 }
8109
8110 /* Return true iff type is returned in memory. */
8111
8112 static bool
8113 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8114 {
8115 #ifdef SUBTARGET_RETURN_IN_MEMORY
8116 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8117 #else
8118 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8119 HOST_WIDE_INT size;
8120
8121 if (TARGET_64BIT)
8122 {
8123 if (ix86_function_type_abi (fntype) == MS_ABI)
8124 {
8125 size = int_size_in_bytes (type);
8126
8127 /* __m128 is returned in xmm0. */
8128 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8129 || INTEGRAL_TYPE_P (type)
8130 || VECTOR_FLOAT_TYPE_P (type))
8131 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8132 && !COMPLEX_MODE_P (mode)
8133 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8134 return false;
8135
8136 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8137 return size != 1 && size != 2 && size != 4 && size != 8;
8138 }
8139 else
8140 {
8141 int needed_intregs, needed_sseregs;
8142
8143 return examine_argument (mode, type, 1,
8144 &needed_intregs, &needed_sseregs);
8145 }
8146 }
8147 else
8148 {
8149 if (mode == BLKmode)
8150 return true;
8151
8152 size = int_size_in_bytes (type);
8153
8154 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8155 return false;
8156
8157 if (VECTOR_MODE_P (mode) || mode == TImode)
8158 {
8159 /* User-created vectors small enough to fit in EAX. */
8160 if (size < 8)
8161 return false;
8162
8163 /* Unless the ABI prescribes otherwise,
8164 MMX/3dNow values are returned in MM0 if available. */
8165
8166 if (size == 8)
8167 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8168
8169 /* SSE values are returned in XMM0 if available. */
8170 if (size == 16)
8171 return !TARGET_SSE;
8172
8173 /* AVX values are returned in YMM0 if available. */
8174 if (size == 32)
8175 return !TARGET_AVX;
8176
8177 /* AVX512F values are returned in ZMM0 if available. */
8178 if (size == 64)
8179 return !TARGET_AVX512F;
8180 }
8181
8182 if (mode == XFmode)
8183 return false;
8184
8185 if (size > 12)
8186 return true;
8187
8188 /* OImode shouldn't be used directly. */
8189 gcc_assert (mode != OImode);
8190
8191 return false;
8192 }
8193 #endif
8194 }
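/* A few illustrative 32-bit cases for the logic above (the struct is
   hypothetical):

     struct s { int a, b, c, d; }   16 bytes, BLKmode -> returned in memory
     __m128 (V4SFmode, 16 bytes)    with SSE enabled  -> returned in %xmm0
     long double (XFmode)                             -> returned in %st(0)  */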
8195
8196 \f
8197 /* Create the va_list data type. */
8198
8199 /* Return the calling-convention-specific va_list data type.
8200 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8201
8202 static tree
8203 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8204 {
8205 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8206
8207 /* For i386 we use a plain pointer to the argument area. */
8208 if (!TARGET_64BIT || abi == MS_ABI)
8209 return build_pointer_type (char_type_node);
8210
8211 record = lang_hooks.types.make_type (RECORD_TYPE);
8212 type_decl = build_decl (BUILTINS_LOCATION,
8213 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8214
8215 f_gpr = build_decl (BUILTINS_LOCATION,
8216 FIELD_DECL, get_identifier ("gp_offset"),
8217 unsigned_type_node);
8218 f_fpr = build_decl (BUILTINS_LOCATION,
8219 FIELD_DECL, get_identifier ("fp_offset"),
8220 unsigned_type_node);
8221 f_ovf = build_decl (BUILTINS_LOCATION,
8222 FIELD_DECL, get_identifier ("overflow_arg_area"),
8223 ptr_type_node);
8224 f_sav = build_decl (BUILTINS_LOCATION,
8225 FIELD_DECL, get_identifier ("reg_save_area"),
8226 ptr_type_node);
8227
8228 va_list_gpr_counter_field = f_gpr;
8229 va_list_fpr_counter_field = f_fpr;
8230
8231 DECL_FIELD_CONTEXT (f_gpr) = record;
8232 DECL_FIELD_CONTEXT (f_fpr) = record;
8233 DECL_FIELD_CONTEXT (f_ovf) = record;
8234 DECL_FIELD_CONTEXT (f_sav) = record;
8235
8236 TYPE_STUB_DECL (record) = type_decl;
8237 TYPE_NAME (record) = type_decl;
8238 TYPE_FIELDS (record) = f_gpr;
8239 DECL_CHAIN (f_gpr) = f_fpr;
8240 DECL_CHAIN (f_fpr) = f_ovf;
8241 DECL_CHAIN (f_ovf) = f_sav;
8242
8243 layout_type (record);
8244
8245 /* The correct type is an array type of one element. */
8246 return build_array_type (record, build_index_type (size_zero_node));
8247 }
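
/* At the C level the record built above corresponds to the familiar
   SysV x86-64 va_list; a sketch, assuming the standard psABI layout:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag __builtin_va_list[1];  */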
8248
8249 /* Set up the builtin va_list data type and, for 64-bit, the additional
8250 calling convention specific va_list data types. */
8251
8252 static tree
8253 ix86_build_builtin_va_list (void)
8254 {
8255 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8256
8257 /* Initialize abi specific va_list builtin types. */
8258 if (TARGET_64BIT)
8259 {
8260 tree t;
8261 if (ix86_abi == MS_ABI)
8262 {
8263 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8264 if (TREE_CODE (t) != RECORD_TYPE)
8265 t = build_variant_type_copy (t);
8266 sysv_va_list_type_node = t;
8267 }
8268 else
8269 {
8270 t = ret;
8271 if (TREE_CODE (t) != RECORD_TYPE)
8272 t = build_variant_type_copy (t);
8273 sysv_va_list_type_node = t;
8274 }
8275 if (ix86_abi != MS_ABI)
8276 {
8277 t = ix86_build_builtin_va_list_abi (MS_ABI);
8278 if (TREE_CODE (t) != RECORD_TYPE)
8279 t = build_variant_type_copy (t);
8280 ms_va_list_type_node = t;
8281 }
8282 else
8283 {
8284 t = ret;
8285 if (TREE_CODE (t) != RECORD_TYPE)
8286 t = build_variant_type_copy (t);
8287 ms_va_list_type_node = t;
8288 }
8289 }
8290
8291 return ret;
8292 }
8293
8294 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8295
8296 static void
8297 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8298 {
8299 rtx save_area, mem;
8300 alias_set_type set;
8301 int i, max;
8302
8303 /* GPR size of varargs save area. */
8304 if (cfun->va_list_gpr_size)
8305 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8306 else
8307 ix86_varargs_gpr_size = 0;
8308
8309 /* FPR size of varargs save area. We don't need it if we don't pass
8310 anything in SSE registers. */
8311 if (TARGET_SSE && cfun->va_list_fpr_size)
8312 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8313 else
8314 ix86_varargs_fpr_size = 0;
8315
8316 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8317 return;
8318
8319 save_area = frame_pointer_rtx;
8320 set = get_varargs_alias_set ();
8321
8322 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8323 if (max > X86_64_REGPARM_MAX)
8324 max = X86_64_REGPARM_MAX;
8325
8326 for (i = cum->regno; i < max; i++)
8327 {
8328 mem = gen_rtx_MEM (word_mode,
8329 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8330 MEM_NOTRAP_P (mem) = 1;
8331 set_mem_alias_set (mem, set);
8332 emit_move_insn (mem,
8333 gen_rtx_REG (word_mode,
8334 x86_64_int_parameter_registers[i]));
8335 }
8336
8337 if (ix86_varargs_fpr_size)
8338 {
8339 enum machine_mode smode;
8340 rtx_code_label *label;
8341 rtx test;
8342
8343 /* Now emit code to save SSE registers. The AX parameter contains number
8344 of SSE parameter registers used to call this function, though all we
8345 actually check here is the zero/non-zero status. */
8346
8347 label = gen_label_rtx ();
8348 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8349 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8350 label));
8351
8352 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8353 we used movdqa (i.e. TImode) instead? Perhaps even better would
8354 be if we could determine the real mode of the data, via a hook
8355 into pass_stdarg. Ignore all that for now. */
8356 smode = V4SFmode;
8357 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8358 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8359
8360 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8361 if (max > X86_64_SSE_REGPARM_MAX)
8362 max = X86_64_SSE_REGPARM_MAX;
8363
8364 for (i = cum->sse_regno; i < max; ++i)
8365 {
8366 mem = plus_constant (Pmode, save_area,
8367 i * 16 + ix86_varargs_gpr_size);
8368 mem = gen_rtx_MEM (smode, mem);
8369 MEM_NOTRAP_P (mem) = 1;
8370 set_mem_alias_set (mem, set);
8371 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8372
8373 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8374 }
8375
8376 emit_label (label);
8377 }
8378 }
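
/* The resulting register save area layout; a sketch, assuming the
   standard SysV values X86_64_REGPARM_MAX == 6 and
   X86_64_SSE_REGPARM_MAX == 8:

     bytes   0 ..  47   rdi, rsi, rdx, rcx, r8, r9   (8 bytes each)
     bytes  48 .. 175   xmm0 .. xmm7                 (16 bytes each)

   The gp_offset and fp_offset fields of the va_list index into this
   block.  */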
8379
8380 static void
8381 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8382 {
8383 alias_set_type set = get_varargs_alias_set ();
8384 int i;
8385
8386   /* Reset to zero, as there might be a SysV vararg used
8387 before. */
8388 ix86_varargs_gpr_size = 0;
8389 ix86_varargs_fpr_size = 0;
8390
8391 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8392 {
8393 rtx reg, mem;
8394
8395 mem = gen_rtx_MEM (Pmode,
8396 plus_constant (Pmode, virtual_incoming_args_rtx,
8397 i * UNITS_PER_WORD));
8398 MEM_NOTRAP_P (mem) = 1;
8399 set_mem_alias_set (mem, set);
8400
8401 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8402 emit_move_insn (mem, reg);
8403 }
8404 }
8405
8406 static void
8407 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8408 tree type, int *, int no_rtl)
8409 {
8410 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8411 CUMULATIVE_ARGS next_cum;
8412 tree fntype;
8413
8414   /* This argument doesn't appear to be used anymore, which is good,
8415 because the old code here didn't suppress rtl generation. */
8416 gcc_assert (!no_rtl);
8417
8418 if (!TARGET_64BIT)
8419 return;
8420
8421 fntype = TREE_TYPE (current_function_decl);
8422
8423 /* For varargs, we do not want to skip the dummy va_dcl argument.
8424 For stdargs, we do want to skip the last named argument. */
8425 next_cum = *cum;
8426 if (stdarg_p (fntype))
8427 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8428 true);
8429
8430 if (cum->call_abi == MS_ABI)
8431 setup_incoming_varargs_ms_64 (&next_cum);
8432 else
8433 setup_incoming_varargs_64 (&next_cum);
8434 }
8435
8436 /* Checks if TYPE is of kind va_list char *. */
8437
8438 static bool
8439 is_va_list_char_pointer (tree type)
8440 {
8441 tree canonic;
8442
8443 /* For 32-bit it is always true. */
8444 if (!TARGET_64BIT)
8445 return true;
8446 canonic = ix86_canonical_va_list_type (type);
8447 return (canonic == ms_va_list_type_node
8448 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8449 }
8450
8451 /* Implement va_start. */
8452
8453 static void
8454 ix86_va_start (tree valist, rtx nextarg)
8455 {
8456 HOST_WIDE_INT words, n_gpr, n_fpr;
8457 tree f_gpr, f_fpr, f_ovf, f_sav;
8458 tree gpr, fpr, ovf, sav, t;
8459 tree type;
8460 rtx ovf_rtx;
8461
8462 if (flag_split_stack
8463 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8464 {
8465 unsigned int scratch_regno;
8466
8467 /* When we are splitting the stack, we can't refer to the stack
8468 arguments using internal_arg_pointer, because they may be on
8469 the old stack. The split stack prologue will arrange to
8470 leave a pointer to the old stack arguments in a scratch
8471 register, which we here copy to a pseudo-register. The split
8472 stack prologue can't set the pseudo-register directly because
8473 it (the prologue) runs before any registers have been saved. */
8474
8475 scratch_regno = split_stack_prologue_scratch_regno ();
8476 if (scratch_regno != INVALID_REGNUM)
8477 {
8478 rtx reg;
8479 rtx_insn *seq;
8480
8481 reg = gen_reg_rtx (Pmode);
8482 cfun->machine->split_stack_varargs_pointer = reg;
8483
8484 start_sequence ();
8485 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8486 seq = get_insns ();
8487 end_sequence ();
8488
8489 push_topmost_sequence ();
8490 emit_insn_after (seq, entry_of_function ());
8491 pop_topmost_sequence ();
8492 }
8493 }
8494
8495 /* Only 64bit target needs something special. */
8496 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8497 {
8498 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8499 std_expand_builtin_va_start (valist, nextarg);
8500 else
8501 {
8502 rtx va_r, next;
8503
8504 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8505 next = expand_binop (ptr_mode, add_optab,
8506 cfun->machine->split_stack_varargs_pointer,
8507 crtl->args.arg_offset_rtx,
8508 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8509 convert_move (va_r, next, 0);
8510 }
8511 return;
8512 }
8513
8514 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8515 f_fpr = DECL_CHAIN (f_gpr);
8516 f_ovf = DECL_CHAIN (f_fpr);
8517 f_sav = DECL_CHAIN (f_ovf);
8518
8519 valist = build_simple_mem_ref (valist);
8520 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8521 /* The following should be folded into the MEM_REF offset. */
8522 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8523 f_gpr, NULL_TREE);
8524 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8525 f_fpr, NULL_TREE);
8526 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8527 f_ovf, NULL_TREE);
8528 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8529 f_sav, NULL_TREE);
8530
8531 /* Count number of gp and fp argument registers used. */
8532 words = crtl->args.info.words;
8533 n_gpr = crtl->args.info.regno;
8534 n_fpr = crtl->args.info.sse_regno;
8535
8536 if (cfun->va_list_gpr_size)
8537 {
8538 type = TREE_TYPE (gpr);
8539 t = build2 (MODIFY_EXPR, type,
8540 gpr, build_int_cst (type, n_gpr * 8));
8541 TREE_SIDE_EFFECTS (t) = 1;
8542 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8543 }
8544
8545 if (TARGET_SSE && cfun->va_list_fpr_size)
8546 {
8547 type = TREE_TYPE (fpr);
8548 t = build2 (MODIFY_EXPR, type, fpr,
8549 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8550 TREE_SIDE_EFFECTS (t) = 1;
8551 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8552 }
8553
8554 /* Find the overflow area. */
8555 type = TREE_TYPE (ovf);
8556 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8557 ovf_rtx = crtl->args.internal_arg_pointer;
8558 else
8559 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8560 t = make_tree (type, ovf_rtx);
8561 if (words != 0)
8562 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8563 t = build2 (MODIFY_EXPR, type, ovf, t);
8564 TREE_SIDE_EFFECTS (t) = 1;
8565 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8566
8567 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8568 {
8569 /* Find the register save area.
8570 	 The function prologue saves it right above the stack frame.  */
8571 type = TREE_TYPE (sav);
8572 t = make_tree (type, frame_pointer_rtx);
8573 if (!ix86_varargs_gpr_size)
8574 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8575 t = build2 (MODIFY_EXPR, type, sav, t);
8576 TREE_SIDE_EFFECTS (t) = 1;
8577 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8578 }
8579 }
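
/* For the SysV 64-bit case the expansion above amounts to, in C-like
   pseudocode (a sketch, assuming X86_64_REGPARM_MAX == 6 and the
   va_list layout shown earlier):

     ap->gp_offset = <number of named GPR args> * 8;
     ap->fp_offset = 6 * 8 + <number of named SSE args> * 16;
     ap->overflow_arg_area = <address of the first stack argument>;
     ap->reg_save_area = <register save area set up by the prologue>;  */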
8580
8581 /* Implement va_arg. */
8582
8583 static tree
8584 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8585 gimple_seq *post_p)
8586 {
8587 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8588 tree f_gpr, f_fpr, f_ovf, f_sav;
8589 tree gpr, fpr, ovf, sav, t;
8590 int size, rsize;
8591 tree lab_false, lab_over = NULL_TREE;
8592 tree addr, t2;
8593 rtx container;
8594 int indirect_p = 0;
8595 tree ptrtype;
8596 enum machine_mode nat_mode;
8597 unsigned int arg_boundary;
8598
8599 /* Only 64bit target needs something special. */
8600 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8601 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8602
8603 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8604 f_fpr = DECL_CHAIN (f_gpr);
8605 f_ovf = DECL_CHAIN (f_fpr);
8606 f_sav = DECL_CHAIN (f_ovf);
8607
8608 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8609 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8610 valist = build_va_arg_indirect_ref (valist);
8611 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8612 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8613 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8614
8615 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8616 if (indirect_p)
8617 type = build_pointer_type (type);
8618 size = int_size_in_bytes (type);
8619 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8620
8621 nat_mode = type_natural_mode (type, NULL, false);
8622 switch (nat_mode)
8623 {
8624 case V8SFmode:
8625 case V8SImode:
8626 case V32QImode:
8627 case V16HImode:
8628 case V4DFmode:
8629 case V4DImode:
8630 case V16SFmode:
8631 case V16SImode:
8632 case V64QImode:
8633 case V32HImode:
8634 case V8DFmode:
8635 case V8DImode:
8636 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
8637 if (!TARGET_64BIT_MS_ABI)
8638 {
8639 container = NULL;
8640 break;
8641 }
8642
8643 default:
8644 container = construct_container (nat_mode, TYPE_MODE (type),
8645 type, 0, X86_64_REGPARM_MAX,
8646 X86_64_SSE_REGPARM_MAX, intreg,
8647 0);
8648 break;
8649 }
8650
8651 /* Pull the value out of the saved registers. */
8652
8653 addr = create_tmp_var (ptr_type_node, "addr");
8654
8655 if (container)
8656 {
8657 int needed_intregs, needed_sseregs;
8658 bool need_temp;
8659 tree int_addr, sse_addr;
8660
8661 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8662 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8663
8664 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8665
8666 need_temp = (!REG_P (container)
8667 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8668 || TYPE_ALIGN (type) > 128));
8669
8670       /* In case we are passing a structure, verify that it is a consecutive block
8671          in the register save area.  If not, we need to do moves.  */
8672 if (!need_temp && !REG_P (container))
8673 {
8674 /* Verify that all registers are strictly consecutive */
8675 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8676 {
8677 int i;
8678
8679 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8680 {
8681 rtx slot = XVECEXP (container, 0, i);
8682 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8683 || INTVAL (XEXP (slot, 1)) != i * 16)
8684 need_temp = 1;
8685 }
8686 }
8687 else
8688 {
8689 int i;
8690
8691 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8692 {
8693 rtx slot = XVECEXP (container, 0, i);
8694 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8695 || INTVAL (XEXP (slot, 1)) != i * 8)
8696 need_temp = 1;
8697 }
8698 }
8699 }
8700 if (!need_temp)
8701 {
8702 int_addr = addr;
8703 sse_addr = addr;
8704 }
8705 else
8706 {
8707 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8708 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8709 }
8710
8711 /* First ensure that we fit completely in registers. */
8712 if (needed_intregs)
8713 {
8714 t = build_int_cst (TREE_TYPE (gpr),
8715 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8716 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8717 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8718 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8719 gimplify_and_add (t, pre_p);
8720 }
8721 if (needed_sseregs)
8722 {
8723 t = build_int_cst (TREE_TYPE (fpr),
8724 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8725 + X86_64_REGPARM_MAX * 8);
8726 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8727 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8728 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8729 gimplify_and_add (t, pre_p);
8730 }
8731
8732 /* Compute index to start of area used for integer regs. */
8733 if (needed_intregs)
8734 {
8735 /* int_addr = gpr + sav; */
8736 t = fold_build_pointer_plus (sav, gpr);
8737 gimplify_assign (int_addr, t, pre_p);
8738 }
8739 if (needed_sseregs)
8740 {
8741 /* sse_addr = fpr + sav; */
8742 t = fold_build_pointer_plus (sav, fpr);
8743 gimplify_assign (sse_addr, t, pre_p);
8744 }
8745 if (need_temp)
8746 {
8747 int i, prev_size = 0;
8748 tree temp = create_tmp_var (type, "va_arg_tmp");
8749
8750 /* addr = &temp; */
8751 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8752 gimplify_assign (addr, t, pre_p);
8753
8754 for (i = 0; i < XVECLEN (container, 0); i++)
8755 {
8756 rtx slot = XVECEXP (container, 0, i);
8757 rtx reg = XEXP (slot, 0);
8758 enum machine_mode mode = GET_MODE (reg);
8759 tree piece_type;
8760 tree addr_type;
8761 tree daddr_type;
8762 tree src_addr, src;
8763 int src_offset;
8764 tree dest_addr, dest;
8765 int cur_size = GET_MODE_SIZE (mode);
8766
8767 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8768 prev_size = INTVAL (XEXP (slot, 1));
8769 if (prev_size + cur_size > size)
8770 {
8771 cur_size = size - prev_size;
8772 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8773 if (mode == BLKmode)
8774 mode = QImode;
8775 }
8776 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8777 if (mode == GET_MODE (reg))
8778 addr_type = build_pointer_type (piece_type);
8779 else
8780 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8781 true);
8782 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8783 true);
8784
8785 if (SSE_REGNO_P (REGNO (reg)))
8786 {
8787 src_addr = sse_addr;
8788 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8789 }
8790 else
8791 {
8792 src_addr = int_addr;
8793 src_offset = REGNO (reg) * 8;
8794 }
8795 src_addr = fold_convert (addr_type, src_addr);
8796 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8797
8798 dest_addr = fold_convert (daddr_type, addr);
8799 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8800 if (cur_size == GET_MODE_SIZE (mode))
8801 {
8802 src = build_va_arg_indirect_ref (src_addr);
8803 dest = build_va_arg_indirect_ref (dest_addr);
8804
8805 gimplify_assign (dest, src, pre_p);
8806 }
8807 else
8808 {
8809 tree copy
8810 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8811 3, dest_addr, src_addr,
8812 size_int (cur_size));
8813 gimplify_and_add (copy, pre_p);
8814 }
8815 prev_size += cur_size;
8816 }
8817 }
8818
8819 if (needed_intregs)
8820 {
8821 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8822 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8823 gimplify_assign (gpr, t, pre_p);
8824 }
8825
8826 if (needed_sseregs)
8827 {
8828 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8829 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8830 gimplify_assign (fpr, t, pre_p);
8831 }
8832
8833 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8834
8835 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8836 }
8837
8838 /* ... otherwise out of the overflow area. */
8839
8840   /* When the caller aligns a parameter on the stack, an alignment
8841      beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
8842      MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee here with the
8843      caller.  */
8844 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8845 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8846 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8847
8848 /* Care for on-stack alignment if needed. */
8849 if (arg_boundary <= 64 || size == 0)
8850 t = ovf;
8851 else
8852 {
8853 HOST_WIDE_INT align = arg_boundary / 8;
8854 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8855 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8856 build_int_cst (TREE_TYPE (t), -align));
8857 }
8858
8859 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8860 gimplify_assign (addr, t, pre_p);
8861
8862 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8863 gimplify_assign (unshare_expr (ovf), t, pre_p);
8864
8865 if (container)
8866 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8867
8868 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8869 addr = fold_convert (ptrtype, addr);
8870
8871 if (indirect_p)
8872 addr = build_va_arg_indirect_ref (addr);
8873 return build_va_arg_indirect_ref (addr);
8874 }
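
/* The gimple emitted above follows the psABI va_arg algorithm; roughly,
   for an integer argument needing N registers (a C-like sketch, assuming
   X86_64_REGPARM_MAX == 6):

     if (ap->gp_offset <= 48 - N * 8)
       {
	 addr = ap->reg_save_area + ap->gp_offset;
	 ap->gp_offset += N * 8;
       }
     else
       {
	 addr = ap->overflow_arg_area;
	 ap->overflow_arg_area += <size of the argument, rounded up>;
       }
     <fetch the value from *addr>;  */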
8875 \f
8876 /* Return true if OPNUM's MEM should be matched
8877 in movabs* patterns. */
8878
8879 bool
8880 ix86_check_movabs (rtx insn, int opnum)
8881 {
8882 rtx set, mem;
8883
8884 set = PATTERN (insn);
8885 if (GET_CODE (set) == PARALLEL)
8886 set = XVECEXP (set, 0, 0);
8887 gcc_assert (GET_CODE (set) == SET);
8888 mem = XEXP (set, opnum);
8889 while (GET_CODE (mem) == SUBREG)
8890 mem = SUBREG_REG (mem);
8891 gcc_assert (MEM_P (mem));
8892 return volatile_ok || !MEM_VOLATILE_P (mem);
8893 }
8894 \f
8895 /* Initialize the table of extra 80387 mathematical constants. */
8896
8897 static void
8898 init_ext_80387_constants (void)
8899 {
8900 static const char * cst[5] =
8901 {
8902 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8903 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8904 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8905 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8906 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8907 };
8908 int i;
8909
8910 for (i = 0; i < 5; i++)
8911 {
8912 real_from_string (&ext_80387_constants_table[i], cst[i]);
8913 /* Ensure each constant is rounded to XFmode precision. */
8914 real_convert (&ext_80387_constants_table[i],
8915 XFmode, &ext_80387_constants_table[i]);
8916 }
8917
8918 ext_80387_constants_init = 1;
8919 }
8920
8921 /* Return non-zero if the constant is something that
8922 can be loaded with a special instruction. */
8923
8924 int
8925 standard_80387_constant_p (rtx x)
8926 {
8927 enum machine_mode mode = GET_MODE (x);
8928
8929 REAL_VALUE_TYPE r;
8930
8931 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8932 return -1;
8933
8934 if (x == CONST0_RTX (mode))
8935 return 1;
8936 if (x == CONST1_RTX (mode))
8937 return 2;
8938
8939 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8940
8941 /* For XFmode constants, try to find a special 80387 instruction when
8942 optimizing for size or on those CPUs that benefit from them. */
8943 if (mode == XFmode
8944 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8945 {
8946 int i;
8947
8948 if (! ext_80387_constants_init)
8949 init_ext_80387_constants ();
8950
8951 for (i = 0; i < 5; i++)
8952 if (real_identical (&r, &ext_80387_constants_table[i]))
8953 return i + 3;
8954 }
8955
8956 /* Load of the constant -0.0 or -1.0 will be split as
8957 fldz;fchs or fld1;fchs sequence. */
8958 if (real_isnegzero (&r))
8959 return 8;
8960 if (real_identical (&r, &dconstm1))
8961 return 9;
8962
8963 return 0;
8964 }
8965
8966 /* Return the opcode of the special instruction to be used to load
8967 the constant X. */
8968
8969 const char *
8970 standard_80387_constant_opcode (rtx x)
8971 {
8972 switch (standard_80387_constant_p (x))
8973 {
8974 case 1:
8975 return "fldz";
8976 case 2:
8977 return "fld1";
8978 case 3:
8979 return "fldlg2";
8980 case 4:
8981 return "fldln2";
8982 case 5:
8983 return "fldl2e";
8984 case 6:
8985 return "fldl2t";
8986 case 7:
8987 return "fldpi";
8988 case 8:
8989 case 9:
8990 return "#";
8991 default:
8992 gcc_unreachable ();
8993 }
8994 }
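
/* For example, an XFmode constant equal to pi matches entry 4 of the
   table in init_ext_80387_constants, so standard_80387_constant_p
   returns 7 and standard_80387_constant_opcode maps that to "fldpi".
   The values 8 and 9 (-0.0 and -1.0) are instead emitted as fldz;fchs
   and fld1;fchs sequences.  */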
8995
8996 /* Return the CONST_DOUBLE representing the 80387 constant that is
8997 loaded by the specified special instruction. The argument IDX
8998 matches the return value from standard_80387_constant_p. */
8999
9000 rtx
9001 standard_80387_constant_rtx (int idx)
9002 {
9003 int i;
9004
9005 if (! ext_80387_constants_init)
9006 init_ext_80387_constants ();
9007
9008 switch (idx)
9009 {
9010 case 3:
9011 case 4:
9012 case 5:
9013 case 6:
9014 case 7:
9015 i = idx - 3;
9016 break;
9017
9018 default:
9019 gcc_unreachable ();
9020 }
9021
9022 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9023 XFmode);
9024 }
9025
9026 /* Return 1 if X is all 0s and 2 if X is all 1s
9027    in a supported SSE/AVX vector mode.  */
9028
9029 int
9030 standard_sse_constant_p (rtx x)
9031 {
9032 enum machine_mode mode = GET_MODE (x);
9033
9034 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9035 return 1;
9036 if (vector_all_ones_operand (x, mode))
9037 switch (mode)
9038 {
9039 case V16QImode:
9040 case V8HImode:
9041 case V4SImode:
9042 case V2DImode:
9043 if (TARGET_SSE2)
9044 return 2;
9045 case V32QImode:
9046 case V16HImode:
9047 case V8SImode:
9048 case V4DImode:
9049 if (TARGET_AVX2)
9050 return 2;
9051 case V64QImode:
9052 case V32HImode:
9053 case V16SImode:
9054 case V8DImode:
9055 if (TARGET_AVX512F)
9056 return 2;
9057 default:
9058 break;
9059 }
9060
9061 return 0;
9062 }
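
/* For instance, CONST0_RTX (V4SFmode) yields 1 (loadable with a single
   xorps), while an all-ones V4SImode vector yields 2 when SSE2 is
   enabled (loadable with pcmpeqd); see standard_sse_constant_opcode
   below.  */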
9063
9064 /* Return the opcode of the special instruction to be used to load
9065 the constant X. */
9066
9067 const char *
9068 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9069 {
9070 switch (standard_sse_constant_p (x))
9071 {
9072 case 1:
9073 switch (get_attr_mode (insn))
9074 {
9075 case MODE_XI:
9076 return "vpxord\t%g0, %g0, %g0";
9077 case MODE_V16SF:
9078 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9079 : "vpxord\t%g0, %g0, %g0";
9080 case MODE_V8DF:
9081 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9082 : "vpxorq\t%g0, %g0, %g0";
9083 case MODE_TI:
9084 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9085 : "%vpxor\t%0, %d0";
9086 case MODE_V2DF:
9087 return "%vxorpd\t%0, %d0";
9088 case MODE_V4SF:
9089 return "%vxorps\t%0, %d0";
9090
9091 case MODE_OI:
9092 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9093 : "vpxor\t%x0, %x0, %x0";
9094 case MODE_V4DF:
9095 return "vxorpd\t%x0, %x0, %x0";
9096 case MODE_V8SF:
9097 return "vxorps\t%x0, %x0, %x0";
9098
9099 default:
9100 break;
9101 }
9102
9103 case 2:
9104 if (TARGET_AVX512VL
9105 || get_attr_mode (insn) == MODE_XI
9106 || get_attr_mode (insn) == MODE_V8DF
9107 || get_attr_mode (insn) == MODE_V16SF)
9108 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9109 if (TARGET_AVX)
9110 return "vpcmpeqd\t%0, %0, %0";
9111 else
9112 return "pcmpeqd\t%0, %0";
9113
9114 default:
9115 break;
9116 }
9117 gcc_unreachable ();
9118 }
9119
9120 /* Returns true if OP contains a symbol reference.  */
9121
9122 bool
9123 symbolic_reference_mentioned_p (rtx op)
9124 {
9125 const char *fmt;
9126 int i;
9127
9128 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9129 return true;
9130
9131 fmt = GET_RTX_FORMAT (GET_CODE (op));
9132 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9133 {
9134 if (fmt[i] == 'E')
9135 {
9136 int j;
9137
9138 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9139 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9140 return true;
9141 }
9142
9143 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9144 return true;
9145 }
9146
9147 return false;
9148 }
9149
9150 /* Return true if it is appropriate to emit `ret' instructions in the
9151 body of a function. Do this only if the epilogue is simple, needing a
9152 couple of insns. Prior to reloading, we can't tell how many registers
9153 must be saved, so return false then. Return false if there is no frame
9154 marker to de-allocate. */
9155
9156 bool
9157 ix86_can_use_return_insn_p (void)
9158 {
9159 struct ix86_frame frame;
9160
9161 if (! reload_completed || frame_pointer_needed)
9162 return 0;
9163
9164 /* Don't allow more than 32k pop, since that's all we can do
9165 with one instruction. */
9166 if (crtl->args.pops_args && crtl->args.size >= 32768)
9167 return 0;
9168
9169 ix86_compute_frame_layout (&frame);
9170 return (frame.stack_pointer_offset == UNITS_PER_WORD
9171 && (frame.nregs + frame.nsseregs) == 0);
9172 }
9173 \f
9174 /* Value should be nonzero if functions must have frame pointers.
9175 Zero means the frame pointer need not be set up (and parms may
9176 be accessed via the stack pointer) in functions that seem suitable. */
9177
9178 static bool
9179 ix86_frame_pointer_required (void)
9180 {
9181 /* If we accessed previous frames, then the generated code expects
9182 to be able to access the saved ebp value in our frame. */
9183 if (cfun->machine->accesses_prev_frame)
9184 return true;
9185
9186   /* Several x86 OSes need a frame pointer for other reasons,
9187 usually pertaining to setjmp. */
9188 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9189 return true;
9190
9191 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9192 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9193 return true;
9194
9195   /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9196      stack allocation is 4GB.  */
9197 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9198 return true;
9199
9200 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9201 turns off the frame pointer by default. Turn it back on now if
9202 we've not got a leaf function. */
9203 if (TARGET_OMIT_LEAF_FRAME_POINTER
9204 && (!crtl->is_leaf
9205 || ix86_current_function_calls_tls_descriptor))
9206 return true;
9207
9208 if (crtl->profile && !flag_fentry)
9209 return true;
9210
9211 return false;
9212 }
9213
9214 /* Record that the current function accesses previous call frames. */
9215
9216 void
9217 ix86_setup_frame_addresses (void)
9218 {
9219 cfun->machine->accesses_prev_frame = 1;
9220 }
9221 \f
9222 #ifndef USE_HIDDEN_LINKONCE
9223 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9224 # define USE_HIDDEN_LINKONCE 1
9225 # else
9226 # define USE_HIDDEN_LINKONCE 0
9227 # endif
9228 #endif
9229
9230 static int pic_labels_used;
9231
9232 /* Fills in the label name that should be used for a pc thunk for
9233 the given register. */
9234
9235 static void
9236 get_pc_thunk_name (char name[32], unsigned int regno)
9237 {
9238 gcc_assert (!TARGET_64BIT);
9239
9240 if (USE_HIDDEN_LINKONCE)
9241 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9242 else
9243 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9244 }
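
/* For example, with USE_HIDDEN_LINKONCE the thunk that materializes the
   PC in %ebx is named "__x86.get_pc_thunk.bx".  */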
9245
9246
9247 /* This function generates the pc thunks used with -fpic; each thunk loads
9248    its register with the return address of the caller and then returns.  */
9249
9250 static void
9251 ix86_code_end (void)
9252 {
9253 rtx xops[2];
9254 int regno;
9255
9256 for (regno = AX_REG; regno <= SP_REG; regno++)
9257 {
9258 char name[32];
9259 tree decl;
9260
9261 if (!(pic_labels_used & (1 << regno)))
9262 continue;
9263
9264 get_pc_thunk_name (name, regno);
9265
9266 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9267 get_identifier (name),
9268 build_function_type_list (void_type_node, NULL_TREE));
9269 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9270 NULL_TREE, void_type_node);
9271 TREE_PUBLIC (decl) = 1;
9272 TREE_STATIC (decl) = 1;
9273 DECL_IGNORED_P (decl) = 1;
9274
9275 #if TARGET_MACHO
9276 if (TARGET_MACHO)
9277 {
9278 switch_to_section (darwin_sections[text_coal_section]);
9279 fputs ("\t.weak_definition\t", asm_out_file);
9280 assemble_name (asm_out_file, name);
9281 fputs ("\n\t.private_extern\t", asm_out_file);
9282 assemble_name (asm_out_file, name);
9283 putc ('\n', asm_out_file);
9284 ASM_OUTPUT_LABEL (asm_out_file, name);
9285 DECL_WEAK (decl) = 1;
9286 }
9287 else
9288 #endif
9289 if (USE_HIDDEN_LINKONCE)
9290 {
9291 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9292
9293 targetm.asm_out.unique_section (decl, 0);
9294 switch_to_section (get_named_section (decl, NULL, 0));
9295
9296 targetm.asm_out.globalize_label (asm_out_file, name);
9297 fputs ("\t.hidden\t", asm_out_file);
9298 assemble_name (asm_out_file, name);
9299 putc ('\n', asm_out_file);
9300 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9301 }
9302 else
9303 {
9304 switch_to_section (text_section);
9305 ASM_OUTPUT_LABEL (asm_out_file, name);
9306 }
9307
9308 DECL_INITIAL (decl) = make_node (BLOCK);
9309 current_function_decl = decl;
9310 init_function_start (decl);
9311 first_function_block_is_cold = false;
9312 /* Make sure unwind info is emitted for the thunk if needed. */
9313 final_start_function (emit_barrier (), asm_out_file, 1);
9314
9315       /* Pad the stack IP move with 4 instructions (two NOPs count
9316 as one instruction). */
9317 if (TARGET_PAD_SHORT_FUNCTION)
9318 {
9319 int i = 8;
9320
9321 while (i--)
9322 fputs ("\tnop\n", asm_out_file);
9323 }
9324
9325 xops[0] = gen_rtx_REG (Pmode, regno);
9326 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9327 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9328 fputs ("\tret\n", asm_out_file);
9329 final_end_function ();
9330 init_insn_lengths ();
9331 free_after_compilation (cfun);
9332 set_cfun (NULL);
9333 current_function_decl = NULL;
9334 }
9335
9336 if (flag_split_stack)
9337 file_end_indicate_split_stack ();
9338 }
9339
9340 /* Emit code for the SET_GOT patterns. */
9341
9342 const char *
9343 output_set_got (rtx dest, rtx label)
9344 {
9345 rtx xops[3];
9346
9347 xops[0] = dest;
9348
9349 if (TARGET_VXWORKS_RTP && flag_pic)
9350 {
9351 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9352 xops[2] = gen_rtx_MEM (Pmode,
9353 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9354 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9355
9356 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9357 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9358 an unadorned address. */
9359 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9360 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9361 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9362 return "";
9363 }
9364
9365 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9366
9367 if (!flag_pic)
9368 {
9369 if (TARGET_MACHO)
9370 /* We don't need a pic base, we're not producing pic. */
9371 gcc_unreachable ();
9372
9373 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9374 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9375 targetm.asm_out.internal_label (asm_out_file, "L",
9376 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9377 }
9378 else
9379 {
9380 char name[32];
9381 get_pc_thunk_name (name, REGNO (dest));
9382 pic_labels_used |= 1 << REGNO (dest);
9383
9384 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9385 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9386 output_asm_insn ("call\t%X2", xops);
9387
9388 #if TARGET_MACHO
9389 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9390 This is what will be referenced by the Mach-O PIC subsystem. */
9391 if (machopic_should_output_picbase_label () || !label)
9392 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9393
9394 /* When we are restoring the pic base at the site of a nonlocal label,
9395 and we decided to emit the pic base above, we will still output a
9396 local label used for calculating the correction offset (even though
9397 the offset will be 0 in that case). */
9398 if (label)
9399 targetm.asm_out.internal_label (asm_out_file, "L",
9400 CODE_LABEL_NUMBER (label));
9401 #endif
9402 }
9403
9404 if (!TARGET_MACHO)
9405 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9406
9407 return "";
9408 }
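
/* On ELF targets with flag_pic the code above typically assembles to a
   sequence such as (a sketch for %ebx):

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */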
9409
9410 /* Generate a "push" pattern for input ARG.  */
9411
9412 static rtx
9413 gen_push (rtx arg)
9414 {
9415 struct machine_function *m = cfun->machine;
9416
9417 if (m->fs.cfa_reg == stack_pointer_rtx)
9418 m->fs.cfa_offset += UNITS_PER_WORD;
9419 m->fs.sp_offset += UNITS_PER_WORD;
9420
9421 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9422 arg = gen_rtx_REG (word_mode, REGNO (arg));
9423
9424 return gen_rtx_SET (VOIDmode,
9425 gen_rtx_MEM (word_mode,
9426 gen_rtx_PRE_DEC (Pmode,
9427 stack_pointer_rtx)),
9428 arg);
9429 }
9430
9431 /* Generate a "pop" pattern for input ARG.  */
9432
9433 static rtx
9434 gen_pop (rtx arg)
9435 {
9436 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9437 arg = gen_rtx_REG (word_mode, REGNO (arg));
9438
9439 return gen_rtx_SET (VOIDmode,
9440 arg,
9441 gen_rtx_MEM (word_mode,
9442 gen_rtx_POST_INC (Pmode,
9443 stack_pointer_rtx)));
9444 }
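
/* On a 64-bit target the helpers above therefore produce RTL of the form

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))     ; gen_push
     (set (reg:DI arg) (mem:DI (post_inc:DI (reg:DI sp))))    ; gen_pop

   with SImode replacing DImode on 32-bit targets.  */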
9445
9446 /* Return >= 0 if there is an unused call-clobbered register available
9447 for the entire function. */
9448
9449 static unsigned int
9450 ix86_select_alt_pic_regnum (void)
9451 {
9452 if (ix86_use_pseudo_pic_reg ())
9453 return INVALID_REGNUM;
9454
9455 if (crtl->is_leaf
9456 && !crtl->profile
9457 && !ix86_current_function_calls_tls_descriptor)
9458 {
9459 int i, drap;
9460 /* Can't use the same register for both PIC and DRAP. */
9461 if (crtl->drap_reg)
9462 drap = REGNO (crtl->drap_reg);
9463 else
9464 drap = -1;
9465 for (i = 2; i >= 0; --i)
9466 if (i != drap && !df_regs_ever_live_p (i))
9467 return i;
9468 }
9469
9470 return INVALID_REGNUM;
9471 }
9472
9473 /* Return TRUE if we need to save REGNO. */
9474
9475 static bool
9476 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9477 {
9478 if (pic_offset_table_rtx
9479 && !ix86_use_pseudo_pic_reg ()
9480 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9481 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9482 || crtl->profile
9483 || crtl->calls_eh_return
9484 || crtl->uses_const_pool
9485 || cfun->has_nonlocal_label))
9486 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9487
9488 if (crtl->calls_eh_return && maybe_eh_return)
9489 {
9490 unsigned i;
9491 for (i = 0; ; i++)
9492 {
9493 unsigned test = EH_RETURN_DATA_REGNO (i);
9494 if (test == INVALID_REGNUM)
9495 break;
9496 if (test == regno)
9497 return true;
9498 }
9499 }
9500
9501 if (crtl->drap_reg
9502 && regno == REGNO (crtl->drap_reg)
9503 && !cfun->machine->no_drap_save_restore)
9504 return true;
9505
9506 return (df_regs_ever_live_p (regno)
9507 && !call_used_regs[regno]
9508 && !fixed_regs[regno]
9509 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9510 }
9511
9512 /* Return the number of saved general purpose registers.  */
9513
9514 static int
9515 ix86_nsaved_regs (void)
9516 {
9517 int nregs = 0;
9518 int regno;
9519
9520 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9521 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9522 nregs ++;
9523 return nregs;
9524 }
9525
9526 /* Return the number of saved SSE registers.  */
9527
9528 static int
9529 ix86_nsaved_sseregs (void)
9530 {
9531 int nregs = 0;
9532 int regno;
9533
9534 if (!TARGET_64BIT_MS_ABI)
9535 return 0;
9536 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9537 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9538 nregs ++;
9539 return nregs;
9540 }
9541
9542 /* Given FROM and TO register numbers, say whether this elimination is
9543 allowed. If stack alignment is needed, we can only replace argument
9544 pointer with hard frame pointer, or replace frame pointer with stack
9545 pointer. Otherwise, frame pointer elimination is automatically
9546 handled and all other eliminations are valid. */
9547
9548 static bool
9549 ix86_can_eliminate (const int from, const int to)
9550 {
9551 if (stack_realign_fp)
9552 return ((from == ARG_POINTER_REGNUM
9553 && to == HARD_FRAME_POINTER_REGNUM)
9554 || (from == FRAME_POINTER_REGNUM
9555 && to == STACK_POINTER_REGNUM));
9556 else
9557 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9558 }
9559
9560 /* Return the offset between two registers, one to be eliminated, and the other
9561 its replacement, at the start of a routine. */
9562
9563 HOST_WIDE_INT
9564 ix86_initial_elimination_offset (int from, int to)
9565 {
9566 struct ix86_frame frame;
9567 ix86_compute_frame_layout (&frame);
9568
9569 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9570 return frame.hard_frame_pointer_offset;
9571 else if (from == FRAME_POINTER_REGNUM
9572 && to == HARD_FRAME_POINTER_REGNUM)
9573 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9574 else
9575 {
9576 gcc_assert (to == STACK_POINTER_REGNUM);
9577
9578 if (from == ARG_POINTER_REGNUM)
9579 return frame.stack_pointer_offset;
9580
9581 gcc_assert (from == FRAME_POINTER_REGNUM);
9582 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9583 }
9584 }
9585
9586 /* In a dynamically-aligned function, we can't know the offset from
9587 stack pointer to frame pointer, so we must ensure that setjmp
9588 eliminates fp against the hard fp (%ebp) rather than trying to
9589 index from %esp up to the top of the frame across a gap that is
9590 of unknown (at compile-time) size. */
9591 static rtx
9592 ix86_builtin_setjmp_frame_value (void)
9593 {
9594 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9595 }
9596
9597 /* When using -fsplit-stack, the allocation routines set a field in
9598 the TCB to the bottom of the stack plus this much space, measured
9599 in bytes. */
9600
9601 #define SPLIT_STACK_AVAILABLE 256
9602
9603 /* Fill in structure ix86_frame describing the frame of the currently compiled function.  */
9604
9605 static void
9606 ix86_compute_frame_layout (struct ix86_frame *frame)
9607 {
9608 unsigned HOST_WIDE_INT stack_alignment_needed;
9609 HOST_WIDE_INT offset;
9610 unsigned HOST_WIDE_INT preferred_alignment;
9611 HOST_WIDE_INT size = get_frame_size ();
9612 HOST_WIDE_INT to_allocate;
9613
9614 frame->nregs = ix86_nsaved_regs ();
9615 frame->nsseregs = ix86_nsaved_sseregs ();
9616
9617   /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9618      except for function prologues and leaf functions.  */
9619 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9620 && (!crtl->is_leaf || cfun->calls_alloca != 0
9621 || ix86_current_function_calls_tls_descriptor))
9622 {
9623 crtl->preferred_stack_boundary = 128;
9624 crtl->stack_alignment_needed = 128;
9625 }
9626   /* preferred_stack_boundary is never updated for calls
9627      expanded from a tls descriptor.  Update it here.  We don't update it during
9628      the expand stage because, according to the comments before
9629 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
9630 away. */
9631 else if (ix86_current_function_calls_tls_descriptor
9632 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9633 {
9634 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9635 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9636 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9637 }
9638
9639 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9640 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9641
9642 gcc_assert (!size || stack_alignment_needed);
9643 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9644 gcc_assert (preferred_alignment <= stack_alignment_needed);
9645
9646 /* For SEH we have to limit the amount of code movement into the prologue.
9647 At present we do this via a BLOCKAGE, at which point there's very little
9648 scheduling that can be done, which means that there's very little point
9649 in doing anything except PUSHs. */
9650 if (TARGET_SEH)
9651 cfun->machine->use_fast_prologue_epilogue = false;
9652
9653   /* During reload iterations the number of registers saved can change.
9654      Recompute the value as needed.  Do not recompute when the number of registers
9655      didn't change, as reload makes multiple calls to the function and does not
9656      expect the decision to change within a single iteration.  */
9657 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9658 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9659 {
9660 int count = frame->nregs;
9661 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9662
9663 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9664
9665 /* The fast prologue uses move instead of push to save registers. This
9666 is significantly longer, but also executes faster as modern hardware
9667 can execute the moves in parallel, but can't do that for push/pop.
9668
9669 	 Be careful about choosing which prologue to emit: when the function takes
9670 	 many instructions to execute, we may use the slow version, as well as when
9671 	 the function is known to be outside a hot spot (this is known only with
9672 	 feedback).  Weight the size of the function by the number of registers
9673 	 to save, as it is cheap to use one or two push instructions but very
9674 	 slow to use many of them.  */
9675 if (count)
9676 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9677 if (node->frequency < NODE_FREQUENCY_NORMAL
9678 || (flag_branch_probabilities
9679 && node->frequency < NODE_FREQUENCY_HOT))
9680 cfun->machine->use_fast_prologue_epilogue = false;
9681 else
9682 cfun->machine->use_fast_prologue_epilogue
9683 = !expensive_function_p (count);
9684 }
9685
9686 frame->save_regs_using_mov
9687 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9688 /* If static stack checking is enabled and done with probes,
9689 the registers need to be saved before allocating the frame. */
9690 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9691
9692 /* Skip return address. */
9693 offset = UNITS_PER_WORD;
9694
9695 /* Skip pushed static chain. */
9696 if (ix86_static_chain_on_stack)
9697 offset += UNITS_PER_WORD;
9698
9699 /* Skip saved base pointer. */
9700 if (frame_pointer_needed)
9701 offset += UNITS_PER_WORD;
9702 frame->hfp_save_offset = offset;
9703
9704 /* The traditional frame pointer location is at the top of the frame. */
9705 frame->hard_frame_pointer_offset = offset;
9706
9707 /* Register save area */
9708 offset += frame->nregs * UNITS_PER_WORD;
9709 frame->reg_save_offset = offset;
9710
9711 /* On SEH target, registers are pushed just before the frame pointer
9712 location. */
9713 if (TARGET_SEH)
9714 frame->hard_frame_pointer_offset = offset;
9715
9716 /* Align and set SSE register save area. */
9717 if (frame->nsseregs)
9718 {
9719 /* The only ABI that has saved SSE registers (Win64) also has a
9720 16-byte aligned default stack, and thus we don't need to be
9721 within the re-aligned local stack frame to save them. */
9722 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9723 offset = (offset + 16 - 1) & -16;
9724 offset += frame->nsseregs * 16;
9725 }
9726 frame->sse_reg_save_offset = offset;
9727
9728 /* The re-aligned stack starts here. Values before this point are not
9729 directly comparable with values below this point. In order to make
9730 sure that no value happens to be the same before and after, force
9731 the alignment computation below to add a non-zero value. */
9732 if (stack_realign_fp)
9733 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9734
9735 /* Va-arg area */
9736 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9737 offset += frame->va_arg_size;
9738
9739 /* Align start of frame for local function. */
9740 if (stack_realign_fp
9741 || offset != frame->sse_reg_save_offset
9742 || size != 0
9743 || !crtl->is_leaf
9744 || cfun->calls_alloca
9745 || ix86_current_function_calls_tls_descriptor)
9746 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9747
9748 /* Frame pointer points here. */
9749 frame->frame_pointer_offset = offset;
9750
9751 offset += size;
9752
9753 /* Add outgoing arguments area. Can be skipped if we eliminated
9754 all the function calls as dead code.
9755 Skipping is however impossible when function calls alloca. Alloca
9756 expander assumes that last crtl->outgoing_args_size
9757 of stack frame are unused. */
9758 if (ACCUMULATE_OUTGOING_ARGS
9759 && (!crtl->is_leaf || cfun->calls_alloca
9760 || ix86_current_function_calls_tls_descriptor))
9761 {
9762 offset += crtl->outgoing_args_size;
9763 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9764 }
9765 else
9766 frame->outgoing_arguments_size = 0;
9767
9768 /* Align stack boundary. Only needed if we're calling another function
9769 or using alloca. */
9770 if (!crtl->is_leaf || cfun->calls_alloca
9771 || ix86_current_function_calls_tls_descriptor)
9772 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9773
9774 /* We've reached end of stack frame. */
9775 frame->stack_pointer_offset = offset;
9776
9777 /* Size prologue needs to allocate. */
9778 to_allocate = offset - frame->sse_reg_save_offset;
9779
9780 if ((!to_allocate && frame->nregs <= 1)
9781 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9782 frame->save_regs_using_mov = false;
9783
9784 if (ix86_using_red_zone ()
9785 && crtl->sp_is_unchanging
9786 && crtl->is_leaf
9787 && !ix86_current_function_calls_tls_descriptor)
9788 {
9789 frame->red_zone_size = to_allocate;
9790 if (frame->save_regs_using_mov)
9791 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9792 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9793 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9794 }
9795 else
9796 frame->red_zone_size = 0;
9797 frame->stack_pointer_offset -= frame->red_zone_size;
9798
9799 /* The SEH frame pointer location is near the bottom of the frame.
9800 This is enforced by the fact that the difference between the
9801 stack pointer and the frame pointer is limited to 240 bytes in
9802 the unwind data structure. */
9803 if (TARGET_SEH)
9804 {
9805 HOST_WIDE_INT diff;
9806
9807       /* If we can leave the frame pointer where it is, do so; this also returns
9808 	 the establisher frame for __builtin_frame_address (0).  */
9809 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9810 if (diff <= SEH_MAX_FRAME_SIZE
9811 && (diff > 240 || (diff & 15) != 0)
9812 && !crtl->accesses_prior_frames)
9813 {
9814 /* Ideally we'd determine what portion of the local stack frame
9815 (within the constraint of the lowest 240) is most heavily used.
9816 But without that complication, simply bias the frame pointer
9817 by 128 bytes so as to maximize the amount of the local stack
9818 frame that is addressable with 8-bit offsets. */
9819 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9820 }
9821 }
9822 }
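
/* To summarize, the areas laid out above appear in this order, from the
   incoming stack pointer downwards (a rough sketch; any area may be
   empty):

     return address
     pushed static chain
     saved frame pointer
     general register save area
     SSE register save area (64-bit MS ABI)
     va-arg register save area
     local variables
     outgoing argument area
     red zone (if in use)  */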
9823
9824 /* This is semi-inlined memory_address_length, but simplified
9825 since we know that we're always dealing with reg+offset, and
9826 to avoid having to create and discard all that rtl. */
9827
9828 static inline int
9829 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9830 {
9831 int len = 4;
9832
9833 if (offset == 0)
9834 {
9835 /* EBP and R13 cannot be encoded without an offset. */
9836 len = (regno == BP_REG || regno == R13_REG);
9837 }
9838 else if (IN_RANGE (offset, -128, 127))
9839 len = 1;
9840
9841 /* ESP and R12 must be encoded with a SIB byte. */
9842 if (regno == SP_REG || regno == R12_REG)
9843 len++;
9844
9845 return len;
9846 }
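
/* For example, 8(%ebp) needs only a disp8 and yields 1, 8(%esp) also
   needs a SIB byte and yields 2, and a plain (%ebp) still requires a
   zero disp8, again giving 1.  */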
9847
9848 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9849 The valid base registers are taken from CFUN->MACHINE->FS. */
9850
9851 static rtx
9852 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9853 {
9854 const struct machine_function *m = cfun->machine;
9855 rtx base_reg = NULL;
9856 HOST_WIDE_INT base_offset = 0;
9857
9858 if (m->use_fast_prologue_epilogue)
9859 {
9860 /* Choose the base register most likely to allow the most scheduling
9861 opportunities. Generally FP is valid throughout the function,
9862 while DRAP must be reloaded within the epilogue. But choose either
9863 over the SP due to increased encoding size. */
9864
9865 if (m->fs.fp_valid)
9866 {
9867 base_reg = hard_frame_pointer_rtx;
9868 base_offset = m->fs.fp_offset - cfa_offset;
9869 }
9870 else if (m->fs.drap_valid)
9871 {
9872 base_reg = crtl->drap_reg;
9873 base_offset = 0 - cfa_offset;
9874 }
9875 else if (m->fs.sp_valid)
9876 {
9877 base_reg = stack_pointer_rtx;
9878 base_offset = m->fs.sp_offset - cfa_offset;
9879 }
9880 }
9881 else
9882 {
9883 HOST_WIDE_INT toffset;
9884 int len = 16, tlen;
9885
9886 /* Choose the base register with the smallest address encoding.
9887 With a tie, choose FP > DRAP > SP. */
9888 if (m->fs.sp_valid)
9889 {
9890 base_reg = stack_pointer_rtx;
9891 base_offset = m->fs.sp_offset - cfa_offset;
9892 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9893 }
9894 if (m->fs.drap_valid)
9895 {
9896 toffset = 0 - cfa_offset;
9897 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9898 if (tlen <= len)
9899 {
9900 base_reg = crtl->drap_reg;
9901 base_offset = toffset;
9902 len = tlen;
9903 }
9904 }
9905 if (m->fs.fp_valid)
9906 {
9907 toffset = m->fs.fp_offset - cfa_offset;
9908 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9909 if (tlen <= len)
9910 {
9911 base_reg = hard_frame_pointer_rtx;
9912 base_offset = toffset;
9913 len = tlen;
9914 }
9915 }
9916 }
9917 gcc_assert (base_reg != NULL);
9918
9919 return plus_constant (Pmode, base_reg, base_offset);
9920 }
9921
9922 /* Emit code to save registers in the prologue. */
9923
9924 static void
9925 ix86_emit_save_regs (void)
9926 {
9927 unsigned int regno;
9928 rtx insn;
9929
9930 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9931 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9932 {
9933 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9934 RTX_FRAME_RELATED_P (insn) = 1;
9935 }
9936 }
9937
9938 /* Emit a single register save at CFA - CFA_OFFSET. */
9939
9940 static void
9941 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9942 HOST_WIDE_INT cfa_offset)
9943 {
9944 struct machine_function *m = cfun->machine;
9945 rtx reg = gen_rtx_REG (mode, regno);
9946 rtx mem, addr, base, insn;
9947
9948 addr = choose_baseaddr (cfa_offset);
9949 mem = gen_frame_mem (mode, addr);
9950
9951 /* For SSE saves, we need to indicate the 128-bit alignment. */
9952 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9953
9954 insn = emit_move_insn (mem, reg);
9955 RTX_FRAME_RELATED_P (insn) = 1;
9956
9957 base = addr;
9958 if (GET_CODE (base) == PLUS)
9959 base = XEXP (base, 0);
9960 gcc_checking_assert (REG_P (base));
9961
9962 /* When saving registers into a re-aligned local stack frame, avoid
9963 any tricky guessing by dwarf2out. */
9964 if (m->fs.realigned)
9965 {
9966 gcc_checking_assert (stack_realign_drap);
9967
9968 if (regno == REGNO (crtl->drap_reg))
9969 {
9970 /* A bit of a hack. We force the DRAP register to be saved in
9971 the re-aligned stack frame, which provides us with a copy
9972 of the CFA that will last past the prologue. Install it. */
9973 gcc_checking_assert (cfun->machine->fs.fp_valid);
9974 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9975 cfun->machine->fs.fp_offset - cfa_offset);
9976 mem = gen_rtx_MEM (mode, addr);
9977 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9978 }
9979 else
9980 {
9981 /* The frame pointer is a stable reference within the
9982 aligned frame. Use it. */
9983 gcc_checking_assert (cfun->machine->fs.fp_valid);
9984 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9985 cfun->machine->fs.fp_offset - cfa_offset);
9986 mem = gen_rtx_MEM (mode, addr);
9987 add_reg_note (insn, REG_CFA_EXPRESSION,
9988 gen_rtx_SET (VOIDmode, mem, reg));
9989 }
9990 }
9991
9992 /* The memory may not be relative to the current CFA register,
9993 which means that we may need to generate a new pattern for
9994 use by the unwind info. */
9995 else if (base != m->fs.cfa_reg)
9996 {
9997 addr = plus_constant (Pmode, m->fs.cfa_reg,
9998 m->fs.cfa_offset - cfa_offset);
9999 mem = gen_rtx_MEM (mode, addr);
10000 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10001 }
10002 }
10003
10004 /* Emit code to save registers using MOV insns.
10005 First register is stored at CFA - CFA_OFFSET. */
10006 static void
10007 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10008 {
10009 unsigned int regno;
10010
10011 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10012 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10013 {
10014 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10015 cfa_offset -= UNITS_PER_WORD;
10016 }
10017 }
10018
10019 /* Emit code to save SSE registers using MOV insns.
10020 First register is stored at CFA - CFA_OFFSET. */
10021 static void
10022 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10023 {
10024 unsigned int regno;
10025
10026 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10027 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10028 {
10029 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10030 cfa_offset -= 16;
10031 }
10032 }
10033
10034 static GTY(()) rtx queued_cfa_restores;
10035
10036 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next stack
10037    manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
10038    Don't add the note if the previously saved value will be left untouched
10039    within the stack red-zone until return, as unwinders can find the same value
10040 in the register and on the stack. */
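/* Illustrative example (assumed values): on 64-bit targets with the red zone
   available, ix86_expand_epilogue sets fs.red_zone_offset to RED_ZONE_SIZE
   + UNITS_PER_WORD, e.g. 128 + 8 = 136 when RED_ZONE_SIZE is 128. A register
   saved at CFA - 16 then satisfies cfa_offset <= red_zone_offset, its stack
   slot stays untouched until the return (when the function is not
   shrink-wrapped), and no REG_CFA_RESTORE note is emitted for it. */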
10041
10042 static void
10043 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10044 {
10045 if (!crtl->shrink_wrapped
10046 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10047 return;
10048
10049 if (insn)
10050 {
10051 add_reg_note (insn, REG_CFA_RESTORE, reg);
10052 RTX_FRAME_RELATED_P (insn) = 1;
10053 }
10054 else
10055 queued_cfa_restores
10056 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10057 }
10058
10059 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10060
10061 static void
10062 ix86_add_queued_cfa_restore_notes (rtx insn)
10063 {
10064 rtx last;
10065 if (!queued_cfa_restores)
10066 return;
10067 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10068 ;
10069 XEXP (last, 1) = REG_NOTES (insn);
10070 REG_NOTES (insn) = queued_cfa_restores;
10071 queued_cfa_restores = NULL_RTX;
10072 RTX_FRAME_RELATED_P (insn) = 1;
10073 }
10074
10075 /* Expand prologue or epilogue stack adjustment.
10076 The pattern exists to put a dependency on all ebp-based memory accesses.
10077 STYLE should be negative if instructions should be marked as frame related,
10078 zero if the %r11 register is live and cannot be freely used, and positive
10079 otherwise. */
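/* A typical use, as it appears later in this file: the prologue allocates the
   local frame with

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-allocate), -1,
                                m->fs.cfa_reg == stack_pointer_rtx);

   where the negative STYLE marks the adjustment as frame related, while the
   epilogue passes through the (non-negative) STYLE it was given so that any
   queued REG_CFA_RESTORE notes are attached to the adjustment insn. */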
10080
10081 static void
10082 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10083 int style, bool set_cfa)
10084 {
10085 struct machine_function *m = cfun->machine;
10086 rtx insn;
10087 bool add_frame_related_expr = false;
10088
10089 if (Pmode == SImode)
10090 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10091 else if (x86_64_immediate_operand (offset, DImode))
10092 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10093 else
10094 {
10095 rtx tmp;
10096 /* r11 is used by indirect sibcall return as well, set before the
10097 epilogue and used after the epilogue. */
10098 if (style)
10099 tmp = gen_rtx_REG (DImode, R11_REG);
10100 else
10101 {
10102 gcc_assert (src != hard_frame_pointer_rtx
10103 && dest != hard_frame_pointer_rtx);
10104 tmp = hard_frame_pointer_rtx;
10105 }
10106 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10107 if (style < 0)
10108 add_frame_related_expr = true;
10109
10110 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10111 }
10112
10113 insn = emit_insn (insn);
10114 if (style >= 0)
10115 ix86_add_queued_cfa_restore_notes (insn);
10116
10117 if (set_cfa)
10118 {
10119 rtx r;
10120
10121 gcc_assert (m->fs.cfa_reg == src);
10122 m->fs.cfa_offset += INTVAL (offset);
10123 m->fs.cfa_reg = dest;
10124
10125 r = gen_rtx_PLUS (Pmode, src, offset);
10126 r = gen_rtx_SET (VOIDmode, dest, r);
10127 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10128 RTX_FRAME_RELATED_P (insn) = 1;
10129 }
10130 else if (style < 0)
10131 {
10132 RTX_FRAME_RELATED_P (insn) = 1;
10133 if (add_frame_related_expr)
10134 {
10135 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10136 r = gen_rtx_SET (VOIDmode, dest, r);
10137 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10138 }
10139 }
10140
10141 if (dest == stack_pointer_rtx)
10142 {
10143 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10144 bool valid = m->fs.sp_valid;
10145
10146 if (src == hard_frame_pointer_rtx)
10147 {
10148 valid = m->fs.fp_valid;
10149 ooffset = m->fs.fp_offset;
10150 }
10151 else if (src == crtl->drap_reg)
10152 {
10153 valid = m->fs.drap_valid;
10154 ooffset = 0;
10155 }
10156 else
10157 {
10158 /* Else there are two possibilities: SP itself, which we set
10159 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10160 taken care of by hand along the eh_return path. */
10161 gcc_checking_assert (src == stack_pointer_rtx
10162 || offset == const0_rtx);
10163 }
10164
10165 m->fs.sp_offset = ooffset - INTVAL (offset);
10166 m->fs.sp_valid = valid;
10167 }
10168 }
10169
10170 /* Find an available register to be used as the dynamic realign argument
10171 pointer register. Such a register will be written in the prologue and
10172 used at the beginning of the body, so it must not be
10173 1. a parameter-passing register.
10174 2. the GOT pointer.
10175 We reuse the static-chain register if it is available. Otherwise, we
10176 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10177 shorter encoding.
10178
10179 Return: the regno of the chosen register. */
10180
10181 static unsigned int
10182 find_drap_reg (void)
10183 {
10184 tree decl = cfun->decl;
10185
10186 if (TARGET_64BIT)
10187 {
10188 /* Use R13 for a nested function or a function that needs a static
10189 chain. Since a function with a tail call may use any caller-saved
10190 register in the epilogue, DRAP must not use a caller-saved
10191 register in that case. */
10192 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10193 return R13_REG;
10194
10195 return R10_REG;
10196 }
10197 else
10198 {
10199 /* Use DI for a nested function or a function that needs a static
10200 chain. Since a function with a tail call may use any caller-saved
10201 register in the epilogue, DRAP must not use a caller-saved
10202 register in that case. */
10203 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10204 return DI_REG;
10205
10206 /* Reuse static chain register if it isn't used for parameter
10207 passing. */
10208 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10209 {
10210 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10211 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10212 return CX_REG;
10213 }
10214 return DI_REG;
10215 }
10216 }
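/* Illustration of the 32-bit selection above (hypothetical callees): a cdecl
   function with no static chain, no tail call and regparm <= 2 gets CX_REG,
   whereas a fastcall or thiscall function (ECX already carries an argument)
   falls back to DI_REG. */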
10217
10218 /* Return minimum incoming stack alignment. */
10219
10220 static unsigned int
10221 ix86_minimum_incoming_stack_boundary (bool sibcall)
10222 {
10223 unsigned int incoming_stack_boundary;
10224
10225 /* Prefer the one specified at command line. */
10226 if (ix86_user_incoming_stack_boundary)
10227 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10228 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10229 when -mstackrealign is used, this isn't a sibcall check, and the
10230 estimated stack alignment is 128 bits. */
10231 else if (!sibcall
10232 && !TARGET_64BIT
10233 && ix86_force_align_arg_pointer
10234 && crtl->stack_alignment_estimated == 128)
10235 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10236 else
10237 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10238
10239 /* Incoming stack alignment can be changed on individual functions
10240 via force_align_arg_pointer attribute. We use the smallest
10241 incoming stack boundary. */
10242 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10243 && lookup_attribute (ix86_force_align_arg_pointer_string,
10244 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10245 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10246
10247 /* The incoming stack frame has to be aligned at least at
10248 parm_stack_boundary. */
10249 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10250 incoming_stack_boundary = crtl->parm_stack_boundary;
10251
10252 /* The stack at the entry of main is aligned by the runtime. We use the
10253 smallest incoming stack boundary. */
10254 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10255 && DECL_NAME (current_function_decl)
10256 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10257 && DECL_FILE_SCOPE_P (current_function_decl))
10258 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10259
10260 return incoming_stack_boundary;
10261 }
10262
10263 /* Update incoming stack boundary and estimated stack alignment. */
10264
10265 static void
10266 ix86_update_stack_boundary (void)
10267 {
10268 ix86_incoming_stack_boundary
10269 = ix86_minimum_incoming_stack_boundary (false);
10270
10271 /* x86_64 vararg needs 16byte stack alignment for register save
10272 area. */
10273 if (TARGET_64BIT
10274 && cfun->stdarg
10275 && crtl->stack_alignment_estimated < 128)
10276 crtl->stack_alignment_estimated = 128;
10277 }
10278
10279 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10280 needed or an rtx for DRAP otherwise. */
10281
10282 static rtx
10283 ix86_get_drap_rtx (void)
10284 {
10285 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10286 crtl->need_drap = true;
10287
10288 if (stack_realign_drap)
10289 {
10290 /* Assign DRAP to vDRAP and return vDRAP. */
10291 unsigned int regno = find_drap_reg ();
10292 rtx drap_vreg;
10293 rtx arg_ptr;
10294 rtx_insn *seq, *insn;
10295
10296 arg_ptr = gen_rtx_REG (Pmode, regno);
10297 crtl->drap_reg = arg_ptr;
10298
10299 start_sequence ();
10300 drap_vreg = copy_to_reg (arg_ptr);
10301 seq = get_insns ();
10302 end_sequence ();
10303
10304 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10305 if (!optimize)
10306 {
10307 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10308 RTX_FRAME_RELATED_P (insn) = 1;
10309 }
10310 return drap_vreg;
10311 }
10312 else
10313 return NULL;
10314 }
10315
10316 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10317
10318 static rtx
10319 ix86_internal_arg_pointer (void)
10320 {
10321 return virtual_incoming_args_rtx;
10322 }
10323
10324 struct scratch_reg {
10325 rtx reg;
10326 bool saved;
10327 };
10328
10329 /* Return a short-lived scratch register for use on function entry.
10330 In 32-bit mode, it is valid only after the registers are saved
10331 in the prologue. This register must be released by means of
10332 release_scratch_register_on_entry once it is dead. */
10333
10334 static void
10335 get_scratch_register_on_entry (struct scratch_reg *sr)
10336 {
10337 int regno;
10338
10339 sr->saved = false;
10340
10341 if (TARGET_64BIT)
10342 {
10343 /* We always use R11 in 64-bit mode. */
10344 regno = R11_REG;
10345 }
10346 else
10347 {
10348 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10349 bool fastcall_p
10350 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10351 bool thiscall_p
10352 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10353 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10354 int regparm = ix86_function_regparm (fntype, decl);
10355 int drap_regno
10356 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10357
10358 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10359 for the static chain register. */
10360 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10361 && drap_regno != AX_REG)
10362 regno = AX_REG;
10363 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10364 for the static chain register. */
10365 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10366 regno = AX_REG;
10367 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10368 regno = DX_REG;
10369 /* ecx is the static chain register. */
10370 else if (regparm < 3 && !fastcall_p && !thiscall_p
10371 && !static_chain_p
10372 && drap_regno != CX_REG)
10373 regno = CX_REG;
10374 else if (ix86_save_reg (BX_REG, true))
10375 regno = BX_REG;
10376 /* esi is the static chain register. */
10377 else if (!(regparm == 3 && static_chain_p)
10378 && ix86_save_reg (SI_REG, true))
10379 regno = SI_REG;
10380 else if (ix86_save_reg (DI_REG, true))
10381 regno = DI_REG;
10382 else
10383 {
10384 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10385 sr->saved = true;
10386 }
10387 }
10388
10389 sr->reg = gen_rtx_REG (Pmode, regno);
10390 if (sr->saved)
10391 {
10392 rtx insn = emit_insn (gen_push (sr->reg));
10393 RTX_FRAME_RELATED_P (insn) = 1;
10394 }
10395 }
10396
10397 /* Release a scratch register obtained from the preceding function. */
10398
10399 static void
10400 release_scratch_register_on_entry (struct scratch_reg *sr)
10401 {
10402 if (sr->saved)
10403 {
10404 struct machine_function *m = cfun->machine;
10405 rtx x, insn = emit_insn (gen_pop (sr->reg));
10406
10407 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10408 RTX_FRAME_RELATED_P (insn) = 1;
10409 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10410 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10411 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10412 m->fs.sp_offset -= UNITS_PER_WORD;
10413 }
10414 }
10415
10416 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10417
10418 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10419
10420 static void
10421 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10422 {
10423 /* We skip the probe for the first interval + a small dope of 4 words and
10424 probe that many bytes past the specified size to maintain a protection
10425 area at the bottom of the stack. */
10426 const int dope = 4 * UNITS_PER_WORD;
10427 rtx size_rtx = GEN_INT (size), last;
10428
10429 /* See if we have a constant small number of probes to generate. If so,
10430 that's the easy case. The run-time loop is made up of 11 insns in the
10431 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10432 for n # of intervals. */
10433 if (size <= 5 * PROBE_INTERVAL)
10434 {
10435 HOST_WIDE_INT i, adjust;
10436 bool first_probe = true;
10437
10438 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10439 values of N from 1 until it exceeds SIZE. If only one probe is
10440 needed, this will not generate any code. Then adjust and probe
10441 to PROBE_INTERVAL + SIZE. */
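/* Worked example (assuming PROBE_INTERVAL == 4096 and TARGET_64BIT, so
   dope == 32) for size == 10000: the loop body runs for i == 4096 and
   i == 8192, adjusting SP by 8224 (2 * 4096 + 32) and then by 4096, each
   followed by a probe; the tail adjustment is 10000 + 4096 - 12288 == 1808
   with a final probe; the closing adjustment below adds back 4096 + 32,
   for a net SP decrease of exactly 10000. */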
10442 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10443 {
10444 if (first_probe)
10445 {
10446 adjust = 2 * PROBE_INTERVAL + dope;
10447 first_probe = false;
10448 }
10449 else
10450 adjust = PROBE_INTERVAL;
10451
10452 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10453 plus_constant (Pmode, stack_pointer_rtx,
10454 -adjust)));
10455 emit_stack_probe (stack_pointer_rtx);
10456 }
10457
10458 if (first_probe)
10459 adjust = size + PROBE_INTERVAL + dope;
10460 else
10461 adjust = size + PROBE_INTERVAL - i;
10462
10463 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10464 plus_constant (Pmode, stack_pointer_rtx,
10465 -adjust)));
10466 emit_stack_probe (stack_pointer_rtx);
10467
10468 /* Adjust back to account for the additional first interval. */
10469 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10470 plus_constant (Pmode, stack_pointer_rtx,
10471 PROBE_INTERVAL + dope)));
10472 }
10473
10474 /* Otherwise, do the same as above, but in a loop. Note that we must be
10475 extra careful with variables wrapping around because we might be at
10476 the very top (or the very bottom) of the address space and we have
10477 to be able to handle this case properly; in particular, we use an
10478 equality test for the loop condition. */
10479 else
10480 {
10481 HOST_WIDE_INT rounded_size;
10482 struct scratch_reg sr;
10483
10484 get_scratch_register_on_entry (&sr);
10485
10486
10487 /* Step 1: round SIZE to the previous multiple of the interval. */
10488
10489 rounded_size = size & -PROBE_INTERVAL;
10490
10491
10492 /* Step 2: compute initial and final value of the loop counter. */
10493
10494 /* SP = SP_0 + PROBE_INTERVAL. */
10495 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10496 plus_constant (Pmode, stack_pointer_rtx,
10497 - (PROBE_INTERVAL + dope))));
10498
10499 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10500 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10501 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10502 gen_rtx_PLUS (Pmode, sr.reg,
10503 stack_pointer_rtx)));
10504
10505
10506 /* Step 3: the loop
10507
10508 while (SP != LAST_ADDR)
10509 {
10510 SP = SP + PROBE_INTERVAL
10511 probe at SP
10512 }
10513
10514 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10515 values of N from 1 until it is equal to ROUNDED_SIZE. */
10516
10517 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10518
10519
10520 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10521 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10522
10523 if (size != rounded_size)
10524 {
10525 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10526 plus_constant (Pmode, stack_pointer_rtx,
10527 rounded_size - size)));
10528 emit_stack_probe (stack_pointer_rtx);
10529 }
10530
10531 /* Adjust back to account for the additional first interval. */
10532 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10533 plus_constant (Pmode, stack_pointer_rtx,
10534 PROBE_INTERVAL + dope)));
10535
10536 release_scratch_register_on_entry (&sr);
10537 }
10538
10539 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10540
10541 /* Even if the stack pointer isn't the CFA register, we need to correctly
10542 describe the adjustments made to it, in particular differentiate the
10543 frame-related ones from the frame-unrelated ones. */
10544 if (size > 0)
10545 {
10546 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10547 XVECEXP (expr, 0, 0)
10548 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10549 plus_constant (Pmode, stack_pointer_rtx, -size));
10550 XVECEXP (expr, 0, 1)
10551 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10552 plus_constant (Pmode, stack_pointer_rtx,
10553 PROBE_INTERVAL + dope + size));
10554 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10555 RTX_FRAME_RELATED_P (last) = 1;
10556
10557 cfun->machine->fs.sp_offset += size;
10558 }
10559
10560 /* Make sure nothing is scheduled before we are done. */
10561 emit_insn (gen_blockage ());
10562 }
10563
10564 /* Adjust the stack pointer up to REG while probing it. */
10565
10566 const char *
10567 output_adjust_stack_and_probe (rtx reg)
10568 {
10569 static int labelno = 0;
10570 char loop_lab[32], end_lab[32];
10571 rtx xops[2];
10572
10573 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10574 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10575
10576 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10577
10578 /* Jump to END_LAB if SP == LAST_ADDR. */
10579 xops[0] = stack_pointer_rtx;
10580 xops[1] = reg;
10581 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10582 fputs ("\tje\t", asm_out_file);
10583 assemble_name_raw (asm_out_file, end_lab);
10584 fputc ('\n', asm_out_file);
10585
10586 /* SP = SP + PROBE_INTERVAL. */
10587 xops[1] = GEN_INT (PROBE_INTERVAL);
10588 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10589
10590 /* Probe at SP. */
10591 xops[1] = const0_rtx;
10592 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10593
10594 fprintf (asm_out_file, "\tjmp\t");
10595 assemble_name_raw (asm_out_file, loop_lab);
10596 fputc ('\n', asm_out_file);
10597
10598 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10599
10600 return "";
10601 }
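/* For reference, with an assumed 64-bit scratch register of %r11 and a
   PROBE_INTERVAL of 4096, the templates above produce an AT&T-syntax loop
   along the lines of (illustrative sketch only):

	.LPSRL0:
		cmpq	%r11, %rsp
		je	.LPSRE0
		subq	$4096, %rsp
		orq	$0, (%rsp)
		jmp	.LPSRL0
	.LPSRE0:
 */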
10602
10603 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10604 inclusive. These are offsets from the current stack pointer. */
10605
10606 static void
10607 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10608 {
10609 /* See if we have a constant small number of probes to generate. If so,
10610 that's the easy case. The run-time loop is made up of 7 insns in the
10611 generic case while the compile-time loop is made up of n insns for n #
10612 of intervals. */
10613 if (size <= 7 * PROBE_INTERVAL)
10614 {
10615 HOST_WIDE_INT i;
10616
10617 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10618 it exceeds SIZE. If only one probe is needed, this will not
10619 generate any code. Then probe at FIRST + SIZE. */
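/* For example (assuming PROBE_INTERVAL == 4096), first == 8192 and
   size == 10000 emit probes at SP - 12288, SP - 16384 and finally
   SP - 18192, without moving the stack pointer. */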
10620 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10621 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10622 -(first + i)));
10623
10624 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10625 -(first + size)));
10626 }
10627
10628 /* Otherwise, do the same as above, but in a loop. Note that we must be
10629 extra careful with variables wrapping around because we might be at
10630 the very top (or the very bottom) of the address space and we have
10631 to be able to handle this case properly; in particular, we use an
10632 equality test for the loop condition. */
10633 else
10634 {
10635 HOST_WIDE_INT rounded_size, last;
10636 struct scratch_reg sr;
10637
10638 get_scratch_register_on_entry (&sr);
10639
10640
10641 /* Step 1: round SIZE to the previous multiple of the interval. */
10642
10643 rounded_size = size & -PROBE_INTERVAL;
10644
10645
10646 /* Step 2: compute initial and final value of the loop counter. */
10647
10648 /* TEST_OFFSET = FIRST. */
10649 emit_move_insn (sr.reg, GEN_INT (-first));
10650
10651 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10652 last = first + rounded_size;
10653
10654
10655 /* Step 3: the loop
10656
10657 while (TEST_ADDR != LAST_ADDR)
10658 {
10659 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10660 probe at TEST_ADDR
10661 }
10662
10663 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10664 until it is equal to ROUNDED_SIZE. */
10665
10666 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10667
10668
10669 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10670 that SIZE is equal to ROUNDED_SIZE. */
10671
10672 if (size != rounded_size)
10673 emit_stack_probe (plus_constant (Pmode,
10674 gen_rtx_PLUS (Pmode,
10675 stack_pointer_rtx,
10676 sr.reg),
10677 rounded_size - size));
10678
10679 release_scratch_register_on_entry (&sr);
10680 }
10681
10682 /* Make sure nothing is scheduled before we are done. */
10683 emit_insn (gen_blockage ());
10684 }
10685
10686 /* Probe a range of stack addresses from REG to END, inclusive. These are
10687 offsets from the current stack pointer. */
10688
10689 const char *
10690 output_probe_stack_range (rtx reg, rtx end)
10691 {
10692 static int labelno = 0;
10693 char loop_lab[32], end_lab[32];
10694 rtx xops[3];
10695
10696 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10697 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10698
10699 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10700
10701 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10702 xops[0] = reg;
10703 xops[1] = end;
10704 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10705 fputs ("\tje\t", asm_out_file);
10706 assemble_name_raw (asm_out_file, end_lab);
10707 fputc ('\n', asm_out_file);
10708
10709 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10710 xops[1] = GEN_INT (PROBE_INTERVAL);
10711 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10712
10713 /* Probe at TEST_ADDR. */
10714 xops[0] = stack_pointer_rtx;
10715 xops[1] = reg;
10716 xops[2] = const0_rtx;
10717 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10718
10719 fprintf (asm_out_file, "\tjmp\t");
10720 assemble_name_raw (asm_out_file, loop_lab);
10721 fputc ('\n', asm_out_file);
10722
10723 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10724
10725 return "";
10726 }
10727
10728 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10729 to be generated in correct form. */
10730 static void
10731 ix86_finalize_stack_realign_flags (void)
10732 {
10733 /* Check if stack realignment is really needed after reload, and
10734 store the result in cfun. */
10735 unsigned int incoming_stack_boundary
10736 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10737 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10738 unsigned int stack_realign = (incoming_stack_boundary
10739 < (crtl->is_leaf
10740 ? crtl->max_used_stack_slot_alignment
10741 : crtl->stack_alignment_needed));
10742
10743 if (crtl->stack_realign_finalized)
10744 {
10745 /* After stack_realign_needed is finalized, we can no longer
10746 change it. */
10747 gcc_assert (crtl->stack_realign_needed == stack_realign);
10748 return;
10749 }
10750
10751 /* If the only reason for frame_pointer_needed is that we conservatively
10752 assumed stack realignment might be needed, but in the end nothing that
10753 needed the stack alignment had been spilled, clear frame_pointer_needed
10754 and say we don't need stack realignment. */
10755 if (stack_realign
10756 && frame_pointer_needed
10757 && crtl->is_leaf
10758 && flag_omit_frame_pointer
10759 && crtl->sp_is_unchanging
10760 && !ix86_current_function_calls_tls_descriptor
10761 && !crtl->accesses_prior_frames
10762 && !cfun->calls_alloca
10763 && !crtl->calls_eh_return
10764 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10765 && !ix86_frame_pointer_required ()
10766 && get_frame_size () == 0
10767 && ix86_nsaved_sseregs () == 0
10768 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10769 {
10770 HARD_REG_SET set_up_by_prologue, prologue_used;
10771 basic_block bb;
10772
10773 CLEAR_HARD_REG_SET (prologue_used);
10774 CLEAR_HARD_REG_SET (set_up_by_prologue);
10775 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10776 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10777 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10778 HARD_FRAME_POINTER_REGNUM);
10779 FOR_EACH_BB_FN (bb, cfun)
10780 {
10781 rtx_insn *insn;
10782 FOR_BB_INSNS (bb, insn)
10783 if (NONDEBUG_INSN_P (insn)
10784 && requires_stack_frame_p (insn, prologue_used,
10785 set_up_by_prologue))
10786 {
10787 crtl->stack_realign_needed = stack_realign;
10788 crtl->stack_realign_finalized = true;
10789 return;
10790 }
10791 }
10792
10793 /* If drap has been set, but it actually isn't live at the start
10794 of the function, there is no reason to set it up. */
10795 if (crtl->drap_reg)
10796 {
10797 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10798 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10799 {
10800 crtl->drap_reg = NULL_RTX;
10801 crtl->need_drap = false;
10802 }
10803 }
10804 else
10805 cfun->machine->no_drap_save_restore = true;
10806
10807 frame_pointer_needed = false;
10808 stack_realign = false;
10809 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10810 crtl->stack_alignment_needed = incoming_stack_boundary;
10811 crtl->stack_alignment_estimated = incoming_stack_boundary;
10812 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10813 crtl->preferred_stack_boundary = incoming_stack_boundary;
10814 df_finish_pass (true);
10815 df_scan_alloc (NULL);
10816 df_scan_blocks ();
10817 df_compute_regs_ever_live (true);
10818 df_analyze ();
10819 }
10820
10821 crtl->stack_realign_needed = stack_realign;
10822 crtl->stack_realign_finalized = true;
10823 }
10824
10825 /* Expand the prologue into a bunch of separate insns. */
10826
10827 void
10828 ix86_expand_prologue (void)
10829 {
10830 struct machine_function *m = cfun->machine;
10831 rtx insn, t;
10832 struct ix86_frame frame;
10833 HOST_WIDE_INT allocate;
10834 bool int_registers_saved;
10835 bool sse_registers_saved;
10836
10837 ix86_finalize_stack_realign_flags ();
10838
10839 /* DRAP should not coexist with stack_realign_fp */
10840 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10841
10842 memset (&m->fs, 0, sizeof (m->fs));
10843
10844 /* Initialize CFA state for before the prologue. */
10845 m->fs.cfa_reg = stack_pointer_rtx;
10846 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10847
10848 /* Track SP offset to the CFA. We continue tracking this after we've
10849 swapped the CFA register away from SP. In the case of re-alignment
10850 this is fudged; we're interested in offsets within the local frame. */
10851 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10852 m->fs.sp_valid = true;
10853
10854 ix86_compute_frame_layout (&frame);
10855
10856 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10857 {
10858 /* We should have already generated an error for any use of
10859 ms_hook on a nested function. */
10860 gcc_checking_assert (!ix86_static_chain_on_stack);
10861
10862 /* Check if profiling is active and we shall use the profiling before
10863 prologue variant. If so, sorry. */
10864 if (crtl->profile && flag_fentry != 0)
10865 sorry ("ms_hook_prologue attribute isn%'t compatible "
10866 "with -mfentry for 32-bit");
10867
10868 /* In ix86_asm_output_function_label we emitted:
10869 8b ff movl.s %edi,%edi
10870 55 push %ebp
10871 8b ec movl.s %esp,%ebp
10872
10873 This matches the hookable function prologue in Win32 API
10874 functions in Microsoft Windows XP Service Pack 2 and newer.
10875 Wine uses this to enable Windows apps to hook the Win32 API
10876 functions provided by Wine.
10877
10878 What that means is that we've already set up the frame pointer. */
10879
10880 if (frame_pointer_needed
10881 && !(crtl->drap_reg && crtl->stack_realign_needed))
10882 {
10883 rtx push, mov;
10884
10885 /* We've decided to use the frame pointer already set up.
10886 Describe this to the unwinder by pretending that both
10887 push and mov insns happen right here.
10888
10889 Putting the unwind info here at the end of the ms_hook
10890 is done so that we can make absolutely certain we get
10891 the required byte sequence at the start of the function,
10892 rather than relying on an assembler that can produce
10893 the exact encoding required.
10894
10895 However it does mean (in the unpatched case) that we have
10896 a 1 insn window where the asynchronous unwind info is
10897 incorrect. However, if we placed the unwind info at
10898 its correct location we would have incorrect unwind info
10899 in the patched case. Which is probably all moot since
10900 I don't expect Wine generates dwarf2 unwind info for the
10901 system libraries that use this feature. */
10902
10903 insn = emit_insn (gen_blockage ());
10904
10905 push = gen_push (hard_frame_pointer_rtx);
10906 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10907 stack_pointer_rtx);
10908 RTX_FRAME_RELATED_P (push) = 1;
10909 RTX_FRAME_RELATED_P (mov) = 1;
10910
10911 RTX_FRAME_RELATED_P (insn) = 1;
10912 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10913 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10914
10915 /* Note that gen_push incremented m->fs.cfa_offset, even
10916 though we didn't emit the push insn here. */
10917 m->fs.cfa_reg = hard_frame_pointer_rtx;
10918 m->fs.fp_offset = m->fs.cfa_offset;
10919 m->fs.fp_valid = true;
10920 }
10921 else
10922 {
10923 /* The frame pointer is not needed so pop %ebp again.
10924 This leaves us with a pristine state. */
10925 emit_insn (gen_pop (hard_frame_pointer_rtx));
10926 }
10927 }
10928
10929 /* The first insn of a function that accepts its static chain on the
10930 stack is to push the register that would be filled in by a direct
10931 call. This insn will be skipped by the trampoline. */
10932 else if (ix86_static_chain_on_stack)
10933 {
10934 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10935 emit_insn (gen_blockage ());
10936
10937 /* We don't want to interpret this push insn as a register save,
10938 only as a stack adjustment. The real copy of the register as
10939 a save will be done later, if needed. */
10940 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10941 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10942 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10943 RTX_FRAME_RELATED_P (insn) = 1;
10944 }
10945
10946 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10947 DRAP is needed and stack realignment is really needed after reload. */
10948 if (stack_realign_drap)
10949 {
10950 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10951
10952 /* Only need to push the parameter pointer reg if it must be preserved. */
10953 if (!call_used_regs[REGNO (crtl->drap_reg)])
10954 {
10955 /* Push arg pointer reg */
10956 insn = emit_insn (gen_push (crtl->drap_reg));
10957 RTX_FRAME_RELATED_P (insn) = 1;
10958 }
10959
10960 /* Grab the argument pointer. */
10961 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10962 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10963 RTX_FRAME_RELATED_P (insn) = 1;
10964 m->fs.cfa_reg = crtl->drap_reg;
10965 m->fs.cfa_offset = 0;
10966
10967 /* Align the stack. */
10968 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10969 stack_pointer_rtx,
10970 GEN_INT (-align_bytes)));
10971 RTX_FRAME_RELATED_P (insn) = 1;
10972
10973 /* Replicate the return address on the stack so that return
10974 address can be reached via (argp - 1) slot. This is needed
10975 to implement macro RETURN_ADDR_RTX and intrinsic function
10976 expand_builtin_return_addr etc. */
10977 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10978 t = gen_frame_mem (word_mode, t);
10979 insn = emit_insn (gen_push (t));
10980 RTX_FRAME_RELATED_P (insn) = 1;
10981
10982 /* For the purposes of frame and register save area addressing,
10983 we've started over with a new frame. */
10984 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10985 m->fs.realigned = true;
10986 }
10987
10988 int_registers_saved = (frame.nregs == 0);
10989 sse_registers_saved = (frame.nsseregs == 0);
10990
10991 if (frame_pointer_needed && !m->fs.fp_valid)
10992 {
10993 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10994 slower on all targets. Also sdb doesn't like it. */
10995 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10996 RTX_FRAME_RELATED_P (insn) = 1;
10997
10998 /* Push registers now, before setting the frame pointer
10999 on SEH target. */
11000 if (!int_registers_saved
11001 && TARGET_SEH
11002 && !frame.save_regs_using_mov)
11003 {
11004 ix86_emit_save_regs ();
11005 int_registers_saved = true;
11006 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11007 }
11008
11009 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11010 {
11011 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11012 RTX_FRAME_RELATED_P (insn) = 1;
11013
11014 if (m->fs.cfa_reg == stack_pointer_rtx)
11015 m->fs.cfa_reg = hard_frame_pointer_rtx;
11016 m->fs.fp_offset = m->fs.sp_offset;
11017 m->fs.fp_valid = true;
11018 }
11019 }
11020
11021 if (!int_registers_saved)
11022 {
11023 /* If saving registers via PUSH, do so now. */
11024 if (!frame.save_regs_using_mov)
11025 {
11026 ix86_emit_save_regs ();
11027 int_registers_saved = true;
11028 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11029 }
11030
11031 /* When using the red zone we may start saving registers before allocating
11032 the stack frame, saving one cycle of the prologue. However, avoid
11033 doing this if we have to probe the stack; at least on x86_64 the
11034 stack probe can turn into a call that clobbers a red zone location. */
11035 else if (ix86_using_red_zone ()
11036 && (! TARGET_STACK_PROBE
11037 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11038 {
11039 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11040 int_registers_saved = true;
11041 }
11042 }
11043
11044 if (stack_realign_fp)
11045 {
11046 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11047 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11048
11049 /* The computation of the size of the re-aligned stack frame means
11050 that we must allocate the size of the register save area before
11051 performing the actual alignment. Otherwise we cannot guarantee
11052 that there's enough storage above the realignment point. */
11053 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11054 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11055 GEN_INT (m->fs.sp_offset
11056 - frame.sse_reg_save_offset),
11057 -1, false);
11058
11059 /* Align the stack. */
11060 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11061 stack_pointer_rtx,
11062 GEN_INT (-align_bytes)));
11063
11064 /* For the purposes of register save area addressing, the stack
11065 pointer is no longer valid. As for the value of sp_offset,
11066 see ix86_compute_frame_layout, which we need to match in order
11067 to pass verification of stack_pointer_offset at the end. */
11068 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11069 m->fs.sp_valid = false;
11070 }
11071
11072 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11073
11074 if (flag_stack_usage_info)
11075 {
11076 /* We start to count from ARG_POINTER. */
11077 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11078
11079 /* If it was realigned, take into account the fake frame. */
11080 if (stack_realign_drap)
11081 {
11082 if (ix86_static_chain_on_stack)
11083 stack_size += UNITS_PER_WORD;
11084
11085 if (!call_used_regs[REGNO (crtl->drap_reg)])
11086 stack_size += UNITS_PER_WORD;
11087
11088 /* This over-estimates by 1 minimal-stack-alignment-unit but
11089 mitigates that by counting in the new return address slot. */
11090 current_function_dynamic_stack_size
11091 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11092 }
11093
11094 current_function_static_stack_size = stack_size;
11095 }
11096
11097 /* On SEH target with very large frame size, allocate an area to save
11098 SSE registers (as the very large allocation won't be described). */
11099 if (TARGET_SEH
11100 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11101 && !sse_registers_saved)
11102 {
11103 HOST_WIDE_INT sse_size =
11104 frame.sse_reg_save_offset - frame.reg_save_offset;
11105
11106 gcc_assert (int_registers_saved);
11107
11108 /* No need to do stack checking as the area will be immediately
11109 written. */
11110 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11111 GEN_INT (-sse_size), -1,
11112 m->fs.cfa_reg == stack_pointer_rtx);
11113 allocate -= sse_size;
11114 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11115 sse_registers_saved = true;
11116 }
11117
11118 /* The stack has already been decremented by the instruction calling us
11119 so probe if the size is non-negative to preserve the protection area. */
11120 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11121 {
11122 /* We expect the registers to be saved when probes are used. */
11123 gcc_assert (int_registers_saved);
11124
11125 if (STACK_CHECK_MOVING_SP)
11126 {
11127 if (!(crtl->is_leaf && !cfun->calls_alloca
11128 && allocate <= PROBE_INTERVAL))
11129 {
11130 ix86_adjust_stack_and_probe (allocate);
11131 allocate = 0;
11132 }
11133 }
11134 else
11135 {
11136 HOST_WIDE_INT size = allocate;
11137
11138 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11139 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11140
11141 if (TARGET_STACK_PROBE)
11142 {
11143 if (crtl->is_leaf && !cfun->calls_alloca)
11144 {
11145 if (size > PROBE_INTERVAL)
11146 ix86_emit_probe_stack_range (0, size);
11147 }
11148 else
11149 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11150 }
11151 else
11152 {
11153 if (crtl->is_leaf && !cfun->calls_alloca)
11154 {
11155 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11156 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11157 size - STACK_CHECK_PROTECT);
11158 }
11159 else
11160 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11161 }
11162 }
11163 }
11164
11165 if (allocate == 0)
11166 ;
11167 else if (!ix86_target_stack_probe ()
11168 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11169 {
11170 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11171 GEN_INT (-allocate), -1,
11172 m->fs.cfa_reg == stack_pointer_rtx);
11173 }
11174 else
11175 {
11176 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11177 rtx r10 = NULL;
11178 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11179 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11180 bool eax_live = ix86_eax_live_at_start_p ();
11181 bool r10_live = false;
11182
11183 if (TARGET_64BIT)
11184 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11185
11186 if (eax_live)
11187 {
11188 insn = emit_insn (gen_push (eax));
11189 allocate -= UNITS_PER_WORD;
11190 /* Note that SEH directives need to continue tracking the stack
11191 pointer even after the frame pointer has been set up. */
11192 if (sp_is_cfa_reg || TARGET_SEH)
11193 {
11194 if (sp_is_cfa_reg)
11195 m->fs.cfa_offset += UNITS_PER_WORD;
11196 RTX_FRAME_RELATED_P (insn) = 1;
11197 }
11198 }
11199
11200 if (r10_live)
11201 {
11202 r10 = gen_rtx_REG (Pmode, R10_REG);
11203 insn = emit_insn (gen_push (r10));
11204 allocate -= UNITS_PER_WORD;
11205 if (sp_is_cfa_reg || TARGET_SEH)
11206 {
11207 if (sp_is_cfa_reg)
11208 m->fs.cfa_offset += UNITS_PER_WORD;
11209 RTX_FRAME_RELATED_P (insn) = 1;
11210 }
11211 }
11212
11213 emit_move_insn (eax, GEN_INT (allocate));
11214 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11215
11216 /* Use the fact that AX still contains ALLOCATE. */
11217 adjust_stack_insn = (Pmode == DImode
11218 ? gen_pro_epilogue_adjust_stack_di_sub
11219 : gen_pro_epilogue_adjust_stack_si_sub);
11220
11221 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11222 stack_pointer_rtx, eax));
11223
11224 if (sp_is_cfa_reg || TARGET_SEH)
11225 {
11226 if (sp_is_cfa_reg)
11227 m->fs.cfa_offset += allocate;
11228 RTX_FRAME_RELATED_P (insn) = 1;
11229 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11230 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11231 plus_constant (Pmode, stack_pointer_rtx,
11232 -allocate)));
11233 }
11234 m->fs.sp_offset += allocate;
11235
11236 /* Use stack_pointer_rtx for relative addressing so that code
11237 works for realigned stack, too. */
11238 if (r10_live && eax_live)
11239 {
11240 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11241 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11242 gen_frame_mem (word_mode, t));
11243 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11244 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11245 gen_frame_mem (word_mode, t));
11246 }
11247 else if (eax_live || r10_live)
11248 {
11249 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11250 emit_move_insn (gen_rtx_REG (word_mode,
11251 (eax_live ? AX_REG : R10_REG)),
11252 gen_frame_mem (word_mode, t));
11253 }
11254 }
11255 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11256
11257 /* If we haven't already set up the frame pointer, do so now. */
11258 if (frame_pointer_needed && !m->fs.fp_valid)
11259 {
11260 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11261 GEN_INT (frame.stack_pointer_offset
11262 - frame.hard_frame_pointer_offset));
11263 insn = emit_insn (insn);
11264 RTX_FRAME_RELATED_P (insn) = 1;
11265 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11266
11267 if (m->fs.cfa_reg == stack_pointer_rtx)
11268 m->fs.cfa_reg = hard_frame_pointer_rtx;
11269 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11270 m->fs.fp_valid = true;
11271 }
11272
11273 if (!int_registers_saved)
11274 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11275 if (!sse_registers_saved)
11276 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11277
11278 if (crtl->drap_reg && !crtl->stack_realign_needed)
11279 {
11280 /* vDRAP is set up, but after reload it turns out stack realignment
11281 isn't necessary; here we emit prologue code to set up DRAP
11282 without the stack realignment adjustment. */
11283 t = choose_baseaddr (0);
11284 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11285 }
11286
11287 /* Prevent instructions from being scheduled into register save push
11288 sequence when access to the redzone area is done through frame pointer.
11289 The offset between the frame pointer and the stack pointer is calculated
11290 relative to the value of the stack pointer at the end of the function
11291 prologue, and moving instructions that access redzone area via frame
11292 pointer inside push sequence violates this assumption. */
11293 if (frame_pointer_needed && frame.red_zone_size)
11294 emit_insn (gen_memory_blockage ());
11295
11296 /* Emit cld instruction if stringops are used in the function. */
11297 if (TARGET_CLD && ix86_current_function_needs_cld)
11298 emit_insn (gen_cld ());
11299
11300 /* SEH requires that the prologue end within 256 bytes of the start of
11301 the function. Prevent instruction schedules that would extend that.
11302 Further, prevent alloca modifications to the stack pointer from being
11303 combined with prologue modifications. */
11304 if (TARGET_SEH)
11305 emit_insn (gen_prologue_use (stack_pointer_rtx));
11306 }
11307
11308 /* Emit code to restore REG using a POP insn. */
11309
11310 static void
11311 ix86_emit_restore_reg_using_pop (rtx reg)
11312 {
11313 struct machine_function *m = cfun->machine;
11314 rtx insn = emit_insn (gen_pop (reg));
11315
11316 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11317 m->fs.sp_offset -= UNITS_PER_WORD;
11318
11319 if (m->fs.cfa_reg == crtl->drap_reg
11320 && REGNO (reg) == REGNO (crtl->drap_reg))
11321 {
11322 /* Previously we'd represented the CFA as an expression
11323 like *(%ebp - 8). We've just popped that value from
11324 the stack, which means we need to reset the CFA to
11325 the drap register. This will remain until we restore
11326 the stack pointer. */
11327 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11328 RTX_FRAME_RELATED_P (insn) = 1;
11329
11330 /* This means that the DRAP register is valid for addressing too. */
11331 m->fs.drap_valid = true;
11332 return;
11333 }
11334
11335 if (m->fs.cfa_reg == stack_pointer_rtx)
11336 {
11337 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11338 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11339 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11340 RTX_FRAME_RELATED_P (insn) = 1;
11341
11342 m->fs.cfa_offset -= UNITS_PER_WORD;
11343 }
11344
11345 /* When the frame pointer is the CFA, and we pop it, we are
11346 swapping back to the stack pointer as the CFA. This happens
11347 for stack frames that don't allocate other data, so we assume
11348 the stack pointer is now pointing at the return address, i.e.
11349 the function entry state, which makes the offset be 1 word. */
11350 if (reg == hard_frame_pointer_rtx)
11351 {
11352 m->fs.fp_valid = false;
11353 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11354 {
11355 m->fs.cfa_reg = stack_pointer_rtx;
11356 m->fs.cfa_offset -= UNITS_PER_WORD;
11357
11358 add_reg_note (insn, REG_CFA_DEF_CFA,
11359 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11360 GEN_INT (m->fs.cfa_offset)));
11361 RTX_FRAME_RELATED_P (insn) = 1;
11362 }
11363 }
11364 }
11365
11366 /* Emit code to restore saved registers using POP insns. */
11367
11368 static void
11369 ix86_emit_restore_regs_using_pop (void)
11370 {
11371 unsigned int regno;
11372
11373 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11374 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11375 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11376 }
11377
11378 /* Emit code and notes for the LEAVE instruction. */
11379
11380 static void
11381 ix86_emit_leave (void)
11382 {
11383 struct machine_function *m = cfun->machine;
11384 rtx insn = emit_insn (ix86_gen_leave ());
11385
11386 ix86_add_queued_cfa_restore_notes (insn);
11387
11388 gcc_assert (m->fs.fp_valid);
11389 m->fs.sp_valid = true;
11390 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11391 m->fs.fp_valid = false;
11392
11393 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11394 {
11395 m->fs.cfa_reg = stack_pointer_rtx;
11396 m->fs.cfa_offset = m->fs.sp_offset;
11397
11398 add_reg_note (insn, REG_CFA_DEF_CFA,
11399 plus_constant (Pmode, stack_pointer_rtx,
11400 m->fs.sp_offset));
11401 RTX_FRAME_RELATED_P (insn) = 1;
11402 }
11403 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11404 m->fs.fp_offset);
11405 }
11406
11407 /* Emit code to restore saved registers using MOV insns.
11408 First register is restored from CFA - CFA_OFFSET. */
11409 static void
11410 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11411 bool maybe_eh_return)
11412 {
11413 struct machine_function *m = cfun->machine;
11414 unsigned int regno;
11415
11416 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11417 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11418 {
11419 rtx reg = gen_rtx_REG (word_mode, regno);
11420 rtx insn, mem;
11421
11422 mem = choose_baseaddr (cfa_offset);
11423 mem = gen_frame_mem (word_mode, mem);
11424 insn = emit_move_insn (reg, mem);
11425
11426 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11427 {
11428 /* Previously we'd represented the CFA as an expression
11429 like *(%ebp - 8). We've just popped that value from
11430 the stack, which means we need to reset the CFA to
11431 the drap register. This will remain until we restore
11432 the stack pointer. */
11433 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11434 RTX_FRAME_RELATED_P (insn) = 1;
11435
11436 /* This means that the DRAP register is valid for addressing. */
11437 m->fs.drap_valid = true;
11438 }
11439 else
11440 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11441
11442 cfa_offset -= UNITS_PER_WORD;
11443 }
11444 }
11445
11446 /* Emit code to restore saved registers using MOV insns.
11447 First register is restored from CFA - CFA_OFFSET. */
11448 static void
11449 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11450 bool maybe_eh_return)
11451 {
11452 unsigned int regno;
11453
11454 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11455 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11456 {
11457 rtx reg = gen_rtx_REG (V4SFmode, regno);
11458 rtx mem;
11459
11460 mem = choose_baseaddr (cfa_offset);
11461 mem = gen_rtx_MEM (V4SFmode, mem);
11462 set_mem_align (mem, 128);
11463 emit_move_insn (reg, mem);
11464
11465 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11466
11467 cfa_offset -= 16;
11468 }
11469 }
11470
11471 /* Restore function stack, frame, and registers. */
11472
11473 void
11474 ix86_expand_epilogue (int style)
11475 {
11476 struct machine_function *m = cfun->machine;
11477 struct machine_frame_state frame_state_save = m->fs;
11478 struct ix86_frame frame;
11479 bool restore_regs_via_mov;
11480 bool using_drap;
11481
11482 ix86_finalize_stack_realign_flags ();
11483 ix86_compute_frame_layout (&frame);
11484
11485 m->fs.sp_valid = (!frame_pointer_needed
11486 || (crtl->sp_is_unchanging
11487 && !stack_realign_fp));
11488 gcc_assert (!m->fs.sp_valid
11489 || m->fs.sp_offset == frame.stack_pointer_offset);
11490
11491 /* The FP must be valid if the frame pointer is present. */
11492 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11493 gcc_assert (!m->fs.fp_valid
11494 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11495
11496 /* We must have *some* valid pointer to the stack frame. */
11497 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11498
11499 /* The DRAP is never valid at this point. */
11500 gcc_assert (!m->fs.drap_valid);
11501
11502 /* See the comment about red zone and frame
11503 pointer usage in ix86_expand_prologue. */
11504 if (frame_pointer_needed && frame.red_zone_size)
11505 emit_insn (gen_memory_blockage ());
11506
11507 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11508 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11509
11510 /* Determine the CFA offset of the end of the red-zone. */
11511 m->fs.red_zone_offset = 0;
11512 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11513 {
11514 /* The red-zone begins below the return address. */
11515 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11516
11517 /* When the register save area is in the aligned portion of
11518 the stack, determine the maximum runtime displacement that
11519 matches up with the aligned frame. */
11520 if (stack_realign_drap)
11521 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11522 + UNITS_PER_WORD);
11523 }
11524
11525 /* Special care must be taken for the normal return case of a function
11526 using eh_return: the eax and edx registers are marked as saved, but
11527 not restored along this path. Adjust the save location to match. */
11528 if (crtl->calls_eh_return && style != 2)
11529 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11530
11531 /* EH_RETURN requires the use of moves to function properly. */
11532 if (crtl->calls_eh_return)
11533 restore_regs_via_mov = true;
11534 /* SEH requires the use of pops to identify the epilogue. */
11535 else if (TARGET_SEH)
11536 restore_regs_via_mov = false;
11537 /* If we're only restoring one register and sp is not valid, then
11538 use a move instruction to restore the register, since it's
11539 less work than reloading sp and popping the register. */
11540 else if (!m->fs.sp_valid && frame.nregs <= 1)
11541 restore_regs_via_mov = true;
11542 else if (TARGET_EPILOGUE_USING_MOVE
11543 && cfun->machine->use_fast_prologue_epilogue
11544 && (frame.nregs > 1
11545 || m->fs.sp_offset != frame.reg_save_offset))
11546 restore_regs_via_mov = true;
11547 else if (frame_pointer_needed
11548 && !frame.nregs
11549 && m->fs.sp_offset != frame.reg_save_offset)
11550 restore_regs_via_mov = true;
11551 else if (frame_pointer_needed
11552 && TARGET_USE_LEAVE
11553 && cfun->machine->use_fast_prologue_epilogue
11554 && frame.nregs == 1)
11555 restore_regs_via_mov = true;
11556 else
11557 restore_regs_via_mov = false;
11558
11559 if (restore_regs_via_mov || frame.nsseregs)
11560 {
11561 /* Ensure that the entire register save area is addressable via
11562 the stack pointer, if we will restore via sp. */
11563 if (TARGET_64BIT
11564 && m->fs.sp_offset > 0x7fffffff
11565 && !(m->fs.fp_valid || m->fs.drap_valid)
11566 && (frame.nsseregs + frame.nregs) != 0)
11567 {
11568 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11569 GEN_INT (m->fs.sp_offset
11570 - frame.sse_reg_save_offset),
11571 style,
11572 m->fs.cfa_reg == stack_pointer_rtx);
11573 }
11574 }
11575
11576 /* If there are any SSE registers to restore, then we have to do it
11577 via moves, since there's obviously no pop for SSE regs. */
11578 if (frame.nsseregs)
11579 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11580 style == 2);
11581
11582 if (restore_regs_via_mov)
11583 {
11584 rtx t;
11585
11586 if (frame.nregs)
11587 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11588
11589 /* eh_return epilogues need %ecx added to the stack pointer. */
11590 if (style == 2)
11591 {
11592 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11593
11594 /* Stack align doesn't work with eh_return. */
11595 gcc_assert (!stack_realign_drap);
11596 /* Neither do regparm nested functions. */
11597 gcc_assert (!ix86_static_chain_on_stack);
11598
11599 if (frame_pointer_needed)
11600 {
11601 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11602 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11603 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11604
11605 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11606 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11607
11608 /* Note that we use SA as a temporary CFA, as the return
11609 address is at the proper place relative to it. We
11610 pretend this happens at the FP restore insn because
11611 prior to this insn the FP would be stored at the wrong
11612 offset relative to SA, and after this insn we have no
11613 other reasonable register to use for the CFA. We don't
11614 bother resetting the CFA to the SP for the duration of
11615 the return insn. */
11616 add_reg_note (insn, REG_CFA_DEF_CFA,
11617 plus_constant (Pmode, sa, UNITS_PER_WORD));
11618 ix86_add_queued_cfa_restore_notes (insn);
11619 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11620 RTX_FRAME_RELATED_P (insn) = 1;
11621
11622 m->fs.cfa_reg = sa;
11623 m->fs.cfa_offset = UNITS_PER_WORD;
11624 m->fs.fp_valid = false;
11625
11626 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11627 const0_rtx, style, false);
11628 }
11629 else
11630 {
11631 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11632 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11633 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11634 ix86_add_queued_cfa_restore_notes (insn);
11635
11636 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11637 if (m->fs.cfa_offset != UNITS_PER_WORD)
11638 {
11639 m->fs.cfa_offset = UNITS_PER_WORD;
11640 add_reg_note (insn, REG_CFA_DEF_CFA,
11641 plus_constant (Pmode, stack_pointer_rtx,
11642 UNITS_PER_WORD));
11643 RTX_FRAME_RELATED_P (insn) = 1;
11644 }
11645 }
11646 m->fs.sp_offset = UNITS_PER_WORD;
11647 m->fs.sp_valid = true;
11648 }
11649 }
11650 else
11651 {
11652 /* SEH requires that the function end with (1) a stack adjustment
11653 if necessary, (2) a sequence of pops, and (3) a return or
11654 jump instruction. Prevent insns from the function body from
11655 being scheduled into this sequence. */
11656 if (TARGET_SEH)
11657 {
11658 /* Prevent a catch region from being adjacent to the standard
11659 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11660 several other flags that would be interesting to test are
11661 set up yet. */
11662 if (flag_non_call_exceptions)
11663 emit_insn (gen_nops (const1_rtx));
11664 else
11665 emit_insn (gen_blockage ());
11666 }
11667
11668 /* First step is to deallocate the stack frame so that we can
11669 pop the registers. Also do it on SEH target for very large
11670 frame as the emitted instructions aren't allowed by the ABI in
11671 epilogues. */
11672 if (!m->fs.sp_valid
11673 || (TARGET_SEH
11674 && (m->fs.sp_offset - frame.reg_save_offset
11675 >= SEH_MAX_FRAME_SIZE)))
11676 {
11677 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11678 GEN_INT (m->fs.fp_offset
11679 - frame.reg_save_offset),
11680 style, false);
11681 }
11682 else if (m->fs.sp_offset != frame.reg_save_offset)
11683 {
11684 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11685 GEN_INT (m->fs.sp_offset
11686 - frame.reg_save_offset),
11687 style,
11688 m->fs.cfa_reg == stack_pointer_rtx);
11689 }
11690
11691 ix86_emit_restore_regs_using_pop ();
11692 }
11693
11694 /* If we used a frame pointer and haven't already got rid of it,
11695 then do so now. */
11696 if (m->fs.fp_valid)
11697 {
11698 /* If the stack pointer is valid and pointing at the frame
11699 pointer store address, then we only need a pop. */
11700 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11701 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11702 /* Leave results in shorter dependency chains on CPUs that are
11703 able to grok it fast. */
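	  /* For reference, "leave" is equivalent to the two-instruction
	     sequence "movl %ebp, %esp; popl %ebp" ("movq %rbp, %rsp;
	     popq %rbp" in 64-bit mode), so a single short instruction
	     restores both the stack and frame pointers.  */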
11704 else if (TARGET_USE_LEAVE
11705 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11706 || !cfun->machine->use_fast_prologue_epilogue)
11707 ix86_emit_leave ();
11708 else
11709 {
11710 pro_epilogue_adjust_stack (stack_pointer_rtx,
11711 hard_frame_pointer_rtx,
11712 const0_rtx, style, !using_drap);
11713 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11714 }
11715 }
11716
11717 if (using_drap)
11718 {
11719 int param_ptr_offset = UNITS_PER_WORD;
11720 rtx insn;
11721
11722 gcc_assert (stack_realign_drap);
11723
11724 if (ix86_static_chain_on_stack)
11725 param_ptr_offset += UNITS_PER_WORD;
11726 if (!call_used_regs[REGNO (crtl->drap_reg)])
11727 param_ptr_offset += UNITS_PER_WORD;
11728
11729 insn = emit_insn (gen_rtx_SET
11730 (VOIDmode, stack_pointer_rtx,
11731 gen_rtx_PLUS (Pmode,
11732 crtl->drap_reg,
11733 GEN_INT (-param_ptr_offset))));
11734 m->fs.cfa_reg = stack_pointer_rtx;
11735 m->fs.cfa_offset = param_ptr_offset;
11736 m->fs.sp_offset = param_ptr_offset;
11737 m->fs.realigned = false;
11738
11739 add_reg_note (insn, REG_CFA_DEF_CFA,
11740 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11741 GEN_INT (param_ptr_offset)));
11742 RTX_FRAME_RELATED_P (insn) = 1;
11743
11744 if (!call_used_regs[REGNO (crtl->drap_reg)])
11745 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11746 }
11747
11748 /* At this point the stack pointer must be valid, and we must have
11749 restored all of the registers. We may not have deallocated the
11750 entire stack frame. We've delayed this until now because it may
11751 be possible to merge the local stack deallocation with the
11752 deallocation forced by ix86_static_chain_on_stack. */
11753 gcc_assert (m->fs.sp_valid);
11754 gcc_assert (!m->fs.fp_valid);
11755 gcc_assert (!m->fs.realigned);
11756 if (m->fs.sp_offset != UNITS_PER_WORD)
11757 {
11758 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11759 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11760 style, true);
11761 }
11762 else
11763 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11764
11765 /* Sibcall epilogues don't want a return instruction. */
11766 if (style == 0)
11767 {
11768 m->fs = frame_state_save;
11769 return;
11770 }
11771
11772 if (crtl->args.pops_args && crtl->args.size)
11773 {
11774 rtx popc = GEN_INT (crtl->args.pops_args);
11775
11776 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11777 address, do an explicit add, and jump indirectly to the caller. */
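      /* Purely illustrative sketch: for a hypothetical function popping
	 0x20000 bytes of arguments, the code emitted below is roughly
	     popl  %ecx
	     addl  $0x20000, %esp
	     jmp   *%ecx
	 rather than a single "ret $N", whose immediate is limited to
	 16 bits.  */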
11778
11779 if (crtl->args.pops_args >= 65536)
11780 {
11781 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11782 rtx insn;
11783
11784 /* There is no "pascal" calling convention in any 64bit ABI. */
11785 gcc_assert (!TARGET_64BIT);
11786
11787 insn = emit_insn (gen_pop (ecx));
11788 m->fs.cfa_offset -= UNITS_PER_WORD;
11789 m->fs.sp_offset -= UNITS_PER_WORD;
11790
11791 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11792 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11793 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11794 add_reg_note (insn, REG_CFA_REGISTER,
11795 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11796 RTX_FRAME_RELATED_P (insn) = 1;
11797
11798 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11799 popc, -1, true);
11800 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11801 }
11802 else
11803 emit_jump_insn (gen_simple_return_pop_internal (popc));
11804 }
11805 else
11806 emit_jump_insn (gen_simple_return_internal ());
11807
11808 /* Restore the state back to the state from the prologue,
11809 so that it's correct for the next epilogue. */
11810 m->fs = frame_state_save;
11811 }
11812
11813 /* Reset from the function's potential modifications. */
11814
11815 static void
11816 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11817 {
11818 if (pic_offset_table_rtx
11819 && !ix86_use_pseudo_pic_reg ())
11820 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11821 #if TARGET_MACHO
11822 /* Mach-O doesn't support labels at the end of objects, so if
11823 it looks like we might want one, insert a NOP. */
11824 {
11825 rtx_insn *insn = get_last_insn ();
11826 rtx_insn *deleted_debug_label = NULL;
11827 while (insn
11828 && NOTE_P (insn)
11829 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11830 {
11831 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11832 notes only; instead set their CODE_LABEL_NUMBER to -1,
11833 otherwise there would be code generation differences
11834 between -g and -g0. */
11835 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11836 deleted_debug_label = insn;
11837 insn = PREV_INSN (insn);
11838 }
11839 if (insn
11840 && (LABEL_P (insn)
11841 || (NOTE_P (insn)
11842 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11843 fputs ("\tnop\n", file);
11844 else if (deleted_debug_label)
11845 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11846 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11847 CODE_LABEL_NUMBER (insn) = -1;
11848 }
11849 #endif
11850
11851 }
11852
11853 /* Return a scratch register to use in the split stack prologue. The
11854 split stack prologue is used for -fsplit-stack. It consists of the first
11855 instructions in the function, even before the regular prologue.
11856 The scratch register can be any caller-saved register which is not
11857 used for parameters or for the static chain. */
11858
11859 static unsigned int
11860 split_stack_prologue_scratch_regno (void)
11861 {
11862 if (TARGET_64BIT)
11863 return R11_REG;
11864 else
11865 {
11866 bool is_fastcall, is_thiscall;
11867 int regparm;
11868
11869 is_fastcall = (lookup_attribute ("fastcall",
11870 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11871 != NULL);
11872 is_thiscall = (lookup_attribute ("thiscall",
11873 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11874 != NULL);
11875 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11876
11877 if (is_fastcall)
11878 {
11879 if (DECL_STATIC_CHAIN (cfun->decl))
11880 {
11881 sorry ("-fsplit-stack does not support fastcall with "
11882 "nested function");
11883 return INVALID_REGNUM;
11884 }
11885 return AX_REG;
11886 }
11887 else if (is_thiscall)
11888 {
11889 if (!DECL_STATIC_CHAIN (cfun->decl))
11890 return DX_REG;
11891 return AX_REG;
11892 }
11893 else if (regparm < 3)
11894 {
11895 if (!DECL_STATIC_CHAIN (cfun->decl))
11896 return CX_REG;
11897 else
11898 {
11899 if (regparm >= 2)
11900 {
11901 sorry ("-fsplit-stack does not support 2 register "
11902 "parameters for a nested function");
11903 return INVALID_REGNUM;
11904 }
11905 return DX_REG;
11906 }
11907 }
11908 else
11909 {
11910 /* FIXME: We could make this work by pushing a register
11911 around the addition and comparison. */
11912 sorry ("-fsplit-stack does not support 3 register parameters");
11913 return INVALID_REGNUM;
11914 }
11915 }
11916 }
11917
11918 /* A SYMBOL_REF for the function which allocates new stack space for
11919 -fsplit-stack. */
11920
11921 static GTY(()) rtx split_stack_fn;
11922
11923 /* A SYMBOL_REF for the more-stack function used when generating code
11924 for the large model. */
11925
11926 static GTY(()) rtx split_stack_fn_large;
11927
11928 /* Handle -fsplit-stack. These are the first instructions in the
11929 function, even before the regular prologue. */
11930
11931 void
11932 ix86_expand_split_stack_prologue (void)
11933 {
11934 struct ix86_frame frame;
11935 HOST_WIDE_INT allocate;
11936 unsigned HOST_WIDE_INT args_size;
11937 rtx_code_label *label;
11938 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11939 rtx scratch_reg = NULL_RTX;
11940 rtx_code_label *varargs_label = NULL;
11941 rtx fn;
11942
11943 gcc_assert (flag_split_stack && reload_completed);
11944
11945 ix86_finalize_stack_realign_flags ();
11946 ix86_compute_frame_layout (&frame);
11947 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11948
11949 /* This is the label we will branch to if we have enough stack
11950 space. We expect the basic block reordering pass to reverse this
11951 branch if optimizing, so that we branch in the unlikely case. */
11952 label = gen_label_rtx ();
11953
11954 /* We need to compare the stack pointer minus the frame size with
11955 the stack boundary in the TCB. The stack boundary always gives
11956 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11957 can compare directly. Otherwise we need to do an addition. */
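  /* Illustrative sketch of the emitted check: when the frame fits in
     SPLIT_STACK_AVAILABLE bytes we compare the stack pointer itself
     against the TCB guard (roughly "cmp <guard>, %esp" or
     "cmp <guard>, %rsp" followed by "jae <label>"); for a larger frame
     a scratch register first receives %sp - <allocate> and that value
     is compared instead.  */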
11958
11959 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11960 UNSPEC_STACK_CHECK);
11961 limit = gen_rtx_CONST (Pmode, limit);
11962 limit = gen_rtx_MEM (Pmode, limit);
11963 if (allocate < SPLIT_STACK_AVAILABLE)
11964 current = stack_pointer_rtx;
11965 else
11966 {
11967 unsigned int scratch_regno;
11968 rtx offset;
11969
11970 /* We need a scratch register to hold the stack pointer minus
11971 the required frame size. Since this is the very start of the
11972 function, the scratch register can be any caller-saved
11973 register which is not used for parameters. */
11974 offset = GEN_INT (- allocate);
11975 scratch_regno = split_stack_prologue_scratch_regno ();
11976 if (scratch_regno == INVALID_REGNUM)
11977 return;
11978 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11979 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11980 {
11981 /* We don't use ix86_gen_add3 in this case because it will
11982 want to split to lea, but when not optimizing the insn
11983 will not be split after this point. */
11984 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11985 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11986 offset)));
11987 }
11988 else
11989 {
11990 emit_move_insn (scratch_reg, offset);
11991 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11992 stack_pointer_rtx));
11993 }
11994 current = scratch_reg;
11995 }
11996
11997 ix86_expand_branch (GEU, current, limit, label);
11998 jump_insn = get_last_insn ();
11999 JUMP_LABEL (jump_insn) = label;
12000
12001 /* Mark the jump as very likely to be taken. */
12002 add_int_reg_note (jump_insn, REG_BR_PROB,
12003 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12004
12005 if (split_stack_fn == NULL_RTX)
12006 {
12007 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12008 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12009 }
12010 fn = split_stack_fn;
12011
12012 /* Get more stack space. We pass in the desired stack space and the
12013 size of the arguments to copy to the new stack. In 32-bit mode
12014 we push the parameters; __morestack will return on a new stack
12015 anyhow. In 64-bit mode we pass the parameters in r10 and
12016 r11. */
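  /* Roughly (illustrative only), the 32-bit sequence is
	 pushl $<args_size>
	 pushl $<allocate>
	 call  __morestack
     while in 64-bit mode <allocate> is placed in %r10 and <args_size>
     in %r11 before the call.  */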
12017 allocate_rtx = GEN_INT (allocate);
12018 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12019 call_fusage = NULL_RTX;
12020 if (TARGET_64BIT)
12021 {
12022 rtx reg10, reg11;
12023
12024 reg10 = gen_rtx_REG (Pmode, R10_REG);
12025 reg11 = gen_rtx_REG (Pmode, R11_REG);
12026
12027 /* If this function uses a static chain, it will be in %r10.
12028 Preserve it across the call to __morestack. */
12029 if (DECL_STATIC_CHAIN (cfun->decl))
12030 {
12031 rtx rax;
12032
12033 rax = gen_rtx_REG (word_mode, AX_REG);
12034 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12035 use_reg (&call_fusage, rax);
12036 }
12037
12038 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12039 && !TARGET_PECOFF)
12040 {
12041 HOST_WIDE_INT argval;
12042
12043 gcc_assert (Pmode == DImode);
12044 /* When using the large model we need to load the address
12045 into a register, and we've run out of registers. So we
12046 switch to a different calling convention, and we call a
12047 different function: __morestack_large_model. We pass the
12048 argument size in the upper 32 bits of r10 and pass the
12049 frame size in the lower 32 bits. */
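	  /* For example (purely illustrative values): with allocate ==
	     0x120 and args_size == 0x18, the value computed below is
	     (0x18 << 32) + 0x120 == 0x0000001800000120, loaded into
	     %r10 with a single move.  */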
12050 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12051 gcc_assert ((args_size & 0xffffffff) == args_size);
12052
12053 if (split_stack_fn_large == NULL_RTX)
12054 {
12055 split_stack_fn_large =
12056 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12057 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12058 }
12059 if (ix86_cmodel == CM_LARGE_PIC)
12060 {
12061 rtx_code_label *label;
12062 rtx x;
12063
12064 label = gen_label_rtx ();
12065 emit_label (label);
12066 LABEL_PRESERVE_P (label) = 1;
12067 emit_insn (gen_set_rip_rex64 (reg10, label));
12068 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12069 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12070 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12071 UNSPEC_GOT);
12072 x = gen_rtx_CONST (Pmode, x);
12073 emit_move_insn (reg11, x);
12074 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12075 x = gen_const_mem (Pmode, x);
12076 emit_move_insn (reg11, x);
12077 }
12078 else
12079 emit_move_insn (reg11, split_stack_fn_large);
12080
12081 fn = reg11;
12082
12083 argval = ((args_size << 16) << 16) + allocate;
12084 emit_move_insn (reg10, GEN_INT (argval));
12085 }
12086 else
12087 {
12088 emit_move_insn (reg10, allocate_rtx);
12089 emit_move_insn (reg11, GEN_INT (args_size));
12090 use_reg (&call_fusage, reg11);
12091 }
12092
12093 use_reg (&call_fusage, reg10);
12094 }
12095 else
12096 {
12097 emit_insn (gen_push (GEN_INT (args_size)));
12098 emit_insn (gen_push (allocate_rtx));
12099 }
12100 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12101 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12102 NULL_RTX, false);
12103 add_function_usage_to (call_insn, call_fusage);
12104
12105 /* In order to make call/return prediction work right, we now need
12106 to execute a return instruction. See
12107 libgcc/config/i386/morestack.S for the details on how this works.
12108
12109 For flow purposes gcc must not see this as a return
12110 instruction--we need control flow to continue at the subsequent
12111 label. Therefore, we use an unspec. */
12112 gcc_assert (crtl->args.pops_args < 65536);
12113 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12114
12115 /* If we are in 64-bit mode and this function uses a static chain,
12116 we saved %r10 in %rax before calling __morestack. */
12117 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12118 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12119 gen_rtx_REG (word_mode, AX_REG));
12120
12121 /* If this function calls va_start, we need to store a pointer to
12122 the arguments on the old stack, because they may not all have
12123 been copied to the new stack. At this point the old stack can be
12124 found at the frame pointer value used by __morestack, because
12125 __morestack has set that up before calling back to us. Here we
12126 store that pointer in a scratch register, and in
12127 ix86_expand_prologue we store the scratch register in a stack
12128 slot. */
12129 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12130 {
12131 unsigned int scratch_regno;
12132 rtx frame_reg;
12133 int words;
12134
12135 scratch_regno = split_stack_prologue_scratch_regno ();
12136 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12137 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12138
12139 /* 64-bit:
12140 fp -> old fp value
12141 return address within this function
12142 return address of caller of this function
12143 stack arguments
12144 So we add three words to get to the stack arguments.
12145
12146 32-bit:
12147 fp -> old fp value
12148 return address within this function
12149 first argument to __morestack
12150 second argument to __morestack
12151 return address of caller of this function
12152 stack arguments
12153 So we add five words to get to the stack arguments.
12154 */
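      /* Illustrative example: with the usual 64-bit ABI (UNITS_PER_WORD
	 == 8) the scratch register below is set to %rbp + 3*8 = %rbp + 24,
	 the address of the first stack argument in the layout above; the
	 32-bit case yields %ebp + 5*4 = %ebp + 20.  */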
12155 words = TARGET_64BIT ? 3 : 5;
12156 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12157 gen_rtx_PLUS (Pmode, frame_reg,
12158 GEN_INT (words * UNITS_PER_WORD))));
12159
12160 varargs_label = gen_label_rtx ();
12161 emit_jump_insn (gen_jump (varargs_label));
12162 JUMP_LABEL (get_last_insn ()) = varargs_label;
12163
12164 emit_barrier ();
12165 }
12166
12167 emit_label (label);
12168 LABEL_NUSES (label) = 1;
12169
12170 /* If this function calls va_start, we now have to set the scratch
12171 register for the case where we do not call __morestack. In this
12172 case we need to set it based on the stack pointer. */
12173 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12174 {
12175 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12176 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12177 GEN_INT (UNITS_PER_WORD))));
12178
12179 emit_label (varargs_label);
12180 LABEL_NUSES (varargs_label) = 1;
12181 }
12182 }
12183
12184 /* We may have to tell the dataflow pass that the split stack prologue
12185 is initializing a scratch register. */
12186
12187 static void
12188 ix86_live_on_entry (bitmap regs)
12189 {
12190 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12191 {
12192 gcc_assert (flag_split_stack);
12193 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12194 }
12195 }
12196 \f
12197 /* Extract the parts of an RTL expression that is a valid memory address
12198 for an instruction. Return 0 if the structure of the address is
12199 grossly off. Return -1 if the address contains ASHIFT, so it is not
12200 strictly valid, but still used for computing the length of the lea instruction. */
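/* For example (illustrative only), the canonical address
       (plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
			 (reg:SI %ebx))
		(const_int 12))
   i.e. 12(%ebx,%eax,4), decomposes into base %ebx, index %eax,
   scale 4 and disp (const_int 12).  */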
12201
12202 int
12203 ix86_decompose_address (rtx addr, struct ix86_address *out)
12204 {
12205 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12206 rtx base_reg, index_reg;
12207 HOST_WIDE_INT scale = 1;
12208 rtx scale_rtx = NULL_RTX;
12209 rtx tmp;
12210 int retval = 1;
12211 enum ix86_address_seg seg = SEG_DEFAULT;
12212
12213 /* Allow zero-extended SImode addresses,
12214 they will be emitted with addr32 prefix. */
12215 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12216 {
12217 if (GET_CODE (addr) == ZERO_EXTEND
12218 && GET_MODE (XEXP (addr, 0)) == SImode)
12219 {
12220 addr = XEXP (addr, 0);
12221 if (CONST_INT_P (addr))
12222 return 0;
12223 }
12224 else if (GET_CODE (addr) == AND
12225 && const_32bit_mask (XEXP (addr, 1), DImode))
12226 {
12227 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12228 if (addr == NULL_RTX)
12229 return 0;
12230
12231 if (CONST_INT_P (addr))
12232 return 0;
12233 }
12234 }
12235
12236 /* Allow SImode subregs of DImode addresses,
12237 they will be emitted with addr32 prefix. */
12238 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12239 {
12240 if (GET_CODE (addr) == SUBREG
12241 && GET_MODE (SUBREG_REG (addr)) == DImode)
12242 {
12243 addr = SUBREG_REG (addr);
12244 if (CONST_INT_P (addr))
12245 return 0;
12246 }
12247 }
12248
12249 if (REG_P (addr))
12250 base = addr;
12251 else if (GET_CODE (addr) == SUBREG)
12252 {
12253 if (REG_P (SUBREG_REG (addr)))
12254 base = addr;
12255 else
12256 return 0;
12257 }
12258 else if (GET_CODE (addr) == PLUS)
12259 {
12260 rtx addends[4], op;
12261 int n = 0, i;
12262
12263 op = addr;
12264 do
12265 {
12266 if (n >= 4)
12267 return 0;
12268 addends[n++] = XEXP (op, 1);
12269 op = XEXP (op, 0);
12270 }
12271 while (GET_CODE (op) == PLUS);
12272 if (n >= 4)
12273 return 0;
12274 addends[n] = op;
12275
12276 for (i = n; i >= 0; --i)
12277 {
12278 op = addends[i];
12279 switch (GET_CODE (op))
12280 {
12281 case MULT:
12282 if (index)
12283 return 0;
12284 index = XEXP (op, 0);
12285 scale_rtx = XEXP (op, 1);
12286 break;
12287
12288 case ASHIFT:
12289 if (index)
12290 return 0;
12291 index = XEXP (op, 0);
12292 tmp = XEXP (op, 1);
12293 if (!CONST_INT_P (tmp))
12294 return 0;
12295 scale = INTVAL (tmp);
12296 if ((unsigned HOST_WIDE_INT) scale > 3)
12297 return 0;
12298 scale = 1 << scale;
12299 break;
12300
12301 case ZERO_EXTEND:
12302 op = XEXP (op, 0);
12303 if (GET_CODE (op) != UNSPEC)
12304 return 0;
12305 /* FALLTHRU */
12306
12307 case UNSPEC:
12308 if (XINT (op, 1) == UNSPEC_TP
12309 && TARGET_TLS_DIRECT_SEG_REFS
12310 && seg == SEG_DEFAULT)
12311 seg = DEFAULT_TLS_SEG_REG;
12312 else
12313 return 0;
12314 break;
12315
12316 case SUBREG:
12317 if (!REG_P (SUBREG_REG (op)))
12318 return 0;
12319 /* FALLTHRU */
12320
12321 case REG:
12322 if (!base)
12323 base = op;
12324 else if (!index)
12325 index = op;
12326 else
12327 return 0;
12328 break;
12329
12330 case CONST:
12331 case CONST_INT:
12332 case SYMBOL_REF:
12333 case LABEL_REF:
12334 if (disp)
12335 return 0;
12336 disp = op;
12337 break;
12338
12339 default:
12340 return 0;
12341 }
12342 }
12343 }
12344 else if (GET_CODE (addr) == MULT)
12345 {
12346 index = XEXP (addr, 0); /* index*scale */
12347 scale_rtx = XEXP (addr, 1);
12348 }
12349 else if (GET_CODE (addr) == ASHIFT)
12350 {
12351 /* We're called for lea too, which implements ashift on occasion. */
12352 index = XEXP (addr, 0);
12353 tmp = XEXP (addr, 1);
12354 if (!CONST_INT_P (tmp))
12355 return 0;
12356 scale = INTVAL (tmp);
12357 if ((unsigned HOST_WIDE_INT) scale > 3)
12358 return 0;
12359 scale = 1 << scale;
12360 retval = -1;
12361 }
12362 else
12363 disp = addr; /* displacement */
12364
12365 if (index)
12366 {
12367 if (REG_P (index))
12368 ;
12369 else if (GET_CODE (index) == SUBREG
12370 && REG_P (SUBREG_REG (index)))
12371 ;
12372 else
12373 return 0;
12374 }
12375
12376 /* Extract the integral value of scale. */
12377 if (scale_rtx)
12378 {
12379 if (!CONST_INT_P (scale_rtx))
12380 return 0;
12381 scale = INTVAL (scale_rtx);
12382 }
12383
12384 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12385 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12386
12387 /* Avoid useless 0 displacement. */
12388 if (disp == const0_rtx && (base || index))
12389 disp = NULL_RTX;
12390
12391 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12392 if (base_reg && index_reg && scale == 1
12393 && (index_reg == arg_pointer_rtx
12394 || index_reg == frame_pointer_rtx
12395 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12396 {
12397 rtx tmp;
12398 tmp = base, base = index, index = tmp;
12399 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12400 }
12401
12402 /* Special case: %ebp cannot be encoded as a base without a displacement.
12403 Similarly %r13. */
12404 if (!disp
12405 && base_reg
12406 && (base_reg == hard_frame_pointer_rtx
12407 || base_reg == frame_pointer_rtx
12408 || base_reg == arg_pointer_rtx
12409 || (REG_P (base_reg)
12410 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12411 || REGNO (base_reg) == R13_REG))))
12412 disp = const0_rtx;
12413
12414 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12415 Avoid this by transforming to [%esi+0].
12416 Reload calls address legitimization without cfun defined, so we need
12417 to test cfun for being non-NULL. */
12418 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12419 && base_reg && !index_reg && !disp
12420 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12421 disp = const0_rtx;
12422
12423 /* Special case: encode reg+reg instead of reg*2. */
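  /* (E.g. a lone (mult (reg) (const_int 2)) becomes base + index with
     scale 1, so the address can be encoded as (%eax,%eax) instead of
     (,%eax,2), which would also require an explicit zero displacement;
     illustrative only.)  */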
12424 if (!base && index && scale == 2)
12425 base = index, base_reg = index_reg, scale = 1;
12426
12427 /* Special case: scaling cannot be encoded without base or displacement. */
12428 if (!base && !disp && index && scale != 1)
12429 disp = const0_rtx;
12430
12431 out->base = base;
12432 out->index = index;
12433 out->disp = disp;
12434 out->scale = scale;
12435 out->seg = seg;
12436
12437 return retval;
12438 }
12439 \f
12440 /* Return cost of the memory address x.
12441 For i386, it is better to use a complex address than let gcc copy
12442 the address into a reg and make a new pseudo. But not if the address
12443 requires two regs - that would mean more pseudos with longer
12444 lifetimes. */
12445 static int
12446 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12447 {
12448 struct ix86_address parts;
12449 int cost = 1;
12450 int ok = ix86_decompose_address (x, &parts);
12451
12452 gcc_assert (ok);
12453
12454 if (parts.base && GET_CODE (parts.base) == SUBREG)
12455 parts.base = SUBREG_REG (parts.base);
12456 if (parts.index && GET_CODE (parts.index) == SUBREG)
12457 parts.index = SUBREG_REG (parts.index);
12458
12459 /* Attempt to minimize number of registers in the address. */
12460 if ((parts.base
12461 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12462 || (parts.index
12463 && (!REG_P (parts.index)
12464 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12465 cost++;
12466
12467 /* When the address base or index is "pic_offset_table_rtx" we don't increase
12468 the address cost. When a memop with "pic_offset_table_rtx" is not invariant
12469 itself, it most likely means that the base or index is not invariant.
12470 Therefore only "pic_offset_table_rtx" could be hoisted out, which is not
12471 profitable for x86. */
12472 if (parts.base
12473 && (!pic_offset_table_rtx
12474 || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
12475 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12476 && parts.index
12477 && (!pic_offset_table_rtx
12478 || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
12479 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12480 && parts.base != parts.index)
12481 cost++;
12482
12483 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12484 since its predecode logic can't detect the length of instructions
12485 and it degenerates to vector decoding. Increase the cost of such
12486 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12487 to split such addresses or even refuse such addresses at all.
12488
12489 The following addressing modes are affected:
12490 [base+scale*index]
12491 [scale*index+disp]
12492 [base+index]
12493
12494 The first and last case may be avoidable by explicitly coding the zero in
12495 the memory address, but I don't have an AMD-K6 machine handy to check this
12496 theory. */
12497
12498 if (TARGET_K6
12499 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12500 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12501 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12502 cost += 10;
12503
12504 return cost;
12505 }
12506 \f
12507 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12508 this is used to form addresses to local data when -fPIC is in
12509 use. */
12510
12511 static bool
12512 darwin_local_data_pic (rtx disp)
12513 {
12514 return (GET_CODE (disp) == UNSPEC
12515 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12516 }
12517
12518 /* Determine if a given RTX is a valid constant. We already know this
12519 satisfies CONSTANT_P. */
12520
12521 static bool
12522 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12523 {
12524 switch (GET_CODE (x))
12525 {
12526 case CONST:
12527 x = XEXP (x, 0);
12528
12529 if (GET_CODE (x) == PLUS)
12530 {
12531 if (!CONST_INT_P (XEXP (x, 1)))
12532 return false;
12533 x = XEXP (x, 0);
12534 }
12535
12536 if (TARGET_MACHO && darwin_local_data_pic (x))
12537 return true;
12538
12539 /* Only some unspecs are valid as "constants". */
12540 if (GET_CODE (x) == UNSPEC)
12541 switch (XINT (x, 1))
12542 {
12543 case UNSPEC_GOT:
12544 case UNSPEC_GOTOFF:
12545 case UNSPEC_PLTOFF:
12546 return TARGET_64BIT;
12547 case UNSPEC_TPOFF:
12548 case UNSPEC_NTPOFF:
12549 x = XVECEXP (x, 0, 0);
12550 return (GET_CODE (x) == SYMBOL_REF
12551 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12552 case UNSPEC_DTPOFF:
12553 x = XVECEXP (x, 0, 0);
12554 return (GET_CODE (x) == SYMBOL_REF
12555 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12556 default:
12557 return false;
12558 }
12559
12560 /* We must have drilled down to a symbol. */
12561 if (GET_CODE (x) == LABEL_REF)
12562 return true;
12563 if (GET_CODE (x) != SYMBOL_REF)
12564 return false;
12565 /* FALLTHRU */
12566
12567 case SYMBOL_REF:
12568 /* TLS symbols are never valid. */
12569 if (SYMBOL_REF_TLS_MODEL (x))
12570 return false;
12571
12572 /* DLLIMPORT symbols are never valid. */
12573 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12574 && SYMBOL_REF_DLLIMPORT_P (x))
12575 return false;
12576
12577 #if TARGET_MACHO
12578 /* mdynamic-no-pic */
12579 if (MACHO_DYNAMIC_NO_PIC_P)
12580 return machopic_symbol_defined_p (x);
12581 #endif
12582 break;
12583
12584 case CONST_DOUBLE:
12585 if (GET_MODE (x) == TImode
12586 && x != CONST0_RTX (TImode)
12587 && !TARGET_64BIT)
12588 return false;
12589 break;
12590
12591 case CONST_VECTOR:
12592 if (!standard_sse_constant_p (x))
12593 return false;
12594
12595 default:
12596 break;
12597 }
12598
12599 /* Otherwise we handle everything else in the move patterns. */
12600 return true;
12601 }
12602
12603 /* Determine if it's legal to put X into the constant pool. This
12604 is not possible for the address of thread-local symbols, which
12605 is checked above. */
12606
12607 static bool
12608 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12609 {
12610 /* We can always put integral constants and vectors in memory. */
12611 switch (GET_CODE (x))
12612 {
12613 case CONST_INT:
12614 case CONST_DOUBLE:
12615 case CONST_VECTOR:
12616 return false;
12617
12618 default:
12619 break;
12620 }
12621 return !ix86_legitimate_constant_p (mode, x);
12622 }
12623
12624 /* Return true if the symbol is marked as dllimport or as a stub-variable,
12625 otherwise false. */
12626
12627 static bool
12628 is_imported_p (rtx x)
12629 {
12630 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12631 || GET_CODE (x) != SYMBOL_REF)
12632 return false;
12633
12634 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12635 }
12636
12637
12638 /* Nonzero if the constant value X is a legitimate general operand
12639 when generating PIC code. It is given that flag_pic is on and
12640 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12641
12642 bool
12643 legitimate_pic_operand_p (rtx x)
12644 {
12645 rtx inner;
12646
12647 switch (GET_CODE (x))
12648 {
12649 case CONST:
12650 inner = XEXP (x, 0);
12651 if (GET_CODE (inner) == PLUS
12652 && CONST_INT_P (XEXP (inner, 1)))
12653 inner = XEXP (inner, 0);
12654
12655 /* Only some unspecs are valid as "constants". */
12656 if (GET_CODE (inner) == UNSPEC)
12657 switch (XINT (inner, 1))
12658 {
12659 case UNSPEC_GOT:
12660 case UNSPEC_GOTOFF:
12661 case UNSPEC_PLTOFF:
12662 return TARGET_64BIT;
12663 case UNSPEC_TPOFF:
12664 x = XVECEXP (inner, 0, 0);
12665 return (GET_CODE (x) == SYMBOL_REF
12666 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12667 case UNSPEC_MACHOPIC_OFFSET:
12668 return legitimate_pic_address_disp_p (x);
12669 default:
12670 return false;
12671 }
12672 /* FALLTHRU */
12673
12674 case SYMBOL_REF:
12675 case LABEL_REF:
12676 return legitimate_pic_address_disp_p (x);
12677
12678 default:
12679 return true;
12680 }
12681 }
12682
12683 /* Determine if a given CONST RTX is a valid memory displacement
12684 in PIC mode. */
12685
12686 bool
12687 legitimate_pic_address_disp_p (rtx disp)
12688 {
12689 bool saw_plus;
12690
12691 /* In 64bit mode we can allow direct addresses of symbols and labels
12692 when they are not dynamic symbols. */
12693 if (TARGET_64BIT)
12694 {
12695 rtx op0 = disp, op1;
12696
12697 switch (GET_CODE (disp))
12698 {
12699 case LABEL_REF:
12700 return true;
12701
12702 case CONST:
12703 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12704 break;
12705 op0 = XEXP (XEXP (disp, 0), 0);
12706 op1 = XEXP (XEXP (disp, 0), 1);
12707 if (!CONST_INT_P (op1)
12708 || INTVAL (op1) >= 16*1024*1024
12709 || INTVAL (op1) < -16*1024*1024)
12710 break;
12711 if (GET_CODE (op0) == LABEL_REF)
12712 return true;
12713 if (GET_CODE (op0) == CONST
12714 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12715 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12716 return true;
12717 if (GET_CODE (op0) == UNSPEC
12718 && XINT (op0, 1) == UNSPEC_PCREL)
12719 return true;
12720 if (GET_CODE (op0) != SYMBOL_REF)
12721 break;
12722 /* FALLTHRU */
12723
12724 case SYMBOL_REF:
12725 /* TLS references should always be enclosed in UNSPEC.
12726 A dllimported symbol always needs to be resolved. */
12727 if (SYMBOL_REF_TLS_MODEL (op0)
12728 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12729 return false;
12730
12731 if (TARGET_PECOFF)
12732 {
12733 if (is_imported_p (op0))
12734 return true;
12735
12736 if (SYMBOL_REF_FAR_ADDR_P (op0)
12737 || !SYMBOL_REF_LOCAL_P (op0))
12738 break;
12739
12740 /* Function symbols need to be resolved only for
12741 the large model.
12742 For the small model we don't need to resolve anything
12743 here. */
12744 if ((ix86_cmodel != CM_LARGE_PIC
12745 && SYMBOL_REF_FUNCTION_P (op0))
12746 || ix86_cmodel == CM_SMALL_PIC)
12747 return true;
12748 /* Non-external symbols don't need to be resolved for
12749 the large and medium models. */
12750 if ((ix86_cmodel == CM_LARGE_PIC
12751 || ix86_cmodel == CM_MEDIUM_PIC)
12752 && !SYMBOL_REF_EXTERNAL_P (op0))
12753 return true;
12754 }
12755 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12756 && SYMBOL_REF_LOCAL_P (op0)
12757 && ix86_cmodel != CM_LARGE_PIC)
12758 return true;
12759 break;
12760
12761 default:
12762 break;
12763 }
12764 }
12765 if (GET_CODE (disp) != CONST)
12766 return false;
12767 disp = XEXP (disp, 0);
12768
12769 if (TARGET_64BIT)
12770 {
12771 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
12772 of GOT tables. We should not need these anyway. */
12773 if (GET_CODE (disp) != UNSPEC
12774 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12775 && XINT (disp, 1) != UNSPEC_GOTOFF
12776 && XINT (disp, 1) != UNSPEC_PCREL
12777 && XINT (disp, 1) != UNSPEC_PLTOFF))
12778 return false;
12779
12780 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12781 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12782 return false;
12783 return true;
12784 }
12785
12786 saw_plus = false;
12787 if (GET_CODE (disp) == PLUS)
12788 {
12789 if (!CONST_INT_P (XEXP (disp, 1)))
12790 return false;
12791 disp = XEXP (disp, 0);
12792 saw_plus = true;
12793 }
12794
12795 if (TARGET_MACHO && darwin_local_data_pic (disp))
12796 return true;
12797
12798 if (GET_CODE (disp) != UNSPEC)
12799 return false;
12800
12801 switch (XINT (disp, 1))
12802 {
12803 case UNSPEC_GOT:
12804 if (saw_plus)
12805 return false;
12806 /* We need to check for both symbols and labels because VxWorks loads
12807 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12808 details. */
12809 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12810 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12811 case UNSPEC_GOTOFF:
12812 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12813 While the ABI also specifies a 32bit relocation, we don't produce it in
12814 the small PIC model at all. */
12815 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12816 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12817 && !TARGET_64BIT)
12818 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12819 return false;
12820 case UNSPEC_GOTTPOFF:
12821 case UNSPEC_GOTNTPOFF:
12822 case UNSPEC_INDNTPOFF:
12823 if (saw_plus)
12824 return false;
12825 disp = XVECEXP (disp, 0, 0);
12826 return (GET_CODE (disp) == SYMBOL_REF
12827 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12828 case UNSPEC_NTPOFF:
12829 disp = XVECEXP (disp, 0, 0);
12830 return (GET_CODE (disp) == SYMBOL_REF
12831 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12832 case UNSPEC_DTPOFF:
12833 disp = XVECEXP (disp, 0, 0);
12834 return (GET_CODE (disp) == SYMBOL_REF
12835 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12836 }
12837
12838 return false;
12839 }
12840
12841 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Push reloads for the
12842 invalid parts of address X and return true if the calling macro should
12843 goto WIN, or false if no replacement is called for. */
12845
12846 bool
12847 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12848 int)
12849 {
12850 /* Reload can generate:
12851
12852 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12853 (reg:DI 97))
12854 (reg:DI 2 cx))
12855
12856 This RTX is rejected by ix86_legitimate_address_p due to
12857 non-strictness of base register 97. Following this rejection,
12858 reload pushes all three components into separate registers,
12859 creating an invalid memory address RTX.
12860
12861 The following code reloads only the invalid part of the
12862 memory address RTX. */
12863
12864 if (GET_CODE (x) == PLUS
12865 && REG_P (XEXP (x, 1))
12866 && GET_CODE (XEXP (x, 0)) == PLUS
12867 && REG_P (XEXP (XEXP (x, 0), 1)))
12868 {
12869 rtx base, index;
12870 bool something_reloaded = false;
12871
12872 base = XEXP (XEXP (x, 0), 1);
12873 if (!REG_OK_FOR_BASE_STRICT_P (base))
12874 {
12875 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12876 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12877 opnum, (enum reload_type) type);
12878 something_reloaded = true;
12879 }
12880
12881 index = XEXP (x, 1);
12882 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12883 {
12884 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12885 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12886 opnum, (enum reload_type) type);
12887 something_reloaded = true;
12888 }
12889
12890 gcc_assert (something_reloaded);
12891 return true;
12892 }
12893
12894 return false;
12895 }
12896
12897 /* Determine if OP is a suitable RTX for an address register.
12898 Return the naked register if a register or a register subreg is
12899 found, otherwise return NULL_RTX. */
12900
12901 static rtx
12902 ix86_validate_address_register (rtx op)
12903 {
12904 enum machine_mode mode = GET_MODE (op);
12905
12906 /* Only SImode or DImode registers can form the address. */
12907 if (mode != SImode && mode != DImode)
12908 return NULL_RTX;
12909
12910 if (REG_P (op))
12911 return op;
12912 else if (GET_CODE (op) == SUBREG)
12913 {
12914 rtx reg = SUBREG_REG (op);
12915
12916 if (!REG_P (reg))
12917 return NULL_RTX;
12918
12919 mode = GET_MODE (reg);
12920
12921 /* Don't allow SUBREGs that span more than a word. It can
12922 lead to spill failures when the register is one word out
12923 of a two word structure. */
12924 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12925 return NULL_RTX;
12926
12927 /* Allow only SUBREGs of non-eliminable hard registers. */
12928 if (register_no_elim_operand (reg, mode))
12929 return reg;
12930 }
12931
12932 /* Op is not a register. */
12933 return NULL_RTX;
12934 }
12935
12936 /* Recognizes RTL expressions that are valid memory addresses for an
12937 instruction. The MODE argument is the machine mode for the MEM
12938 expression that wants to use this address.
12939
12940 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12941 convert common non-canonical forms to canonical form so that they will
12942 be recognized. */
12943
12944 static bool
12945 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12946 {
12947 struct ix86_address parts;
12948 rtx base, index, disp;
12949 HOST_WIDE_INT scale;
12950 enum ix86_address_seg seg;
12951
12952 if (ix86_decompose_address (addr, &parts) <= 0)
12953 /* Decomposition failed. */
12954 return false;
12955
12956 base = parts.base;
12957 index = parts.index;
12958 disp = parts.disp;
12959 scale = parts.scale;
12960 seg = parts.seg;
12961
12962 /* Validate base register. */
12963 if (base)
12964 {
12965 rtx reg = ix86_validate_address_register (base);
12966
12967 if (reg == NULL_RTX)
12968 return false;
12969
12970 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12971 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12972 /* Base is not valid. */
12973 return false;
12974 }
12975
12976 /* Validate index register. */
12977 if (index)
12978 {
12979 rtx reg = ix86_validate_address_register (index);
12980
12981 if (reg == NULL_RTX)
12982 return false;
12983
12984 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12985 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12986 /* Index is not valid. */
12987 return false;
12988 }
12989
12990 /* Index and base should have the same mode. */
12991 if (base && index
12992 && GET_MODE (base) != GET_MODE (index))
12993 return false;
12994
12995 /* Address override works only on the (%reg) part of %fs:(%reg). */
12996 if (seg != SEG_DEFAULT
12997 && ((base && GET_MODE (base) != word_mode)
12998 || (index && GET_MODE (index) != word_mode)))
12999 return false;
13000
13001 /* Validate scale factor. */
13002 if (scale != 1)
13003 {
13004 if (!index)
13005 /* Scale without index. */
13006 return false;
13007
13008 if (scale != 2 && scale != 4 && scale != 8)
13009 /* Scale is not a valid multiplier. */
13010 return false;
13011 }
13012
13013 /* Validate displacement. */
13014 if (disp)
13015 {
13016 if (GET_CODE (disp) == CONST
13017 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13018 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13019 switch (XINT (XEXP (disp, 0), 1))
13020 {
13021 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13022 used. While the ABI also specifies 32bit relocations, we don't produce
13023 them at all and use IP-relative addressing instead. */
13024 case UNSPEC_GOT:
13025 case UNSPEC_GOTOFF:
13026 gcc_assert (flag_pic);
13027 if (!TARGET_64BIT)
13028 goto is_legitimate_pic;
13029
13030 /* 64bit address unspec. */
13031 return false;
13032
13033 case UNSPEC_GOTPCREL:
13034 case UNSPEC_PCREL:
13035 gcc_assert (flag_pic);
13036 goto is_legitimate_pic;
13037
13038 case UNSPEC_GOTTPOFF:
13039 case UNSPEC_GOTNTPOFF:
13040 case UNSPEC_INDNTPOFF:
13041 case UNSPEC_NTPOFF:
13042 case UNSPEC_DTPOFF:
13043 break;
13044
13045 case UNSPEC_STACK_CHECK:
13046 gcc_assert (flag_split_stack);
13047 break;
13048
13049 default:
13050 /* Invalid address unspec. */
13051 return false;
13052 }
13053
13054 else if (SYMBOLIC_CONST (disp)
13055 && (flag_pic
13056 || (TARGET_MACHO
13057 #if TARGET_MACHO
13058 && MACHOPIC_INDIRECT
13059 && !machopic_operand_p (disp)
13060 #endif
13061 )))
13062 {
13063
13064 is_legitimate_pic:
13065 if (TARGET_64BIT && (index || base))
13066 {
13067 /* foo@dtpoff(%rX) is ok. */
13068 if (GET_CODE (disp) != CONST
13069 || GET_CODE (XEXP (disp, 0)) != PLUS
13070 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13071 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13072 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13073 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13074 /* Non-constant pic memory reference. */
13075 return false;
13076 }
13077 else if ((!TARGET_MACHO || flag_pic)
13078 && ! legitimate_pic_address_disp_p (disp))
13079 /* Displacement is an invalid pic construct. */
13080 return false;
13081 #if TARGET_MACHO
13082 else if (MACHO_DYNAMIC_NO_PIC_P
13083 && !ix86_legitimate_constant_p (Pmode, disp))
13084 /* displacement must be referenced via non_lazy_pointer */
13085 return false;
13086 #endif
13087
13088 /* This code used to verify that a symbolic pic displacement
13089 includes the pic_offset_table_rtx register.
13090
13091 While this is a good idea, unfortunately these constructs may
13092 be created by "adds using lea" optimization for incorrect
13093 code like:
13094
13095 int a;
13096 int foo(int i)
13097 {
13098 return *(&a+i);
13099 }
13100
13101 This code is nonsensical, but results in addressing the
13102 GOT table with a pic_offset_table_rtx base. We can't
13103 just refuse it easily, since it gets matched by the
13104 "addsi3" pattern, which later gets split to lea in case
13105 the output register differs from the input. While this
13106 could be handled by a separate addsi pattern for this case
13107 that never results in lea, disabling this test seems to be
13108 the easier and correct fix for the crash. */
13109 }
13110 else if (GET_CODE (disp) != LABEL_REF
13111 && !CONST_INT_P (disp)
13112 && (GET_CODE (disp) != CONST
13113 || !ix86_legitimate_constant_p (Pmode, disp))
13114 && (GET_CODE (disp) != SYMBOL_REF
13115 || !ix86_legitimate_constant_p (Pmode, disp)))
13116 /* Displacement is not constant. */
13117 return false;
13118 else if (TARGET_64BIT
13119 && !x86_64_immediate_operand (disp, VOIDmode))
13120 /* Displacement is out of range. */
13121 return false;
13122 /* In x32 mode, constant addresses are sign-extended to 64bit, so
13123 we have to prevent addresses from 0x80000000 to 0xffffffff. */
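      /* For instance, the constant address 0x80000000 would be
	 sign-extended to 0xffffffff80000000 and reference the wrong
	 location, so such displacements are rejected below.  */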
13124 else if (TARGET_X32 && !(index || base)
13125 && CONST_INT_P (disp)
13126 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13127 return false;
13128 }
13129
13130 /* Everything looks valid. */
13131 return true;
13132 }
13133
13134 /* Determine if a given RTX is a valid constant address. */
13135
13136 bool
13137 constant_address_p (rtx x)
13138 {
13139 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13140 }
13141 \f
13142 /* Return a unique alias set for the GOT. */
13143
13144 static alias_set_type
13145 ix86_GOT_alias_set (void)
13146 {
13147 static alias_set_type set = -1;
13148 if (set == -1)
13149 set = new_alias_set ();
13150 return set;
13151 }
13152
13153 /* Set regs_ever_live for the PIC base address register
13154 to true if required. */
13155 static void
13156 set_pic_reg_ever_live ()
13157 {
13158 if (reload_in_progress)
13159 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13160 }
13161
13162 /* Return a legitimate reference for ORIG (an address) using the
13163 register REG. If REG is 0, a new pseudo is generated.
13164
13165 There are two types of references that must be handled:
13166
13167 1. Global data references must load the address from the GOT, via
13168 the PIC reg. An insn is emitted to do this load, and the reg is
13169 returned.
13170
13171 2. Static data references, constant pool addresses, and code labels
13172 compute the address as an offset from the GOT, whose base is in
13173 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13174 differentiate them from global data objects. The returned
13175 address is the PIC reg + an unspec constant.
13176
13177 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13178 reg also appears in the address. */
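/* For instance (illustrative, 32-bit ELF): a reference to a global
   variable "x" becomes a load from x@GOT(%ebx) (case 1 above), while a
   reference to a file-local "y" becomes the address y@GOTOFF(%ebx),
   i.e. the PIC register plus an UNSPEC_GOTOFF constant (case 2 above).  */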
13179
13180 static rtx
13181 legitimize_pic_address (rtx orig, rtx reg)
13182 {
13183 rtx addr = orig;
13184 rtx new_rtx = orig;
13185
13186 #if TARGET_MACHO
13187 if (TARGET_MACHO && !TARGET_64BIT)
13188 {
13189 if (reg == 0)
13190 reg = gen_reg_rtx (Pmode);
13191 /* Use the generic Mach-O PIC machinery. */
13192 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13193 }
13194 #endif
13195
13196 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13197 {
13198 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13199 if (tmp)
13200 return tmp;
13201 }
13202
13203 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13204 new_rtx = addr;
13205 else if (TARGET_64BIT && !TARGET_PECOFF
13206 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13207 {
13208 rtx tmpreg;
13209 /* This symbol may be referenced via a displacement from the PIC
13210 base address (@GOTOFF). */
13211
13212 set_pic_reg_ever_live ();
13213 if (GET_CODE (addr) == CONST)
13214 addr = XEXP (addr, 0);
13215 if (GET_CODE (addr) == PLUS)
13216 {
13217 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13218 UNSPEC_GOTOFF);
13219 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13220 }
13221 else
13222 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13223 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13224 if (!reg)
13225 tmpreg = gen_reg_rtx (Pmode);
13226 else
13227 tmpreg = reg;
13228 emit_move_insn (tmpreg, new_rtx);
13229
13230 if (reg != 0)
13231 {
13232 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13233 tmpreg, 1, OPTAB_DIRECT);
13234 new_rtx = reg;
13235 }
13236 else
13237 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13238 }
13239 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13240 {
13241 /* This symbol may be referenced via a displacement from the PIC
13242 base address (@GOTOFF). */
13243
13244 set_pic_reg_ever_live ();
13245 if (GET_CODE (addr) == CONST)
13246 addr = XEXP (addr, 0);
13247 if (GET_CODE (addr) == PLUS)
13248 {
13249 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13250 UNSPEC_GOTOFF);
13251 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13252 }
13253 else
13254 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13255 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13256 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13257
13258 if (reg != 0)
13259 {
13260 emit_move_insn (reg, new_rtx);
13261 new_rtx = reg;
13262 }
13263 }
13264 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13265 /* We can't use @GOTOFF for text labels on VxWorks;
13266 see gotoff_operand. */
13267 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13268 {
13269 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13270 if (tmp)
13271 return tmp;
13272
13273 /* For x64 PE-COFF there is no GOT table, so we use the address
13274 directly. */
13275 if (TARGET_64BIT && TARGET_PECOFF)
13276 {
13277 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13278 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13279
13280 if (reg == 0)
13281 reg = gen_reg_rtx (Pmode);
13282 emit_move_insn (reg, new_rtx);
13283 new_rtx = reg;
13284 }
13285 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13286 {
13287 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13288 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13289 new_rtx = gen_const_mem (Pmode, new_rtx);
13290 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13291
13292 if (reg == 0)
13293 reg = gen_reg_rtx (Pmode);
13294 /* Use gen_movsi directly, otherwise the address is loaded
13295 into a register for CSE. We don't want to CSE these addresses;
13296 instead we CSE addresses from the GOT table, so skip this. */
13297 emit_insn (gen_movsi (reg, new_rtx));
13298 new_rtx = reg;
13299 }
13300 else
13301 {
13302 /* This symbol must be referenced via a load from the
13303 Global Offset Table (@GOT). */
13304
13305 set_pic_reg_ever_live ();
13306 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13307 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13308 if (TARGET_64BIT)
13309 new_rtx = force_reg (Pmode, new_rtx);
13310 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13311 new_rtx = gen_const_mem (Pmode, new_rtx);
13312 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13313
13314 if (reg == 0)
13315 reg = gen_reg_rtx (Pmode);
13316 emit_move_insn (reg, new_rtx);
13317 new_rtx = reg;
13318 }
13319 }
13320 else
13321 {
13322 if (CONST_INT_P (addr)
13323 && !x86_64_immediate_operand (addr, VOIDmode))
13324 {
13325 if (reg)
13326 {
13327 emit_move_insn (reg, addr);
13328 new_rtx = reg;
13329 }
13330 else
13331 new_rtx = force_reg (Pmode, addr);
13332 }
13333 else if (GET_CODE (addr) == CONST)
13334 {
13335 addr = XEXP (addr, 0);
13336
13337 /* We must match stuff we generate before. Assume the only
13338 unspecs that can get here are ours. Not that we could do
13339 anything with them anyway.... */
13340 if (GET_CODE (addr) == UNSPEC
13341 || (GET_CODE (addr) == PLUS
13342 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13343 return orig;
13344 gcc_assert (GET_CODE (addr) == PLUS);
13345 }
13346 if (GET_CODE (addr) == PLUS)
13347 {
13348 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13349
13350 /* Check first to see if this is a constant offset from a @GOTOFF
13351 symbol reference. */
13352 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13353 && CONST_INT_P (op1))
13354 {
13355 if (!TARGET_64BIT)
13356 {
13357 set_pic_reg_ever_live ();
13358 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13359 UNSPEC_GOTOFF);
13360 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13361 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13362 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13363
13364 if (reg != 0)
13365 {
13366 emit_move_insn (reg, new_rtx);
13367 new_rtx = reg;
13368 }
13369 }
13370 else
13371 {
13372 if (INTVAL (op1) < -16*1024*1024
13373 || INTVAL (op1) >= 16*1024*1024)
13374 {
13375 if (!x86_64_immediate_operand (op1, Pmode))
13376 op1 = force_reg (Pmode, op1);
13377 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13378 }
13379 }
13380 }
13381 else
13382 {
13383 rtx base = legitimize_pic_address (op0, reg);
13384 enum machine_mode mode = GET_MODE (base);
13385 new_rtx
13386 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13387
13388 if (CONST_INT_P (new_rtx))
13389 {
13390 if (INTVAL (new_rtx) < -16*1024*1024
13391 || INTVAL (new_rtx) >= 16*1024*1024)
13392 {
13393 if (!x86_64_immediate_operand (new_rtx, mode))
13394 new_rtx = force_reg (mode, new_rtx);
13395 new_rtx
13396 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13397 }
13398 else
13399 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13400 }
13401 else
13402 {
13403 if (GET_CODE (new_rtx) == PLUS
13404 && CONSTANT_P (XEXP (new_rtx, 1)))
13405 {
13406 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13407 new_rtx = XEXP (new_rtx, 1);
13408 }
13409 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13410 }
13411 }
13412 }
13413 }
13414 return new_rtx;
13415 }
13416 \f
13417 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13418
13419 static rtx
13420 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13421 {
13422 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13423
13424 if (GET_MODE (tp) != tp_mode)
13425 {
13426 gcc_assert (GET_MODE (tp) == SImode);
13427 gcc_assert (tp_mode == DImode);
13428
13429 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13430 }
13431
13432 if (to_reg)
13433 tp = copy_to_mode_reg (tp_mode, tp);
13434
13435 return tp;
13436 }
13437
13438 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13439
13440 static GTY(()) rtx ix86_tls_symbol;
13441
13442 static rtx
13443 ix86_tls_get_addr (void)
13444 {
13445 if (!ix86_tls_symbol)
13446 {
13447 const char *sym
13448 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13449 ? "___tls_get_addr" : "__tls_get_addr");
13450
13451 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13452 }
13453
13454 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13455 {
13456 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13457 UNSPEC_PLTOFF);
13458 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13459 gen_rtx_CONST (Pmode, unspec));
13460 }
13461
13462 return ix86_tls_symbol;
13463 }
13464
13465 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13466
13467 static GTY(()) rtx ix86_tls_module_base_symbol;
13468
13469 rtx
13470 ix86_tls_module_base (void)
13471 {
13472 if (!ix86_tls_module_base_symbol)
13473 {
13474 ix86_tls_module_base_symbol
13475 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13476
13477 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13478 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13479 }
13480
13481 return ix86_tls_module_base_symbol;
13482 }
13483
13484 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13485 false if we expect this to be used for a memory address and true if
13486 we expect to load the address into a register. */
13487
13488 static rtx
13489 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13490 {
13491 rtx dest, base, off;
13492 rtx pic = NULL_RTX, tp = NULL_RTX;
13493 enum machine_mode tp_mode = Pmode;
13494 int type;
13495
13496 /* Fall back to the global dynamic model if the tool chain cannot support local
13497 dynamic. */
13498 if (TARGET_SUN_TLS && !TARGET_64BIT
13499 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13500 && model == TLS_MODEL_LOCAL_DYNAMIC)
13501 model = TLS_MODEL_GLOBAL_DYNAMIC;
13502
13503 switch (model)
13504 {
13505 case TLS_MODEL_GLOBAL_DYNAMIC:
13506 dest = gen_reg_rtx (Pmode);
13507
13508 if (!TARGET_64BIT)
13509 {
13510 if (flag_pic && !TARGET_PECOFF)
13511 pic = pic_offset_table_rtx;
13512 else
13513 {
13514 pic = gen_reg_rtx (Pmode);
13515 emit_insn (gen_set_got (pic));
13516 }
13517 }
13518
13519 if (TARGET_GNU2_TLS)
13520 {
13521 if (TARGET_64BIT)
13522 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13523 else
13524 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13525
13526 tp = get_thread_pointer (Pmode, true);
13527 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13528
13529 if (GET_MODE (x) != Pmode)
13530 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13531
13532 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13533 }
13534 else
13535 {
13536 rtx caddr = ix86_tls_get_addr ();
13537
13538 if (TARGET_64BIT)
13539 {
13540 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13541 rtx_insn *insns;
13542
13543 start_sequence ();
13544 emit_call_insn
13545 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13546 insns = get_insns ();
13547 end_sequence ();
13548
13549 if (GET_MODE (x) != Pmode)
13550 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13551
13552 RTL_CONST_CALL_P (insns) = 1;
13553 emit_libcall_block (insns, dest, rax, x);
13554 }
13555 else
13556 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13557 }
13558 break;
13559
13560 case TLS_MODEL_LOCAL_DYNAMIC:
13561 base = gen_reg_rtx (Pmode);
13562
13563 if (!TARGET_64BIT)
13564 {
13565 if (flag_pic)
13566 pic = pic_offset_table_rtx;
13567 else
13568 {
13569 pic = gen_reg_rtx (Pmode);
13570 emit_insn (gen_set_got (pic));
13571 }
13572 }
13573
13574 if (TARGET_GNU2_TLS)
13575 {
13576 rtx tmp = ix86_tls_module_base ();
13577
13578 if (TARGET_64BIT)
13579 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13580 else
13581 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13582
13583 tp = get_thread_pointer (Pmode, true);
13584 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13585 gen_rtx_MINUS (Pmode, tmp, tp));
13586 }
13587 else
13588 {
13589 rtx caddr = ix86_tls_get_addr ();
13590
13591 if (TARGET_64BIT)
13592 {
13593 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13594 rtx_insn *insns;
13595 rtx eqv;
13596
13597 start_sequence ();
13598 emit_call_insn
13599 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13600 insns = get_insns ();
13601 end_sequence ();
13602
13603 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13604 share the LD_BASE result with other LD model accesses. */
13605 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13606 UNSPEC_TLS_LD_BASE);
13607
13608 RTL_CONST_CALL_P (insns) = 1;
13609 emit_libcall_block (insns, base, rax, eqv);
13610 }
13611 else
13612 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13613 }
13614
13615 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13616 off = gen_rtx_CONST (Pmode, off);
13617
13618 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13619
13620 if (TARGET_GNU2_TLS)
13621 {
13622 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13623
13624 if (GET_MODE (x) != Pmode)
13625 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13626
13627 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13628 }
13629 break;
13630
13631 case TLS_MODEL_INITIAL_EXEC:
13632 if (TARGET_64BIT)
13633 {
13634 if (TARGET_SUN_TLS && !TARGET_X32)
13635 {
13636 /* The Sun linker took the AMD64 TLS spec literally
13637 and can only handle %rax as destination of the
13638 initial executable code sequence. */
13639
13640 dest = gen_reg_rtx (DImode);
13641 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13642 return dest;
13643 }
13644
13645 /* Generate DImode references to avoid %fs:(%reg32)
13646 problems and the linker IE->LE relaxation bug. */
13647 tp_mode = DImode;
13648 pic = NULL;
13649 type = UNSPEC_GOTNTPOFF;
13650 }
13651 else if (flag_pic)
13652 {
13653 set_pic_reg_ever_live ();
13654 pic = pic_offset_table_rtx;
13655 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13656 }
13657 else if (!TARGET_ANY_GNU_TLS)
13658 {
13659 pic = gen_reg_rtx (Pmode);
13660 emit_insn (gen_set_got (pic));
13661 type = UNSPEC_GOTTPOFF;
13662 }
13663 else
13664 {
13665 pic = NULL;
13666 type = UNSPEC_INDNTPOFF;
13667 }
13668
13669 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13670 off = gen_rtx_CONST (tp_mode, off);
13671 if (pic)
13672 off = gen_rtx_PLUS (tp_mode, pic, off);
13673 off = gen_const_mem (tp_mode, off);
13674 set_mem_alias_set (off, ix86_GOT_alias_set ());
13675
13676 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13677 {
13678 base = get_thread_pointer (tp_mode,
13679 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13680 off = force_reg (tp_mode, off);
13681 return gen_rtx_PLUS (tp_mode, base, off);
13682 }
13683 else
13684 {
13685 base = get_thread_pointer (Pmode, true);
13686 dest = gen_reg_rtx (Pmode);
13687 emit_insn (ix86_gen_sub3 (dest, base, off));
13688 }
13689 break;
13690
13691 case TLS_MODEL_LOCAL_EXEC:
13692 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13693 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13694 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13695 off = gen_rtx_CONST (Pmode, off);
13696
13697 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13698 {
13699 base = get_thread_pointer (Pmode,
13700 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13701 return gen_rtx_PLUS (Pmode, base, off);
13702 }
13703 else
13704 {
13705 base = get_thread_pointer (Pmode, true);
13706 dest = gen_reg_rtx (Pmode);
13707 emit_insn (ix86_gen_sub3 (dest, base, off));
13708 }
13709 break;
13710
13711 default:
13712 gcc_unreachable ();
13713 }
13714
13715 return dest;
13716 }
13717
13718 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13719 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13720 unique refptr-DECL symbol corresponding to symbol DECL. */
13721
13722 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13723 htab_t dllimport_map;
13724
13725 static tree
13726 get_dllimport_decl (tree decl, bool beimport)
13727 {
13728 struct tree_map *h, in;
13729 void **loc;
13730 const char *name;
13731 const char *prefix;
13732 size_t namelen, prefixlen;
13733 char *imp_name;
13734 tree to;
13735 rtx rtl;
13736
13737 if (!dllimport_map)
13738 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13739
13740 in.hash = htab_hash_pointer (decl);
13741 in.base.from = decl;
13742 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13743 h = (struct tree_map *) *loc;
13744 if (h)
13745 return h->to;
13746
13747 *loc = h = ggc_alloc<tree_map> ();
13748 h->hash = in.hash;
13749 h->base.from = decl;
13750 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13751 VAR_DECL, NULL, ptr_type_node);
13752 DECL_ARTIFICIAL (to) = 1;
13753 DECL_IGNORED_P (to) = 1;
13754 DECL_EXTERNAL (to) = 1;
13755 TREE_READONLY (to) = 1;
13756
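/* Build the stub symbol name from the stripped assembler name of DECL,
prefixed with __imp_ for dllimport references or refptr. for external
references, taking the user label prefix into account. */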
13757 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13758 name = targetm.strip_name_encoding (name);
13759 if (beimport)
13760 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13761 ? "*__imp_" : "*__imp__";
13762 else
13763 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13764 namelen = strlen (name);
13765 prefixlen = strlen (prefix);
13766 imp_name = (char *) alloca (namelen + prefixlen + 1);
13767 memcpy (imp_name, prefix, prefixlen);
13768 memcpy (imp_name + prefixlen, name, namelen + 1);
13769
13770 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13771 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13772 SET_SYMBOL_REF_DECL (rtl, to);
13773 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13774 if (!beimport)
13775 {
13776 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13777 #ifdef SUB_TARGET_RECORD_STUB
13778 SUB_TARGET_RECORD_STUB (name);
13779 #endif
13780 }
13781
13782 rtl = gen_const_mem (Pmode, rtl);
13783 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13784
13785 SET_DECL_RTL (to, rtl);
13786 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13787
13788 return to;
13789 }
13790
13791 /* Expand SYMBOL into its corresponding far-address symbol.
13792 WANT_REG is true if we require the result be a register. */
13793
13794 static rtx
13795 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13796 {
13797 tree imp_decl;
13798 rtx x;
13799
13800 gcc_assert (SYMBOL_REF_DECL (symbol));
13801 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13802
13803 x = DECL_RTL (imp_decl);
13804 if (want_reg)
13805 x = force_reg (Pmode, x);
13806 return x;
13807 }
13808
13809 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13810 true if we require the result be a register. */
13811
13812 static rtx
13813 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13814 {
13815 tree imp_decl;
13816 rtx x;
13817
13818 gcc_assert (SYMBOL_REF_DECL (symbol));
13819 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13820
13821 x = DECL_RTL (imp_decl);
13822 if (want_reg)
13823 x = force_reg (Pmode, x);
13824 return x;
13825 }
13826
13827 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13828 is true if we require the result be a register. */
13829
13830 static rtx
13831 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13832 {
13833 if (!TARGET_PECOFF)
13834 return NULL_RTX;
13835
13836 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13837 {
13838 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13839 return legitimize_dllimport_symbol (addr, inreg);
13840 if (GET_CODE (addr) == CONST
13841 && GET_CODE (XEXP (addr, 0)) == PLUS
13842 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13843 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13844 {
13845 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13846 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13847 }
13848 }
13849
13850 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13851 return NULL_RTX;
13852 if (GET_CODE (addr) == SYMBOL_REF
13853 && !is_imported_p (addr)
13854 && SYMBOL_REF_EXTERNAL_P (addr)
13855 && SYMBOL_REF_DECL (addr))
13856 return legitimize_pe_coff_extern_decl (addr, inreg);
13857
13858 if (GET_CODE (addr) == CONST
13859 && GET_CODE (XEXP (addr, 0)) == PLUS
13860 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13861 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13862 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13863 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13864 {
13865 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13866 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13867 }
13868 return NULL_RTX;
13869 }
13870
13871 /* Try machine-dependent ways of modifying an illegitimate address
13872 to be legitimate. If we find one, return the new, valid address.
13873 This function is used in only one place: `memory_address' in explow.c.
13874
13875 OLDX is the address as it was before break_out_memory_refs was called.
13876 In some cases it is useful to look at this to decide what needs to be done.
13877
13878 It is always safe for this function to do nothing. It exists to recognize
13879 opportunities to optimize the output.
13880
13881 For the 80386, we handle X+REG by loading X into a register R and
13882 using R+REG. R will go in a general reg and indexing will be used.
13883 However, if REG is a broken-out memory address or multiplication,
13884 nothing needs to be done because REG can certainly go in a general reg.
13885
13886 When -fpic is used, special handling is needed for symbolic references.
13887 See comments by legitimize_pic_address in i386.c for details. */
13888
13889 static rtx
13890 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13891 {
13892 int changed = 0;
13893 unsigned log;
13894
13895 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13896 if (log)
13897 return legitimize_tls_address (x, (enum tls_model) log, false);
13898 if (GET_CODE (x) == CONST
13899 && GET_CODE (XEXP (x, 0)) == PLUS
13900 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13901 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13902 {
13903 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13904 (enum tls_model) log, false);
13905 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13906 }
13907
13908 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13909 {
13910 rtx tmp = legitimize_pe_coff_symbol (x, true);
13911 if (tmp)
13912 return tmp;
13913 }
13914
13915 if (flag_pic && SYMBOLIC_CONST (x))
13916 return legitimize_pic_address (x, 0);
13917
13918 #if TARGET_MACHO
13919 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13920 return machopic_indirect_data_reference (x, 0);
13921 #endif
13922
13923 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13924 if (GET_CODE (x) == ASHIFT
13925 && CONST_INT_P (XEXP (x, 1))
13926 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13927 {
13928 changed = 1;
13929 log = INTVAL (XEXP (x, 1));
13930 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13931 GEN_INT (1 << log));
13932 }
13933
13934 if (GET_CODE (x) == PLUS)
13935 {
13936 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13937
13938 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13939 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13940 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13941 {
13942 changed = 1;
13943 log = INTVAL (XEXP (XEXP (x, 0), 1));
13944 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13945 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13946 GEN_INT (1 << log));
13947 }
13948
13949 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13950 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13951 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13952 {
13953 changed = 1;
13954 log = INTVAL (XEXP (XEXP (x, 1), 1));
13955 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13956 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13957 GEN_INT (1 << log));
13958 }
13959
13960 /* Put multiply first if it isn't already. */
13961 if (GET_CODE (XEXP (x, 1)) == MULT)
13962 {
13963 rtx tmp = XEXP (x, 0);
13964 XEXP (x, 0) = XEXP (x, 1);
13965 XEXP (x, 1) = tmp;
13966 changed = 1;
13967 }
13968
13969 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13970 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13971 created by virtual register instantiation, register elimination, and
13972 similar optimizations. */
13973 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13974 {
13975 changed = 1;
13976 x = gen_rtx_PLUS (Pmode,
13977 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13978 XEXP (XEXP (x, 1), 0)),
13979 XEXP (XEXP (x, 1), 1));
13980 }
13981
13982 /* Canonicalize
13983 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13984 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13985 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13986 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13987 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13988 && CONSTANT_P (XEXP (x, 1)))
13989 {
13990 rtx constant;
13991 rtx other = NULL_RTX;
13992
13993 if (CONST_INT_P (XEXP (x, 1)))
13994 {
13995 constant = XEXP (x, 1);
13996 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13997 }
13998 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13999 {
14000 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14001 other = XEXP (x, 1);
14002 }
14003 else
14004 constant = 0;
14005
14006 if (constant)
14007 {
14008 changed = 1;
14009 x = gen_rtx_PLUS (Pmode,
14010 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14011 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14012 plus_constant (Pmode, other,
14013 INTVAL (constant)));
14014 }
14015 }
14016
14017 if (changed && ix86_legitimate_address_p (mode, x, false))
14018 return x;
14019
14020 if (GET_CODE (XEXP (x, 0)) == MULT)
14021 {
14022 changed = 1;
14023 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14024 }
14025
14026 if (GET_CODE (XEXP (x, 1)) == MULT)
14027 {
14028 changed = 1;
14029 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14030 }
14031
14032 if (changed
14033 && REG_P (XEXP (x, 1))
14034 && REG_P (XEXP (x, 0)))
14035 return x;
14036
14037 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14038 {
14039 changed = 1;
14040 x = legitimize_pic_address (x, 0);
14041 }
14042
14043 if (changed && ix86_legitimate_address_p (mode, x, false))
14044 return x;
14045
14046 if (REG_P (XEXP (x, 0)))
14047 {
14048 rtx temp = gen_reg_rtx (Pmode);
14049 rtx val = force_operand (XEXP (x, 1), temp);
14050 if (val != temp)
14051 {
14052 val = convert_to_mode (Pmode, val, 1);
14053 emit_move_insn (temp, val);
14054 }
14055
14056 XEXP (x, 1) = temp;
14057 return x;
14058 }
14059
14060 else if (REG_P (XEXP (x, 1)))
14061 {
14062 rtx temp = gen_reg_rtx (Pmode);
14063 rtx val = force_operand (XEXP (x, 0), temp);
14064 if (val != temp)
14065 {
14066 val = convert_to_mode (Pmode, val, 1);
14067 emit_move_insn (temp, val);
14068 }
14069
14070 XEXP (x, 0) = temp;
14071 return x;
14072 }
14073 }
14074
14075 return x;
14076 }
14077 \f
14078 /* Print an integer constant expression in assembler syntax. Addition
14079 and subtraction are the only arithmetic that may appear in these
14080 expressions. FILE is the stdio stream to write to, X is the rtx, and
14081 CODE is the operand print code from the output string. */
14082
14083 static void
14084 output_pic_addr_const (FILE *file, rtx x, int code)
14085 {
14086 char buf[256];
14087
14088 switch (GET_CODE (x))
14089 {
14090 case PC:
14091 gcc_assert (flag_pic);
14092 putc ('.', file);
14093 break;
14094
14095 case SYMBOL_REF:
14096 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14097 output_addr_const (file, x);
14098 else
14099 {
14100 const char *name = XSTR (x, 0);
14101
14102 /* Mark the decl as referenced so that cgraph will
14103 output the function. */
14104 if (SYMBOL_REF_DECL (x))
14105 mark_decl_referenced (SYMBOL_REF_DECL (x));
14106
14107 #if TARGET_MACHO
14108 if (MACHOPIC_INDIRECT
14109 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14110 name = machopic_indirection_name (x, /*stub_p=*/true);
14111 #endif
14112 assemble_name (file, name);
14113 }
14114 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14115 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14116 fputs ("@PLT", file);
14117 break;
14118
14119 case LABEL_REF:
14120 x = XEXP (x, 0);
14121 /* FALLTHRU */
14122 case CODE_LABEL:
14123 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14124 assemble_name (asm_out_file, buf);
14125 break;
14126
14127 case CONST_INT:
14128 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14129 break;
14130
14131 case CONST:
14132 /* This used to output parentheses around the expression,
14133 but that does not work on the 386 (either ATT or BSD assembler). */
14134 output_pic_addr_const (file, XEXP (x, 0), code);
14135 break;
14136
14137 case CONST_DOUBLE:
14138 if (GET_MODE (x) == VOIDmode)
14139 {
14140 /* We can use %d if the number is <32 bits and positive. */
14141 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14142 fprintf (file, "0x%lx%08lx",
14143 (unsigned long) CONST_DOUBLE_HIGH (x),
14144 (unsigned long) CONST_DOUBLE_LOW (x));
14145 else
14146 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14147 }
14148 else
14149 /* We can't handle floating point constants;
14150 TARGET_PRINT_OPERAND must handle them. */
14151 output_operand_lossage ("floating constant misused");
14152 break;
14153
14154 case PLUS:
14155 /* Some assemblers need integer constants to appear first. */
14156 if (CONST_INT_P (XEXP (x, 0)))
14157 {
14158 output_pic_addr_const (file, XEXP (x, 0), code);
14159 putc ('+', file);
14160 output_pic_addr_const (file, XEXP (x, 1), code);
14161 }
14162 else
14163 {
14164 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14165 output_pic_addr_const (file, XEXP (x, 1), code);
14166 putc ('+', file);
14167 output_pic_addr_const (file, XEXP (x, 0), code);
14168 }
14169 break;
14170
14171 case MINUS:
14172 if (!TARGET_MACHO)
14173 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14174 output_pic_addr_const (file, XEXP (x, 0), code);
14175 putc ('-', file);
14176 output_pic_addr_const (file, XEXP (x, 1), code);
14177 if (!TARGET_MACHO)
14178 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14179 break;
14180
14181 case UNSPEC:
14182 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14183 {
14184 bool f = i386_asm_output_addr_const_extra (file, x);
14185 gcc_assert (f);
14186 break;
14187 }
14188
14189 gcc_assert (XVECLEN (x, 0) == 1);
14190 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14191 switch (XINT (x, 1))
14192 {
14193 case UNSPEC_GOT:
14194 fputs ("@GOT", file);
14195 break;
14196 case UNSPEC_GOTOFF:
14197 fputs ("@GOTOFF", file);
14198 break;
14199 case UNSPEC_PLTOFF:
14200 fputs ("@PLTOFF", file);
14201 break;
14202 case UNSPEC_PCREL:
14203 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14204 "(%rip)" : "[rip]", file);
14205 break;
14206 case UNSPEC_GOTPCREL:
14207 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14208 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14209 break;
14210 case UNSPEC_GOTTPOFF:
14211 /* FIXME: This might be @TPOFF in Sun ld too. */
14212 fputs ("@gottpoff", file);
14213 break;
14214 case UNSPEC_TPOFF:
14215 fputs ("@tpoff", file);
14216 break;
14217 case UNSPEC_NTPOFF:
14218 if (TARGET_64BIT)
14219 fputs ("@tpoff", file);
14220 else
14221 fputs ("@ntpoff", file);
14222 break;
14223 case UNSPEC_DTPOFF:
14224 fputs ("@dtpoff", file);
14225 break;
14226 case UNSPEC_GOTNTPOFF:
14227 if (TARGET_64BIT)
14228 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14229 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14230 else
14231 fputs ("@gotntpoff", file);
14232 break;
14233 case UNSPEC_INDNTPOFF:
14234 fputs ("@indntpoff", file);
14235 break;
14236 #if TARGET_MACHO
14237 case UNSPEC_MACHOPIC_OFFSET:
14238 putc ('-', file);
14239 machopic_output_function_base_name (file);
14240 break;
14241 #endif
14242 default:
14243 output_operand_lossage ("invalid UNSPEC as operand");
14244 break;
14245 }
14246 break;
14247
14248 default:
14249 output_operand_lossage ("invalid expression as operand");
14250 }
14251 }
14252
14253 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14254 We need to emit DTP-relative relocations. */
14255
14256 static void ATTRIBUTE_UNUSED
14257 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14258 {
14259 fputs (ASM_LONG, file);
14260 output_addr_const (file, x);
14261 fputs ("@dtpoff", file);
14262 switch (size)
14263 {
14264 case 4:
14265 break;
14266 case 8:
14267 fputs (", 0", file);
14268 break;
14269 default:
14270 gcc_unreachable ();
14271 }
14272 }
14273
14274 /* Return true if X is a representation of the PIC register. This copes
14275 with calls from ix86_find_base_term, where the register might have
14276 been replaced by a cselib value. */
14277
14278 static bool
14279 ix86_pic_register_p (rtx x)
14280 {
14281 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14282 return (pic_offset_table_rtx
14283 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14284 else if (!REG_P (x))
14285 return false;
14286 else if (pic_offset_table_rtx)
14287 {
14288 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14289 return true;
14290 if (HARD_REGISTER_P (x)
14291 && !HARD_REGISTER_P (pic_offset_table_rtx)
14292 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14293 return true;
14294 return false;
14295 }
14296 else
14297 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14298 }
14299
14300 /* Helper function for ix86_delegitimize_address.
14301 Attempt to delegitimize TLS local-exec accesses. */
14302
14303 static rtx
14304 ix86_delegitimize_tls_address (rtx orig_x)
14305 {
14306 rtx x = orig_x, unspec;
14307 struct ix86_address addr;
14308
14309 if (!TARGET_TLS_DIRECT_SEG_REFS)
14310 return orig_x;
14311 if (MEM_P (x))
14312 x = XEXP (x, 0);
14313 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14314 return orig_x;
14315 if (ix86_decompose_address (x, &addr) == 0
14316 || addr.seg != DEFAULT_TLS_SEG_REG
14317 || addr.disp == NULL_RTX
14318 || GET_CODE (addr.disp) != CONST)
14319 return orig_x;
14320 unspec = XEXP (addr.disp, 0);
14321 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14322 unspec = XEXP (unspec, 0);
14323 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14324 return orig_x;
14325 x = XVECEXP (unspec, 0, 0);
14326 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14327 if (unspec != XEXP (addr.disp, 0))
14328 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14329 if (addr.index)
14330 {
14331 rtx idx = addr.index;
14332 if (addr.scale != 1)
14333 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14334 x = gen_rtx_PLUS (Pmode, idx, x);
14335 }
14336 if (addr.base)
14337 x = gen_rtx_PLUS (Pmode, addr.base, x);
14338 if (MEM_P (orig_x))
14339 x = replace_equiv_address_nv (orig_x, x);
14340 return x;
14341 }
14342
14343 /* In the name of slightly smaller debug output, and to cater to
14344 general assembler lossage, recognize PIC+GOTOFF and turn it back
14345 into a direct symbol reference.
14346
14347 On Darwin, this is necessary to avoid a crash, because Darwin
14348 has a different PIC label for each routine but the DWARF debugging
14349 information is not associated with any particular routine, so it's
14350 necessary to remove references to the PIC label from RTL stored by
14351 the DWARF output code. */
14352
14353 static rtx
14354 ix86_delegitimize_address (rtx x)
14355 {
14356 rtx orig_x = delegitimize_mem_from_attrs (x);
14357 /* addend is NULL or some rtx if x is something+GOTOFF where
14358 something doesn't include the PIC register. */
14359 rtx addend = NULL_RTX;
14360 /* reg_addend is NULL or a multiple of some register. */
14361 rtx reg_addend = NULL_RTX;
14362 /* const_addend is NULL or a const_int. */
14363 rtx const_addend = NULL_RTX;
14364 /* This is the result, or NULL. */
14365 rtx result = NULL_RTX;
14366
14367 x = orig_x;
14368
14369 if (MEM_P (x))
14370 x = XEXP (x, 0);
14371
14372 if (TARGET_64BIT)
14373 {
14374 if (GET_CODE (x) == CONST
14375 && GET_CODE (XEXP (x, 0)) == PLUS
14376 && GET_MODE (XEXP (x, 0)) == Pmode
14377 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14378 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14379 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14380 {
14381 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14382 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14383 if (MEM_P (orig_x))
14384 x = replace_equiv_address_nv (orig_x, x);
14385 return x;
14386 }
14387
14388 if (GET_CODE (x) == CONST
14389 && GET_CODE (XEXP (x, 0)) == UNSPEC
14390 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14391 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14392 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14393 {
14394 x = XVECEXP (XEXP (x, 0), 0, 0);
14395 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14396 {
14397 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14398 GET_MODE (x), 0);
14399 if (x == NULL_RTX)
14400 return orig_x;
14401 }
14402 return x;
14403 }
14404
14405 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14406 return ix86_delegitimize_tls_address (orig_x);
14407
14408 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14409 and -mcmodel=medium -fpic. */
14410 }
14411
14412 if (GET_CODE (x) != PLUS
14413 || GET_CODE (XEXP (x, 1)) != CONST)
14414 return ix86_delegitimize_tls_address (orig_x);
14415
14416 if (ix86_pic_register_p (XEXP (x, 0)))
14417 /* %ebx + GOT/GOTOFF */
14418 ;
14419 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14420 {
14421 /* %ebx + %reg * scale + GOT/GOTOFF */
14422 reg_addend = XEXP (x, 0);
14423 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14424 reg_addend = XEXP (reg_addend, 1);
14425 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14426 reg_addend = XEXP (reg_addend, 0);
14427 else
14428 {
14429 reg_addend = NULL_RTX;
14430 addend = XEXP (x, 0);
14431 }
14432 }
14433 else
14434 addend = XEXP (x, 0);
14435
14436 x = XEXP (XEXP (x, 1), 0);
14437 if (GET_CODE (x) == PLUS
14438 && CONST_INT_P (XEXP (x, 1)))
14439 {
14440 const_addend = XEXP (x, 1);
14441 x = XEXP (x, 0);
14442 }
14443
14444 if (GET_CODE (x) == UNSPEC
14445 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14446 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14447 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14448 && !MEM_P (orig_x) && !addend)))
14449 result = XVECEXP (x, 0, 0);
14450
14451 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14452 && !MEM_P (orig_x))
14453 result = XVECEXP (x, 0, 0);
14454
14455 if (! result)
14456 return ix86_delegitimize_tls_address (orig_x);
14457
14458 if (const_addend)
14459 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14460 if (reg_addend)
14461 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14462 if (addend)
14463 {
14464 /* If the rest of original X doesn't involve the PIC register, add
14465 addend and subtract pic_offset_table_rtx. This can happen e.g.
14466 for code like:
14467 leal (%ebx, %ecx, 4), %ecx
14468 ...
14469 movl foo@GOTOFF(%ecx), %edx
14470 in which case we return (%ecx - %ebx) + foo
14471 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14472 and reload has completed. */
14473 if (pic_offset_table_rtx
14474 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14475 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14476 pic_offset_table_rtx),
14477 result);
14478 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14479 {
14480 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14481 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14482 result = gen_rtx_PLUS (Pmode, tmp, result);
14483 }
14484 else
14485 return orig_x;
14486 }
14487 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14488 {
14489 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14490 if (result == NULL_RTX)
14491 return orig_x;
14492 }
14493 return result;
14494 }
14495
14496 /* If X is a machine specific address (i.e. a symbol or label being
14497 referenced as a displacement from the GOT implemented using an
14498 UNSPEC), then return the base term. Otherwise return X. */
14499
14500 rtx
14501 ix86_find_base_term (rtx x)
14502 {
14503 rtx term;
14504
14505 if (TARGET_64BIT)
14506 {
14507 if (GET_CODE (x) != CONST)
14508 return x;
14509 term = XEXP (x, 0);
14510 if (GET_CODE (term) == PLUS
14511 && (CONST_INT_P (XEXP (term, 1))
14512 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14513 term = XEXP (term, 0);
14514 if (GET_CODE (term) != UNSPEC
14515 || (XINT (term, 1) != UNSPEC_GOTPCREL
14516 && XINT (term, 1) != UNSPEC_PCREL))
14517 return x;
14518
14519 return XVECEXP (term, 0, 0);
14520 }
14521
14522 return ix86_delegitimize_address (x);
14523 }
14524 \f
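/* Output to FILE the assembler suffix encoding comparison CODE in mode MODE.
REVERSE selects the reversed condition; FP selects the spellings used for
fcmov and unordered floating-point compares. */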
14525 static void
14526 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14527 bool fp, FILE *file)
14528 {
14529 const char *suffix;
14530
14531 if (mode == CCFPmode || mode == CCFPUmode)
14532 {
14533 code = ix86_fp_compare_code_to_integer (code);
14534 mode = CCmode;
14535 }
14536 if (reverse)
14537 code = reverse_condition (code);
14538
14539 switch (code)
14540 {
14541 case EQ:
14542 switch (mode)
14543 {
14544 case CCAmode:
14545 suffix = "a";
14546 break;
14547
14548 case CCCmode:
14549 suffix = "c";
14550 break;
14551
14552 case CCOmode:
14553 suffix = "o";
14554 break;
14555
14556 case CCSmode:
14557 suffix = "s";
14558 break;
14559
14560 default:
14561 suffix = "e";
14562 }
14563 break;
14564 case NE:
14565 switch (mode)
14566 {
14567 case CCAmode:
14568 suffix = "na";
14569 break;
14570
14571 case CCCmode:
14572 suffix = "nc";
14573 break;
14574
14575 case CCOmode:
14576 suffix = "no";
14577 break;
14578
14579 case CCSmode:
14580 suffix = "ns";
14581 break;
14582
14583 default:
14584 suffix = "ne";
14585 }
14586 break;
14587 case GT:
14588 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14589 suffix = "g";
14590 break;
14591 case GTU:
14592 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14593 Those same assemblers have the same but opposite lossage on cmov. */
14594 if (mode == CCmode)
14595 suffix = fp ? "nbe" : "a";
14596 else
14597 gcc_unreachable ();
14598 break;
14599 case LT:
14600 switch (mode)
14601 {
14602 case CCNOmode:
14603 case CCGOCmode:
14604 suffix = "s";
14605 break;
14606
14607 case CCmode:
14608 case CCGCmode:
14609 suffix = "l";
14610 break;
14611
14612 default:
14613 gcc_unreachable ();
14614 }
14615 break;
14616 case LTU:
14617 if (mode == CCmode)
14618 suffix = "b";
14619 else if (mode == CCCmode)
14620 suffix = "c";
14621 else
14622 gcc_unreachable ();
14623 break;
14624 case GE:
14625 switch (mode)
14626 {
14627 case CCNOmode:
14628 case CCGOCmode:
14629 suffix = "ns";
14630 break;
14631
14632 case CCmode:
14633 case CCGCmode:
14634 suffix = "ge";
14635 break;
14636
14637 default:
14638 gcc_unreachable ();
14639 }
14640 break;
14641 case GEU:
14642 if (mode == CCmode)
14643 suffix = fp ? "nb" : "ae";
14644 else if (mode == CCCmode)
14645 suffix = "nc";
14646 else
14647 gcc_unreachable ();
14648 break;
14649 case LE:
14650 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14651 suffix = "le";
14652 break;
14653 case LEU:
14654 if (mode == CCmode)
14655 suffix = "be";
14656 else
14657 gcc_unreachable ();
14658 break;
14659 case UNORDERED:
14660 suffix = fp ? "u" : "p";
14661 break;
14662 case ORDERED:
14663 suffix = fp ? "nu" : "np";
14664 break;
14665 default:
14666 gcc_unreachable ();
14667 }
14668 fputs (suffix, file);
14669 }
14670
14671 /* Print the name of register X to FILE based on its machine mode and number.
14672 If CODE is 'w', pretend the mode is HImode.
14673 If CODE is 'b', pretend the mode is QImode.
14674 If CODE is 'k', pretend the mode is SImode.
14675 If CODE is 'q', pretend the mode is DImode.
14676 If CODE is 'x', pretend the mode is V4SFmode.
14677 If CODE is 't', pretend the mode is V8SFmode.
14678 If CODE is 'g', pretend the mode is V16SFmode.
14679 If CODE is 'h', pretend the reg is the 'high' byte register.
14680 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14681 If CODE is 'd', duplicate the operand for AVX instruction.
14682 */
14683
14684 void
14685 print_reg (rtx x, int code, FILE *file)
14686 {
14687 const char *reg;
14688 unsigned int regno;
14689 bool duplicated = code == 'd' && TARGET_AVX;
14690
14691 if (ASSEMBLER_DIALECT == ASM_ATT)
14692 putc ('%', file);
14693
14694 if (x == pc_rtx)
14695 {
14696 gcc_assert (TARGET_64BIT);
14697 fputs ("rip", file);
14698 return;
14699 }
14700
14701 regno = true_regnum (x);
14702 gcc_assert (regno != ARG_POINTER_REGNUM
14703 && regno != FRAME_POINTER_REGNUM
14704 && regno != FLAGS_REG
14705 && regno != FPSR_REG
14706 && regno != FPCR_REG);
14707
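/* Translate the print code into an operand size in bytes; 0 stands for a
high byte register, 3 for the x87 stack top, anything else for the width
of the register name to print. */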
14708 if (code == 'w' || MMX_REG_P (x))
14709 code = 2;
14710 else if (code == 'b')
14711 code = 1;
14712 else if (code == 'k')
14713 code = 4;
14714 else if (code == 'q')
14715 code = 8;
14716 else if (code == 'y')
14717 code = 3;
14718 else if (code == 'h')
14719 code = 0;
14720 else if (code == 'x')
14721 code = 16;
14722 else if (code == 't')
14723 code = 32;
14724 else if (code == 'g')
14725 code = 64;
14726 else
14727 code = GET_MODE_SIZE (GET_MODE (x));
14728
14729 /* Irritatingly, AMD extended registers use a different naming convention
14730 from the normal registers: "r%d[bwd]" */
14731 if (REX_INT_REGNO_P (regno))
14732 {
14733 gcc_assert (TARGET_64BIT);
14734 putc ('r', file);
14735 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14736 switch (code)
14737 {
14738 case 0:
14739 error ("extended registers have no high halves");
14740 break;
14741 case 1:
14742 putc ('b', file);
14743 break;
14744 case 2:
14745 putc ('w', file);
14746 break;
14747 case 4:
14748 putc ('d', file);
14749 break;
14750 case 8:
14751 /* no suffix */
14752 break;
14753 default:
14754 error ("unsupported operand size for extended register");
14755 break;
14756 }
14757 return;
14758 }
14759
14760 reg = NULL;
14761 switch (code)
14762 {
14763 case 3:
14764 if (STACK_TOP_P (x))
14765 {
14766 reg = "st(0)";
14767 break;
14768 }
14769 /* FALLTHRU */
14770 case 8:
14771 case 4:
14772 case 12:
14773 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
14774 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14775 /* FALLTHRU */
14776 case 16:
14777 case 2:
14778 normal:
14779 reg = hi_reg_name[regno];
14780 break;
14781 case 1:
14782 if (regno >= ARRAY_SIZE (qi_reg_name))
14783 goto normal;
14784 reg = qi_reg_name[regno];
14785 break;
14786 case 0:
14787 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14788 goto normal;
14789 reg = qi_high_reg_name[regno];
14790 break;
14791 case 32:
14792 if (SSE_REG_P (x))
14793 {
14794 gcc_assert (!duplicated);
14795 putc ('y', file);
14796 fputs (hi_reg_name[regno] + 1, file);
14797 return;
14798 }
14799 case 64:
14800 if (SSE_REG_P (x))
14801 {
14802 gcc_assert (!duplicated);
14803 putc ('z', file);
14804 fputs (hi_reg_name[REGNO (x)] + 1, file);
14805 return;
14806 }
14807 break;
14808 default:
14809 gcc_unreachable ();
14810 }
14811
14812 fputs (reg, file);
14813 if (duplicated)
14814 {
14815 if (ASSEMBLER_DIALECT == ASM_ATT)
14816 fprintf (file, ", %%%s", reg);
14817 else
14818 fprintf (file, ", %s", reg);
14819 }
14820 }
14821
14822 /* Meaning of CODE:
14823 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14824 C -- print opcode suffix for set/cmov insn.
14825 c -- like C, but print reversed condition
14826 F,f -- likewise, but for floating-point.
14827 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14828 otherwise nothing
14829 R -- print embedded rounding and sae.
14830 r -- print only sae.
14831 z -- print the opcode suffix for the size of the current operand.
14832 Z -- likewise, with special suffixes for x87 instructions.
14833 * -- print a star (in certain assembler syntax)
14834 A -- print an absolute memory reference.
14835 E -- print address with DImode register names if TARGET_64BIT.
14836 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14837 s -- print a shift double count, followed by the assembler's argument
14838 delimiter.
14839 b -- print the QImode name of the register for the indicated operand.
14840 %b0 would print %al if operands[0] is reg 0.
14841 w -- likewise, print the HImode name of the register.
14842 k -- likewise, print the SImode name of the register.
14843 q -- likewise, print the DImode name of the register.
14844 x -- likewise, print the V4SFmode name of the register.
14845 t -- likewise, print the V8SFmode name of the register.
14846 g -- likewise, print the V16SFmode name of the register.
14847 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14848 y -- print "st(0)" instead of "st" as a register.
14849 d -- print duplicated register operand for AVX instruction.
14850 D -- print condition for SSE cmp instruction.
14851 P -- if PIC, print an @PLT suffix.
14852 p -- print raw symbol name.
14853 X -- don't print any sort of PIC '@' suffix for a symbol.
14854 & -- print some in-use local-dynamic symbol name.
14855 H -- print a memory address offset by 8; used for sse high-parts
14856 Y -- print condition for XOP pcom* instruction.
14857 + -- print a branch hint as 'cs' or 'ds' prefix
14858 ; -- print a semicolon (after prefixes due to bug in older gas).
14859 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14860 @ -- print a segment register of thread base pointer load
14861 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14862 */
14863
14864 void
14865 ix86_print_operand (FILE *file, rtx x, int code)
14866 {
14867 if (code)
14868 {
14869 switch (code)
14870 {
14871 case 'A':
14872 switch (ASSEMBLER_DIALECT)
14873 {
14874 case ASM_ATT:
14875 putc ('*', file);
14876 break;
14877
14878 case ASM_INTEL:
14879 /* Intel syntax. For absolute addresses, registers should not
14880 be surrounded by brackets. */
14881 if (!REG_P (x))
14882 {
14883 putc ('[', file);
14884 ix86_print_operand (file, x, 0);
14885 putc (']', file);
14886 return;
14887 }
14888 break;
14889
14890 default:
14891 gcc_unreachable ();
14892 }
14893
14894 ix86_print_operand (file, x, 0);
14895 return;
14896
14897 case 'E':
14898 /* Wrap address in an UNSPEC to declare special handling. */
14899 if (TARGET_64BIT)
14900 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14901
14902 output_address (x);
14903 return;
14904
14905 case 'L':
14906 if (ASSEMBLER_DIALECT == ASM_ATT)
14907 putc ('l', file);
14908 return;
14909
14910 case 'W':
14911 if (ASSEMBLER_DIALECT == ASM_ATT)
14912 putc ('w', file);
14913 return;
14914
14915 case 'B':
14916 if (ASSEMBLER_DIALECT == ASM_ATT)
14917 putc ('b', file);
14918 return;
14919
14920 case 'Q':
14921 if (ASSEMBLER_DIALECT == ASM_ATT)
14922 putc ('l', file);
14923 return;
14924
14925 case 'S':
14926 if (ASSEMBLER_DIALECT == ASM_ATT)
14927 putc ('s', file);
14928 return;
14929
14930 case 'T':
14931 if (ASSEMBLER_DIALECT == ASM_ATT)
14932 putc ('t', file);
14933 return;
14934
14935 case 'O':
14936 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14937 if (ASSEMBLER_DIALECT != ASM_ATT)
14938 return;
14939
14940 switch (GET_MODE_SIZE (GET_MODE (x)))
14941 {
14942 case 2:
14943 putc ('w', file);
14944 break;
14945
14946 case 4:
14947 putc ('l', file);
14948 break;
14949
14950 case 8:
14951 putc ('q', file);
14952 break;
14953
14954 default:
14955 output_operand_lossage
14956 ("invalid operand size for operand code 'O'");
14957 return;
14958 }
14959
14960 putc ('.', file);
14961 #endif
14962 return;
14963
14964 case 'z':
14965 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14966 {
14967 /* Opcodes don't get size suffixes when using Intel syntax. */
14968 if (ASSEMBLER_DIALECT == ASM_INTEL)
14969 return;
14970
14971 switch (GET_MODE_SIZE (GET_MODE (x)))
14972 {
14973 case 1:
14974 putc ('b', file);
14975 return;
14976
14977 case 2:
14978 putc ('w', file);
14979 return;
14980
14981 case 4:
14982 putc ('l', file);
14983 return;
14984
14985 case 8:
14986 putc ('q', file);
14987 return;
14988
14989 default:
14990 output_operand_lossage
14991 ("invalid operand size for operand code 'z'");
14992 return;
14993 }
14994 }
14995
14996 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14997 warning
14998 (0, "non-integer operand used with operand code 'z'");
14999 /* FALLTHRU */
15000
15001 case 'Z':
15002 /* 387 opcodes don't get size suffixes when using Intel syntax. */
15003 if (ASSEMBLER_DIALECT == ASM_INTEL)
15004 return;
15005
15006 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15007 {
15008 switch (GET_MODE_SIZE (GET_MODE (x)))
15009 {
15010 case 2:
15011 #ifdef HAVE_AS_IX86_FILDS
15012 putc ('s', file);
15013 #endif
15014 return;
15015
15016 case 4:
15017 putc ('l', file);
15018 return;
15019
15020 case 8:
15021 #ifdef HAVE_AS_IX86_FILDQ
15022 putc ('q', file);
15023 #else
15024 fputs ("ll", file);
15025 #endif
15026 return;
15027
15028 default:
15029 break;
15030 }
15031 }
15032 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15033 {
15034 /* 387 opcodes don't get size suffixes
15035 if the operands are registers. */
15036 if (STACK_REG_P (x))
15037 return;
15038
15039 switch (GET_MODE_SIZE (GET_MODE (x)))
15040 {
15041 case 4:
15042 putc ('s', file);
15043 return;
15044
15045 case 8:
15046 putc ('l', file);
15047 return;
15048
15049 case 12:
15050 case 16:
15051 putc ('t', file);
15052 return;
15053
15054 default:
15055 break;
15056 }
15057 }
15058 else
15059 {
15060 output_operand_lossage
15061 ("invalid operand type used with operand code 'Z'");
15062 return;
15063 }
15064
15065 output_operand_lossage
15066 ("invalid operand size for operand code 'Z'");
15067 return;
15068
15069 case 'd':
15070 case 'b':
15071 case 'w':
15072 case 'k':
15073 case 'q':
15074 case 'h':
15075 case 't':
15076 case 'g':
15077 case 'y':
15078 case 'x':
15079 case 'X':
15080 case 'P':
15081 case 'p':
15082 break;
15083
15084 case 's':
15085 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15086 {
15087 ix86_print_operand (file, x, 0);
15088 fputs (", ", file);
15089 }
15090 return;
15091
15092 case 'Y':
15093 switch (GET_CODE (x))
15094 {
15095 case NE:
15096 fputs ("neq", file);
15097 break;
15098 case EQ:
15099 fputs ("eq", file);
15100 break;
15101 case GE:
15102 case GEU:
15103 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15104 break;
15105 case GT:
15106 case GTU:
15107 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15108 break;
15109 case LE:
15110 case LEU:
15111 fputs ("le", file);
15112 break;
15113 case LT:
15114 case LTU:
15115 fputs ("lt", file);
15116 break;
15117 case UNORDERED:
15118 fputs ("unord", file);
15119 break;
15120 case ORDERED:
15121 fputs ("ord", file);
15122 break;
15123 case UNEQ:
15124 fputs ("ueq", file);
15125 break;
15126 case UNGE:
15127 fputs ("nlt", file);
15128 break;
15129 case UNGT:
15130 fputs ("nle", file);
15131 break;
15132 case UNLE:
15133 fputs ("ule", file);
15134 break;
15135 case UNLT:
15136 fputs ("ult", file);
15137 break;
15138 case LTGT:
15139 fputs ("une", file);
15140 break;
15141 default:
15142 output_operand_lossage ("operand is not a condition code, "
15143 "invalid operand code 'Y'");
15144 return;
15145 }
15146 return;
15147
15148 case 'D':
15149 /* A little bit of braindamage here. The SSE compare instructions
15150 use completely different names for the comparisons than the
15151 fp conditional moves do. */
15152 switch (GET_CODE (x))
15153 {
15154 case UNEQ:
15155 if (TARGET_AVX)
15156 {
15157 fputs ("eq_us", file);
15158 break;
15159 }
15160 case EQ:
15161 fputs ("eq", file);
15162 break;
15163 case UNLT:
15164 if (TARGET_AVX)
15165 {
15166 fputs ("nge", file);
15167 break;
15168 }
15169 case LT:
15170 fputs ("lt", file);
15171 break;
15172 case UNLE:
15173 if (TARGET_AVX)
15174 {
15175 fputs ("ngt", file);
15176 break;
15177 }
15178 case LE:
15179 fputs ("le", file);
15180 break;
15181 case UNORDERED:
15182 fputs ("unord", file);
15183 break;
15184 case LTGT:
15185 if (TARGET_AVX)
15186 {
15187 fputs ("neq_oq", file);
15188 break;
15189 }
15190 case NE:
15191 fputs ("neq", file);
15192 break;
15193 case GE:
15194 if (TARGET_AVX)
15195 {
15196 fputs ("ge", file);
15197 break;
15198 }
15199 case UNGE:
15200 fputs ("nlt", file);
15201 break;
15202 case GT:
15203 if (TARGET_AVX)
15204 {
15205 fputs ("gt", file);
15206 break;
15207 }
15208 case UNGT:
15209 fputs ("nle", file);
15210 break;
15211 case ORDERED:
15212 fputs ("ord", file);
15213 break;
15214 default:
15215 output_operand_lossage ("operand is not a condition code, "
15216 "invalid operand code 'D'");
15217 return;
15218 }
15219 return;
15220
15221 case 'F':
15222 case 'f':
15223 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15224 if (ASSEMBLER_DIALECT == ASM_ATT)
15225 putc ('.', file);
15226 #endif
15227
15228 case 'C':
15229 case 'c':
15230 if (!COMPARISON_P (x))
15231 {
15232 output_operand_lossage ("operand is not a condition code, "
15233 "invalid operand code '%c'", code);
15234 return;
15235 }
15236 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15237 code == 'c' || code == 'f',
15238 code == 'F' || code == 'f',
15239 file);
15240 return;
15241
15242 case 'H':
15243 if (!offsettable_memref_p (x))
15244 {
15245 output_operand_lossage ("operand is not an offsettable memory "
15246 "reference, invalid operand code 'H'");
15247 return;
15248 }
15249 /* It doesn't actually matter what mode we use here, as we're
15250 only going to use this for printing. */
15251 x = adjust_address_nv (x, DImode, 8);
15252 /* Output 'qword ptr' for intel assembler dialect. */
15253 if (ASSEMBLER_DIALECT == ASM_INTEL)
15254 code = 'q';
15255 break;
15256
15257 case 'K':
15258 gcc_assert (CONST_INT_P (x));
15259
15260 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15261 #ifdef HAVE_AS_IX86_HLE
15262 fputs ("xacquire ", file);
15263 #else
15264 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15265 #endif
15266 else if (INTVAL (x) & IX86_HLE_RELEASE)
15267 #ifdef HAVE_AS_IX86_HLE
15268 fputs ("xrelease ", file);
15269 #else
15270 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15271 #endif
15272 /* We do not want to print the value of the operand. */
15273 return;
15274
15275 case 'N':
15276 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15277 fputs ("{z}", file);
15278 return;
15279
15280 case 'r':
15281 gcc_assert (CONST_INT_P (x));
15282 gcc_assert (INTVAL (x) == ROUND_SAE);
15283
15284 if (ASSEMBLER_DIALECT == ASM_INTEL)
15285 fputs (", ", file);
15286
15287 fputs ("{sae}", file);
15288
15289 if (ASSEMBLER_DIALECT == ASM_ATT)
15290 fputs (", ", file);
15291
15292 return;
15293
15294 case 'R':
15295 gcc_assert (CONST_INT_P (x));
15296
15297 if (ASSEMBLER_DIALECT == ASM_INTEL)
15298 fputs (", ", file);
15299
15300 switch (INTVAL (x))
15301 {
15302 case ROUND_NEAREST_INT | ROUND_SAE:
15303 fputs ("{rn-sae}", file);
15304 break;
15305 case ROUND_NEG_INF | ROUND_SAE:
15306 fputs ("{rd-sae}", file);
15307 break;
15308 case ROUND_POS_INF | ROUND_SAE:
15309 fputs ("{ru-sae}", file);
15310 break;
15311 case ROUND_ZERO | ROUND_SAE:
15312 fputs ("{rz-sae}", file);
15313 break;
15314 default:
15315 gcc_unreachable ();
15316 }
15317
15318 if (ASSEMBLER_DIALECT == ASM_ATT)
15319 fputs (", ", file);
15320
15321 return;
15322
15323 case '*':
15324 if (ASSEMBLER_DIALECT == ASM_ATT)
15325 putc ('*', file);
15326 return;
15327
15328 case '&':
15329 {
15330 const char *name = get_some_local_dynamic_name ();
15331 if (name == NULL)
15332 output_operand_lossage ("'%%&' used without any "
15333 "local dynamic TLS references");
15334 else
15335 assemble_name (file, name);
15336 return;
15337 }
15338
15339 case '+':
15340 {
15341 rtx x;
15342
15343 if (!optimize
15344 || optimize_function_for_size_p (cfun)
15345 || !TARGET_BRANCH_PREDICTION_HINTS)
15346 return;
15347
15348 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15349 if (x)
15350 {
15351 int pred_val = XINT (x, 0);
15352
15353 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15354 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15355 {
15356 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15357 bool cputaken
15358 = final_forward_branch_p (current_output_insn) == 0;
15359
15360 /* Emit hints only in the case where default branch prediction
15361 heuristics would fail. */
15362 if (taken != cputaken)
15363 {
15364 /* We use 3e (DS) prefix for taken branches and
15365 2e (CS) prefix for not taken branches. */
15366 if (taken)
15367 fputs ("ds ; ", file);
15368 else
15369 fputs ("cs ; ", file);
15370 }
15371 }
15372 }
15373 return;
15374 }
15375
15376 case ';':
15377 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15378 putc (';', file);
15379 #endif
15380 return;
15381
15382 case '@':
15383 if (ASSEMBLER_DIALECT == ASM_ATT)
15384 putc ('%', file);
15385
15386 /* The kernel uses a different segment register for performance
15387 reasons; a system call would not have to trash the userspace
15388 segment register, which would be expensive. */
15389 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15390 fputs ("fs", file);
15391 else
15392 fputs ("gs", file);
15393 return;
15394
15395 case '~':
15396 putc (TARGET_AVX2 ? 'i' : 'f', file);
15397 return;
15398
15399 case '^':
15400 if (TARGET_64BIT && Pmode != word_mode)
15401 fputs ("addr32 ", file);
15402 return;
15403
15404 default:
15405 output_operand_lossage ("invalid operand code '%c'", code);
15406 }
15407 }
15408
15409 if (REG_P (x))
15410 print_reg (x, code, file);
15411
15412 else if (MEM_P (x))
15413 {
15414 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15415 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15416 && GET_MODE (x) != BLKmode)
15417 {
15418 const char * size;
15419 switch (GET_MODE_SIZE (GET_MODE (x)))
15420 {
15421 case 1: size = "BYTE"; break;
15422 case 2: size = "WORD"; break;
15423 case 4: size = "DWORD"; break;
15424 case 8: size = "QWORD"; break;
15425 case 12: size = "TBYTE"; break;
15426 case 16:
15427 if (GET_MODE (x) == XFmode)
15428 size = "TBYTE";
15429 else
15430 size = "XMMWORD";
15431 break;
15432 case 32: size = "YMMWORD"; break;
15433 case 64: size = "ZMMWORD"; break;
15434 default:
15435 gcc_unreachable ();
15436 }
15437
15438 /* Check for explicit size override (codes 'b', 'w', 'k',
15439 'q' and 'x') */
15440 if (code == 'b')
15441 size = "BYTE";
15442 else if (code == 'w')
15443 size = "WORD";
15444 else if (code == 'k')
15445 size = "DWORD";
15446 else if (code == 'q')
15447 size = "QWORD";
15448 else if (code == 'x')
15449 size = "XMMWORD";
15450
15451 fputs (size, file);
15452 fputs (" PTR ", file);
15453 }
15454
15455 x = XEXP (x, 0);
15456 /* Avoid (%rip) for call operands. */
15457 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15458 && !CONST_INT_P (x))
15459 output_addr_const (file, x);
15460 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15461 output_operand_lossage ("invalid constraints for operand");
15462 else
15463 output_address (x);
15464 }
15465
15466 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15467 {
15468 REAL_VALUE_TYPE r;
15469 long l;
15470
15471 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15472 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15473
15474 if (ASSEMBLER_DIALECT == ASM_ATT)
15475 putc ('$', file);
15476 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15477 if (code == 'q')
15478 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15479 (unsigned long long) (int) l);
15480 else
15481 fprintf (file, "0x%08x", (unsigned int) l);
15482 }
15483
15484 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15485 {
15486 REAL_VALUE_TYPE r;
15487 long l[2];
15488
15489 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15490 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15491
15492 if (ASSEMBLER_DIALECT == ASM_ATT)
15493 putc ('$', file);
15494 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15495 }
15496
15497 /* These float cases don't actually occur as immediate operands. */
15498 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15499 {
15500 char dstr[30];
15501
15502 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15503 fputs (dstr, file);
15504 }
15505
15506 else
15507 {
15508 /* We have patterns that allow zero sets of memory, for instance.
15509 In 64-bit mode, we should probably support all 8-byte vectors,
15510 since we can in fact encode that into an immediate. */
15511 if (GET_CODE (x) == CONST_VECTOR)
15512 {
15513 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15514 x = const0_rtx;
15515 }
15516
15517 if (code != 'P' && code != 'p')
15518 {
15519 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15520 {
15521 if (ASSEMBLER_DIALECT == ASM_ATT)
15522 putc ('$', file);
15523 }
15524 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15525 || GET_CODE (x) == LABEL_REF)
15526 {
15527 if (ASSEMBLER_DIALECT == ASM_ATT)
15528 putc ('$', file);
15529 else
15530 fputs ("OFFSET FLAT:", file);
15531 }
15532 }
15533 if (CONST_INT_P (x))
15534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15535 else if (flag_pic || MACHOPIC_INDIRECT)
15536 output_pic_addr_const (file, x, code);
15537 else
15538 output_addr_const (file, x);
15539 }
15540 }
15541
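/* Return true if CODE is one of the punctuation characters handled by
ix86_print_operand (see the operand code table above). */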
15542 static bool
15543 ix86_print_operand_punct_valid_p (unsigned char code)
15544 {
15545 return (code == '@' || code == '*' || code == '+' || code == '&'
15546 || code == ';' || code == '~' || code == '^');
15547 }
15548 \f
15549 /* Print a memory operand whose address is ADDR. */
15550
15551 static void
15552 ix86_print_operand_address (FILE *file, rtx addr)
15553 {
15554 struct ix86_address parts;
15555 rtx base, index, disp;
15556 int scale;
15557 int ok;
15558 bool vsib = false;
15559 int code = 0;
15560
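/* A VSIB address arrives wrapped in an UNSPEC holding the base address
expression, the vector index register and the scale; unwrap it before
decomposing the address. */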
15561 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15562 {
15563 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15564 gcc_assert (parts.index == NULL_RTX);
15565 parts.index = XVECEXP (addr, 0, 1);
15566 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15567 addr = XVECEXP (addr, 0, 0);
15568 vsib = true;
15569 }
15570 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15571 {
15572 gcc_assert (TARGET_64BIT);
15573 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15574 code = 'q';
15575 }
15576 else
15577 ok = ix86_decompose_address (addr, &parts);
15578
15579 gcc_assert (ok);
15580
15581 base = parts.base;
15582 index = parts.index;
15583 disp = parts.disp;
15584 scale = parts.scale;
15585
15586 switch (parts.seg)
15587 {
15588 case SEG_DEFAULT:
15589 break;
15590 case SEG_FS:
15591 case SEG_GS:
15592 if (ASSEMBLER_DIALECT == ASM_ATT)
15593 putc ('%', file);
15594 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15595 break;
15596 default:
15597 gcc_unreachable ();
15598 }
15599
15600 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15601 if (TARGET_64BIT && !base && !index)
15602 {
15603 rtx symbol = disp;
15604
15605 if (GET_CODE (disp) == CONST
15606 && GET_CODE (XEXP (disp, 0)) == PLUS
15607 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15608 symbol = XEXP (XEXP (disp, 0), 0);
15609
15610 if (GET_CODE (symbol) == LABEL_REF
15611 || (GET_CODE (symbol) == SYMBOL_REF
15612 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15613 base = pc_rtx;
15614 }
15615 if (!base && !index)
15616 {
15617 /* A displacement-only address requires special attention. */
15618
15619 if (CONST_INT_P (disp))
15620 {
15621 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15622 fputs ("ds:", file);
15623 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15624 }
15625 else if (flag_pic)
15626 output_pic_addr_const (file, disp, 0);
15627 else
15628 output_addr_const (file, disp);
15629 }
15630 else
15631 {
15632 /* Print SImode register names to force addr32 prefix. */
15633 if (SImode_address_operand (addr, VOIDmode))
15634 {
15635 #ifdef ENABLE_CHECKING
15636 gcc_assert (TARGET_64BIT);
15637 switch (GET_CODE (addr))
15638 {
15639 case SUBREG:
15640 gcc_assert (GET_MODE (addr) == SImode);
15641 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15642 break;
15643 case ZERO_EXTEND:
15644 case AND:
15645 gcc_assert (GET_MODE (addr) == DImode);
15646 break;
15647 default:
15648 gcc_unreachable ();
15649 }
15650 #endif
15651 gcc_assert (!code);
15652 code = 'k';
15653 }
15654 else if (code == 0
15655 && TARGET_X32
15656 && disp
15657 && CONST_INT_P (disp)
15658 && INTVAL (disp) < -16*1024*1024)
15659 {
15660 /* X32 runs in 64-bit mode, where displacement, DISP, in
15661 address DISP(%r64), is encoded as 32-bit immediate sign-
15662 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15663 address is %r64 + 0xffffffffbffffd00. When %r64 <
15664 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15665 which is invalid for x32. The correct address is %r64
15666 - 0x40000300 == 0xf7ffdd64. To properly encode
15667 -0x40000300(%r64) for x32, we zero-extend negative
15668 displacement by forcing addr32 prefix which truncates
15669 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15670 zero-extend all negative displacements, including -1(%rsp).
15671 However, for small negative displacements, sign-extension
15672 won't cause overflow. We only zero-extend negative
15673 displacements if they < -16*1024*1024, which is also used
15674 to check legitimate address displacements for PIC. */
15675 code = 'k';
15676 }
15677
15678 if (ASSEMBLER_DIALECT == ASM_ATT)
15679 {
15680 if (disp)
15681 {
15682 if (flag_pic)
15683 output_pic_addr_const (file, disp, 0);
15684 else if (GET_CODE (disp) == LABEL_REF)
15685 output_asm_label (disp);
15686 else
15687 output_addr_const (file, disp);
15688 }
15689
15690 putc ('(', file);
15691 if (base)
15692 print_reg (base, code, file);
15693 if (index)
15694 {
15695 putc (',', file);
15696 print_reg (index, vsib ? 0 : code, file);
15697 if (scale != 1 || vsib)
15698 fprintf (file, ",%d", scale);
15699 }
15700 putc (')', file);
15701 }
15702 else
15703 {
15704 rtx offset = NULL_RTX;
15705
15706 if (disp)
15707 {
15708 /* Pull out the offset of a symbol; print any symbol itself. */
15709 if (GET_CODE (disp) == CONST
15710 && GET_CODE (XEXP (disp, 0)) == PLUS
15711 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15712 {
15713 offset = XEXP (XEXP (disp, 0), 1);
15714 disp = gen_rtx_CONST (VOIDmode,
15715 XEXP (XEXP (disp, 0), 0));
15716 }
15717
15718 if (flag_pic)
15719 output_pic_addr_const (file, disp, 0);
15720 else if (GET_CODE (disp) == LABEL_REF)
15721 output_asm_label (disp);
15722 else if (CONST_INT_P (disp))
15723 offset = disp;
15724 else
15725 output_addr_const (file, disp);
15726 }
15727
15728 putc ('[', file);
15729 if (base)
15730 {
15731 print_reg (base, code, file);
15732 if (offset)
15733 {
15734 if (INTVAL (offset) >= 0)
15735 putc ('+', file);
15736 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15737 }
15738 }
15739 else if (offset)
15740 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15741 else
15742 putc ('0', file);
15743
15744 if (index)
15745 {
15746 putc ('+', file);
15747 print_reg (index, vsib ? 0 : code, file);
15748 if (scale != 1 || vsib)
15749 fprintf (file, "*%d", scale);
15750 }
15751 putc (']', file);
15752 }
15753 }
15754 }
15755
15756 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15757
15758 static bool
15759 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15760 {
15761 rtx op;
15762
15763 if (GET_CODE (x) != UNSPEC)
15764 return false;
15765
15766 op = XVECEXP (x, 0, 0);
15767 switch (XINT (x, 1))
15768 {
15769 case UNSPEC_GOTTPOFF:
15770 output_addr_const (file, op);
15771 /* FIXME: This might be @TPOFF in Sun ld. */
15772 fputs ("@gottpoff", file);
15773 break;
15774 case UNSPEC_TPOFF:
15775 output_addr_const (file, op);
15776 fputs ("@tpoff", file);
15777 break;
15778 case UNSPEC_NTPOFF:
15779 output_addr_const (file, op);
15780 if (TARGET_64BIT)
15781 fputs ("@tpoff", file);
15782 else
15783 fputs ("@ntpoff", file);
15784 break;
15785 case UNSPEC_DTPOFF:
15786 output_addr_const (file, op);
15787 fputs ("@dtpoff", file);
15788 break;
15789 case UNSPEC_GOTNTPOFF:
15790 output_addr_const (file, op);
15791 if (TARGET_64BIT)
15792 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15793 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15794 else
15795 fputs ("@gotntpoff", file);
15796 break;
15797 case UNSPEC_INDNTPOFF:
15798 output_addr_const (file, op);
15799 fputs ("@indntpoff", file);
15800 break;
15801 #if TARGET_MACHO
15802 case UNSPEC_MACHOPIC_OFFSET:
15803 output_addr_const (file, op);
15804 putc ('-', file);
15805 machopic_output_function_base_name (file);
15806 break;
15807 #endif
15808
15809 case UNSPEC_STACK_CHECK:
15810 {
15811 int offset;
15812
15813 gcc_assert (flag_split_stack);
15814
15815 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15816 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15817 #else
15818 gcc_unreachable ();
15819 #endif
15820
15821 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15822 }
15823 break;
15824
15825 default:
15826 return false;
15827 }
15828
15829 return true;
15830 }
15831 \f
15832 /* Split one or more double-mode RTL references into pairs of half-mode
15833 references. The RTL can be REG, offsettable MEM, integer constant, or
15834 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15835 split and "num" is its length. lo_half and hi_half are output arrays
15836 that parallel "operands". */
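   For example, splitting a single DImode pseudo yields roughly
   lo_half[i] = (subreg:SI (reg:DI x) 0) and hi_half[i] = (subreg:SI (reg:DI x) 4),
   while an offsettable MEM is split with adjust_address at offsets 0 and
   GET_MODE_SIZE (half_mode).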
15837
15838 void
15839 split_double_mode (enum machine_mode mode, rtx operands[],
15840 int num, rtx lo_half[], rtx hi_half[])
15841 {
15842 enum machine_mode half_mode;
15843 unsigned int byte;
15844
15845 switch (mode)
15846 {
15847 case TImode:
15848 half_mode = DImode;
15849 break;
15850 case DImode:
15851 half_mode = SImode;
15852 break;
15853 default:
15854 gcc_unreachable ();
15855 }
15856
15857 byte = GET_MODE_SIZE (half_mode);
15858
15859 while (num--)
15860 {
15861 rtx op = operands[num];
15862
15863       /* simplify_subreg refuses to split volatile memory addresses,
15864 	 but we still have to handle them.  */
15865 if (MEM_P (op))
15866 {
15867 lo_half[num] = adjust_address (op, half_mode, 0);
15868 hi_half[num] = adjust_address (op, half_mode, byte);
15869 }
15870 else
15871 {
15872 lo_half[num] = simplify_gen_subreg (half_mode, op,
15873 GET_MODE (op) == VOIDmode
15874 ? mode : GET_MODE (op), 0);
15875 hi_half[num] = simplify_gen_subreg (half_mode, op,
15876 GET_MODE (op) == VOIDmode
15877 ? mode : GET_MODE (op), byte);
15878 }
15879 }
15880 }
15881 \f
15882 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15883 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15884 is the expression of the binary operation. The output may either be
15885 emitted here, or returned to the caller, like all output_* functions.
15886
15887 There is no guarantee that the operands are the same mode, as they
15888 might be within FLOAT or FLOAT_EXTEND expressions. */
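/* For instance, an SFmode SSE addition yields the template
   "vaddss\t{%2, %1, %0|%0, %1, %2}" when AVX is enabled and
   "addss\t{%2, %0|%0, %2}" otherwise; the x87 templates are assembled from
   the "fadd"/"fsub"/"fmul"/"fdiv" prefixes in a similar way below.  */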
15889
15890 #ifndef SYSV386_COMPAT
15891 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15892 wants to fix the assemblers because that causes incompatibility
15893 with gcc. No-one wants to fix gcc because that causes
15894 incompatibility with assemblers... You can use the option of
15895 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15896 #define SYSV386_COMPAT 1
15897 #endif
15898
15899 const char *
15900 output_387_binary_op (rtx insn, rtx *operands)
15901 {
15902 static char buf[40];
15903 const char *p;
15904 const char *ssep;
15905 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15906
15907 #ifdef ENABLE_CHECKING
15908   /* Even if we do not want to check the inputs, this documents the input
15909      constraints, which helps in understanding the following code.  */
15910 if (STACK_REG_P (operands[0])
15911 && ((REG_P (operands[1])
15912 && REGNO (operands[0]) == REGNO (operands[1])
15913 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15914 || (REG_P (operands[2])
15915 && REGNO (operands[0]) == REGNO (operands[2])
15916 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15917 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15918 ; /* ok */
15919 else
15920 gcc_assert (is_sse);
15921 #endif
15922
15923 switch (GET_CODE (operands[3]))
15924 {
15925 case PLUS:
15926 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15927 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15928 p = "fiadd";
15929 else
15930 p = "fadd";
15931 ssep = "vadd";
15932 break;
15933
15934 case MINUS:
15935 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15936 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15937 p = "fisub";
15938 else
15939 p = "fsub";
15940 ssep = "vsub";
15941 break;
15942
15943 case MULT:
15944 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15945 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15946 p = "fimul";
15947 else
15948 p = "fmul";
15949 ssep = "vmul";
15950 break;
15951
15952 case DIV:
15953 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15954 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15955 p = "fidiv";
15956 else
15957 p = "fdiv";
15958 ssep = "vdiv";
15959 break;
15960
15961 default:
15962 gcc_unreachable ();
15963 }
15964
15965 if (is_sse)
15966 {
15967 if (TARGET_AVX)
15968 {
15969 strcpy (buf, ssep);
15970 if (GET_MODE (operands[0]) == SFmode)
15971 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15972 else
15973 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15974 }
15975 else
15976 {
15977 strcpy (buf, ssep + 1);
15978 if (GET_MODE (operands[0]) == SFmode)
15979 strcat (buf, "ss\t{%2, %0|%0, %2}");
15980 else
15981 strcat (buf, "sd\t{%2, %0|%0, %2}");
15982 }
15983 return buf;
15984 }
15985 strcpy (buf, p);
15986
15987 switch (GET_CODE (operands[3]))
15988 {
15989 case MULT:
15990 case PLUS:
15991 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15992 {
15993 rtx temp = operands[2];
15994 operands[2] = operands[1];
15995 operands[1] = temp;
15996 }
15997
15998       /* We know operands[0] == operands[1].  */
15999
16000 if (MEM_P (operands[2]))
16001 {
16002 p = "%Z2\t%2";
16003 break;
16004 }
16005
16006 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16007 {
16008 if (STACK_TOP_P (operands[0]))
16009 /* How is it that we are storing to a dead operand[2]?
16010 Well, presumably operands[1] is dead too. We can't
16011 store the result to st(0) as st(0) gets popped on this
16012 instruction. Instead store to operands[2] (which I
16013 think has to be st(1)). st(1) will be popped later.
16014 gcc <= 2.8.1 didn't have this check and generated
16015 assembly code that the Unixware assembler rejected. */
16016 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16017 else
16018 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16019 break;
16020 }
16021
16022 if (STACK_TOP_P (operands[0]))
16023 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16024 else
16025 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16026 break;
16027
16028 case MINUS:
16029 case DIV:
16030 if (MEM_P (operands[1]))
16031 {
16032 p = "r%Z1\t%1";
16033 break;
16034 }
16035
16036 if (MEM_P (operands[2]))
16037 {
16038 p = "%Z2\t%2";
16039 break;
16040 }
16041
16042 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16043 {
16044 #if SYSV386_COMPAT
16045 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16046 derived assemblers, confusingly reverse the direction of
16047 the operation for fsub{r} and fdiv{r} when the
16048 destination register is not st(0). The Intel assembler
16049 doesn't have this brain damage. Read !SYSV386_COMPAT to
16050 figure out what the hardware really does. */
16051 if (STACK_TOP_P (operands[0]))
16052 p = "{p\t%0, %2|rp\t%2, %0}";
16053 else
16054 p = "{rp\t%2, %0|p\t%0, %2}";
16055 #else
16056 if (STACK_TOP_P (operands[0]))
16057 /* As above for fmul/fadd, we can't store to st(0). */
16058 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16059 else
16060 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16061 #endif
16062 break;
16063 }
16064
16065 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16066 {
16067 #if SYSV386_COMPAT
16068 if (STACK_TOP_P (operands[0]))
16069 p = "{rp\t%0, %1|p\t%1, %0}";
16070 else
16071 p = "{p\t%1, %0|rp\t%0, %1}";
16072 #else
16073 if (STACK_TOP_P (operands[0]))
16074 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16075 else
16076 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16077 #endif
16078 break;
16079 }
16080
16081 if (STACK_TOP_P (operands[0]))
16082 {
16083 if (STACK_TOP_P (operands[1]))
16084 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16085 else
16086 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16087 break;
16088 }
16089 else if (STACK_TOP_P (operands[1]))
16090 {
16091 #if SYSV386_COMPAT
16092 p = "{\t%1, %0|r\t%0, %1}";
16093 #else
16094 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16095 #endif
16096 }
16097 else
16098 {
16099 #if SYSV386_COMPAT
16100 p = "{r\t%2, %0|\t%0, %2}";
16101 #else
16102 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16103 #endif
16104 }
16105 break;
16106
16107 default:
16108 gcc_unreachable ();
16109 }
16110
16111 strcat (buf, p);
16112 return buf;
16113 }
16114
16115 /* Check if a 256bit AVX register is referenced inside of EXP. */
16116
16117 static int
16118 ix86_check_avx256_register (rtx *pexp, void *)
16119 {
16120 rtx exp = *pexp;
16121
16122 if (GET_CODE (exp) == SUBREG)
16123 exp = SUBREG_REG (exp);
16124
16125 if (REG_P (exp)
16126 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16127 return 1;
16128
16129 return 0;
16130 }
16131
16132 /* Return needed mode for entity in optimize_mode_switching pass. */
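/* For the AVX_U128 entity: AVX_U128_CLEAN means the upper 128 bits of the
   256bit registers are known to be zero (e.g. after vzeroupper),
   AVX_U128_DIRTY means they may contain live data, and AVX_U128_ANY places
   no constraint.  ix86_emit_mode_set below calls ix86_avx_emit_vzeroupper
   when the required mode is AVX_U128_CLEAN.  */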
16133
16134 static int
16135 ix86_avx_u128_mode_needed (rtx_insn *insn)
16136 {
16137 if (CALL_P (insn))
16138 {
16139 rtx link;
16140
16141 /* Needed mode is set to AVX_U128_CLEAN if there are
16142 no 256bit modes used in function arguments. */
16143 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16144 link;
16145 link = XEXP (link, 1))
16146 {
16147 if (GET_CODE (XEXP (link, 0)) == USE)
16148 {
16149 rtx arg = XEXP (XEXP (link, 0), 0);
16150
16151 if (ix86_check_avx256_register (&arg, NULL))
16152 return AVX_U128_DIRTY;
16153 }
16154 }
16155
16156 return AVX_U128_CLEAN;
16157 }
16158
16159 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16160 changes state only when a 256bit register is written to, but we need
16161      to prevent the compiler from moving the optimal insertion point above
16162      an eventual read from a 256bit register.  */
16163 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16164 return AVX_U128_DIRTY;
16165
16166 return AVX_U128_ANY;
16167 }
16168
16169 /* Return mode that i387 must be switched into
16170 prior to the execution of insn. */
16171
16172 static int
16173 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16174 {
16175 enum attr_i387_cw mode;
16176
16177 /* The mode UNINITIALIZED is used to store control word after a
16178 function call or ASM pattern. The mode ANY specify that function
16179 has no requirements on the control word and make no changes in the
16180 bits we are interested in. */
16181
16182 if (CALL_P (insn)
16183 || (NONJUMP_INSN_P (insn)
16184 && (asm_noperands (PATTERN (insn)) >= 0
16185 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16186 return I387_CW_UNINITIALIZED;
16187
16188 if (recog_memoized (insn) < 0)
16189 return I387_CW_ANY;
16190
16191 mode = get_attr_i387_cw (insn);
16192
16193 switch (entity)
16194 {
16195 case I387_TRUNC:
16196 if (mode == I387_CW_TRUNC)
16197 return mode;
16198 break;
16199
16200 case I387_FLOOR:
16201 if (mode == I387_CW_FLOOR)
16202 return mode;
16203 break;
16204
16205 case I387_CEIL:
16206 if (mode == I387_CW_CEIL)
16207 return mode;
16208 break;
16209
16210 case I387_MASK_PM:
16211 if (mode == I387_CW_MASK_PM)
16212 return mode;
16213 break;
16214
16215 default:
16216 gcc_unreachable ();
16217 }
16218
16219 return I387_CW_ANY;
16220 }
16221
16222 /* Return mode that entity must be switched into
16223 prior to the execution of insn. */
16224
16225 static int
16226 ix86_mode_needed (int entity, rtx_insn *insn)
16227 {
16228 switch (entity)
16229 {
16230 case AVX_U128:
16231 return ix86_avx_u128_mode_needed (insn);
16232 case I387_TRUNC:
16233 case I387_FLOOR:
16234 case I387_CEIL:
16235 case I387_MASK_PM:
16236 return ix86_i387_mode_needed (entity, insn);
16237 default:
16238 gcc_unreachable ();
16239 }
16240 return 0;
16241 }
16242
16243 /* Check if a 256bit AVX register is referenced in stores. */
16244
16245 static void
16246 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16247 {
16248 if (ix86_check_avx256_register (&dest, NULL))
16249 {
16250 bool *used = (bool *) data;
16251 *used = true;
16252 }
16253 }
16254
16255 /* Calculate mode of upper 128bit AVX registers after the insn. */
16256
16257 static int
16258 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16259 {
16260 rtx pat = PATTERN (insn);
16261
16262 if (vzeroupper_operation (pat, VOIDmode)
16263 || vzeroall_operation (pat, VOIDmode))
16264 return AVX_U128_CLEAN;
16265
16266   /* We know that the state is clean after a CALL insn if no 256bit
16267      register is used for the function return value.  */
16268 if (CALL_P (insn))
16269 {
16270 bool avx_reg256_found = false;
16271 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16272
16273 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16274 }
16275
16276 /* Otherwise, return current mode. Remember that if insn
16277 references AVX 256bit registers, the mode was already changed
16278 to DIRTY from MODE_NEEDED. */
16279 return mode;
16280 }
16281
16282 /* Return the mode that an insn results in. */
16283
16284 int
16285 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16286 {
16287 switch (entity)
16288 {
16289 case AVX_U128:
16290 return ix86_avx_u128_mode_after (mode, insn);
16291 case I387_TRUNC:
16292 case I387_FLOOR:
16293 case I387_CEIL:
16294 case I387_MASK_PM:
16295 return mode;
16296 default:
16297 gcc_unreachable ();
16298 }
16299 }
16300
16301 static int
16302 ix86_avx_u128_mode_entry (void)
16303 {
16304 tree arg;
16305
16306 /* Entry mode is set to AVX_U128_DIRTY if there are
16307 256bit modes used in function arguments. */
16308 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16309 arg = TREE_CHAIN (arg))
16310 {
16311 rtx incoming = DECL_INCOMING_RTL (arg);
16312
16313 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16314 return AVX_U128_DIRTY;
16315 }
16316
16317 return AVX_U128_CLEAN;
16318 }
16319
16320 /* Return a mode that ENTITY is assumed to be
16321 switched to at function entry. */
16322
16323 static int
16324 ix86_mode_entry (int entity)
16325 {
16326 switch (entity)
16327 {
16328 case AVX_U128:
16329 return ix86_avx_u128_mode_entry ();
16330 case I387_TRUNC:
16331 case I387_FLOOR:
16332 case I387_CEIL:
16333 case I387_MASK_PM:
16334 return I387_CW_ANY;
16335 default:
16336 gcc_unreachable ();
16337 }
16338 }
16339
16340 static int
16341 ix86_avx_u128_mode_exit (void)
16342 {
16343 rtx reg = crtl->return_rtx;
16344
16345 /* Exit mode is set to AVX_U128_DIRTY if there are
16346 256bit modes used in the function return register. */
16347 if (reg && ix86_check_avx256_register (&reg, NULL))
16348 return AVX_U128_DIRTY;
16349
16350 return AVX_U128_CLEAN;
16351 }
16352
16353 /* Return a mode that ENTITY is assumed to be
16354 switched to at function exit. */
16355
16356 static int
16357 ix86_mode_exit (int entity)
16358 {
16359 switch (entity)
16360 {
16361 case AVX_U128:
16362 return ix86_avx_u128_mode_exit ();
16363 case I387_TRUNC:
16364 case I387_FLOOR:
16365 case I387_CEIL:
16366 case I387_MASK_PM:
16367 return I387_CW_ANY;
16368 default:
16369 gcc_unreachable ();
16370 }
16371 }
16372
16373 static int
16374 ix86_mode_priority (int, int n)
16375 {
16376 return n;
16377 }
16378
16379 /* Output code to initialize control word copies used by trunc?f?i and
16380 rounding patterns. CURRENT_MODE is set to current control word,
16381 while NEW_MODE is set to new control word. */
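/* As a reminder, bits 10-11 of the x87 control word select the rounding
   mode (00 = to nearest, 01 = down, 10 = up, 11 = toward zero) and bit 5
   masks the precision exception; hence the 0x0400, 0x0800, 0x0c00 and
   0x0020 constants used below.  */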
16382
16383 static void
16384 emit_i387_cw_initialization (int mode)
16385 {
16386 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16387 rtx new_mode;
16388
16389 enum ix86_stack_slot slot;
16390
16391 rtx reg = gen_reg_rtx (HImode);
16392
16393 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16394 emit_move_insn (reg, copy_rtx (stored_mode));
16395
16396 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16397 || optimize_insn_for_size_p ())
16398 {
16399 switch (mode)
16400 {
16401 case I387_CW_TRUNC:
16402 /* round toward zero (truncate) */
16403 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16404 slot = SLOT_CW_TRUNC;
16405 break;
16406
16407 case I387_CW_FLOOR:
16408 /* round down toward -oo */
16409 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16410 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16411 slot = SLOT_CW_FLOOR;
16412 break;
16413
16414 case I387_CW_CEIL:
16415 /* round up toward +oo */
16416 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16417 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16418 slot = SLOT_CW_CEIL;
16419 break;
16420
16421 case I387_CW_MASK_PM:
16422 /* mask precision exception for nearbyint() */
16423 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16424 slot = SLOT_CW_MASK_PM;
16425 break;
16426
16427 default:
16428 gcc_unreachable ();
16429 }
16430 }
16431 else
16432 {
16433 switch (mode)
16434 {
16435 case I387_CW_TRUNC:
16436 /* round toward zero (truncate) */
16437 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16438 slot = SLOT_CW_TRUNC;
16439 break;
16440
16441 case I387_CW_FLOOR:
16442 /* round down toward -oo */
16443 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16444 slot = SLOT_CW_FLOOR;
16445 break;
16446
16447 case I387_CW_CEIL:
16448 /* round up toward +oo */
16449 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16450 slot = SLOT_CW_CEIL;
16451 break;
16452
16453 case I387_CW_MASK_PM:
16454 /* mask precision exception for nearbyint() */
16455 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16456 slot = SLOT_CW_MASK_PM;
16457 break;
16458
16459 default:
16460 gcc_unreachable ();
16461 }
16462 }
16463
16464 gcc_assert (slot < MAX_386_STACK_LOCALS);
16465
16466 new_mode = assign_386_stack_local (HImode, slot);
16467 emit_move_insn (new_mode, reg);
16468 }
16469
16470 /* Emit vzeroupper. */
16471
16472 void
16473 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16474 {
16475 int i;
16476
16477 /* Cancel automatic vzeroupper insertion if there are
16478 live call-saved SSE registers at the insertion point. */
16479
16480 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16481 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16482 return;
16483
16484 if (TARGET_64BIT)
16485 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16486 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16487 return;
16488
16489 emit_insn (gen_avx_vzeroupper ());
16490 }
16491
16494 /* Generate one or more insns to set ENTITY to MODE.  REGS_LIVE
16495 is the set of hard registers live at the point where the insn(s)
16496 are to be inserted. */
16497
16498 static void
16499 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16500 HARD_REG_SET regs_live)
16501 {
16502 switch (entity)
16503 {
16504 case AVX_U128:
16505 if (mode == AVX_U128_CLEAN)
16506 ix86_avx_emit_vzeroupper (regs_live);
16507 break;
16508 case I387_TRUNC:
16509 case I387_FLOOR:
16510 case I387_CEIL:
16511 case I387_MASK_PM:
16512 if (mode != I387_CW_ANY
16513 && mode != I387_CW_UNINITIALIZED)
16514 emit_i387_cw_initialization (mode);
16515 break;
16516 default:
16517 gcc_unreachable ();
16518 }
16519 }
16520
16521 /* Output code for INSN to convert a float to a signed int. OPERANDS
16522 are the insn operands. The output may be [HSD]Imode and the input
16523 operand may be [SDX]Fmode. */
16524
16525 const char *
16526 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16527 {
16528 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16529 int dimode_p = GET_MODE (operands[0]) == DImode;
16530 int round_mode = get_attr_i387_cw (insn);
16531
16532 /* Jump through a hoop or two for DImode, since the hardware has no
16533 non-popping instruction. We used to do this a different way, but
16534 that was somewhat fragile and broke with post-reload splitters. */
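  /* The "fld %y1" below duplicates the value on the x87 stack, so the
     popping fistp/fisttp consumes the copy and the original value survives
     when the stack top does not die here.  */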
16535 if ((dimode_p || fisttp) && !stack_top_dies)
16536 output_asm_insn ("fld\t%y1", operands);
16537
16538 gcc_assert (STACK_TOP_P (operands[1]));
16539 gcc_assert (MEM_P (operands[0]));
16540 gcc_assert (GET_MODE (operands[1]) != TFmode);
16541
16542 if (fisttp)
16543 output_asm_insn ("fisttp%Z0\t%0", operands);
16544 else
16545 {
16546 if (round_mode != I387_CW_ANY)
16547 output_asm_insn ("fldcw\t%3", operands);
16548 if (stack_top_dies || dimode_p)
16549 output_asm_insn ("fistp%Z0\t%0", operands);
16550 else
16551 output_asm_insn ("fist%Z0\t%0", operands);
16552 if (round_mode != I387_CW_ANY)
16553 output_asm_insn ("fldcw\t%2", operands);
16554 }
16555
16556 return "";
16557 }
16558
16559 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16560 have the values zero or one, indicates the ffreep insn's operand
16561 from the OPERANDS array. */
16562
16563 static const char *
16564 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16565 {
16566 if (TARGET_USE_FFREEP)
16567 #ifdef HAVE_AS_IX86_FFREEP
16568 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16569 #else
16570 {
16571 static char retval[32];
16572 int regno = REGNO (operands[opno]);
16573
16574 gcc_assert (STACK_REGNO_P (regno));
16575
16576 regno -= FIRST_STACK_REG;
16577
16578 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16579 return retval;
16580 }
16581 #endif
16582
16583 return opno ? "fstp\t%y1" : "fstp\t%y0";
16584 }
16585
16586
16587 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16588 should be used. UNORDERED_P is true when fucom should be used. */
16589
16590 const char *
16591 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16592 {
16593 int stack_top_dies;
16594 rtx cmp_op0, cmp_op1;
16595 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16596
16597 if (eflags_p)
16598 {
16599 cmp_op0 = operands[0];
16600 cmp_op1 = operands[1];
16601 }
16602 else
16603 {
16604 cmp_op0 = operands[1];
16605 cmp_op1 = operands[2];
16606 }
16607
16608 if (is_sse)
16609 {
16610 if (GET_MODE (operands[0]) == SFmode)
16611 if (unordered_p)
16612 return "%vucomiss\t{%1, %0|%0, %1}";
16613 else
16614 return "%vcomiss\t{%1, %0|%0, %1}";
16615 else
16616 if (unordered_p)
16617 return "%vucomisd\t{%1, %0|%0, %1}";
16618 else
16619 return "%vcomisd\t{%1, %0|%0, %1}";
16620 }
16621
16622 gcc_assert (STACK_TOP_P (cmp_op0));
16623
16624 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16625
16626 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16627 {
16628 if (stack_top_dies)
16629 {
16630 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16631 return output_387_ffreep (operands, 1);
16632 }
16633 else
16634 return "ftst\n\tfnstsw\t%0";
16635 }
16636
16637 if (STACK_REG_P (cmp_op1)
16638 && stack_top_dies
16639 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16640 && REGNO (cmp_op1) != FIRST_STACK_REG)
16641 {
16642       /* If the top of the 387 stack dies, and the other operand
16643 	 is also a stack register that dies, then this must be a
16644 	 `fcompp' float compare.  */
16645
16646 if (eflags_p)
16647 {
16648 /* There is no double popping fcomi variant. Fortunately,
16649 eflags is immune from the fstp's cc clobbering. */
16650 if (unordered_p)
16651 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16652 else
16653 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16654 return output_387_ffreep (operands, 0);
16655 }
16656 else
16657 {
16658 if (unordered_p)
16659 return "fucompp\n\tfnstsw\t%0";
16660 else
16661 return "fcompp\n\tfnstsw\t%0";
16662 }
16663 }
16664 else
16665 {
16666 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
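      /* That is, mask = eflags_p * 8 + intmode * 4 + unordered_p * 2
	 + stack_top_dies, indexing the table below.  */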
16667
16668 static const char * const alt[16] =
16669 {
16670 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16671 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16672 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16673 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16674
16675 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16676 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16677 NULL,
16678 NULL,
16679
16680 "fcomi\t{%y1, %0|%0, %y1}",
16681 "fcomip\t{%y1, %0|%0, %y1}",
16682 "fucomi\t{%y1, %0|%0, %y1}",
16683 "fucomip\t{%y1, %0|%0, %y1}",
16684
16685 NULL,
16686 NULL,
16687 NULL,
16688 NULL
16689 };
16690
16691 int mask;
16692 const char *ret;
16693
16694 mask = eflags_p << 3;
16695 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16696 mask |= unordered_p << 1;
16697 mask |= stack_top_dies;
16698
16699 gcc_assert (mask < 16);
16700 ret = alt[mask];
16701 gcc_assert (ret);
16702
16703 return ret;
16704 }
16705 }
16706
16707 void
16708 ix86_output_addr_vec_elt (FILE *file, int value)
16709 {
16710 const char *directive = ASM_LONG;
16711
16712 #ifdef ASM_QUAD
16713 if (TARGET_LP64)
16714 directive = ASM_QUAD;
16715 #else
16716 gcc_assert (!TARGET_64BIT);
16717 #endif
16718
16719 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16720 }
16721
16722 void
16723 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16724 {
16725 const char *directive = ASM_LONG;
16726
16727 #ifdef ASM_QUAD
16728 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16729 directive = ASM_QUAD;
16730 #else
16731 gcc_assert (!TARGET_64BIT);
16732 #endif
16733 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16734 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16735 fprintf (file, "%s%s%d-%s%d\n",
16736 directive, LPREFIX, value, LPREFIX, rel);
16737 else if (HAVE_AS_GOTOFF_IN_DATA)
16738 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16739 #if TARGET_MACHO
16740 else if (TARGET_MACHO)
16741 {
16742 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16743 machopic_output_function_base_name (file);
16744 putc ('\n', file);
16745 }
16746 #endif
16747 else
16748 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16749 GOT_SYMBOL_NAME, LPREFIX, value);
16750 }
16751 \f
16752 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16753 for the target. */
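/* For example, clearing a QImode or HImode register is widened to an SImode
   clear of the full register, and unless a plain "mov $0" is preferred the
   set is wrapped in a PARALLEL with a flags clobber so that the
   "xor reg, reg" form can be used.  */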
16754
16755 void
16756 ix86_expand_clear (rtx dest)
16757 {
16758 rtx tmp;
16759
16760 /* We play register width games, which are only valid after reload. */
16761 gcc_assert (reload_completed);
16762
16763 /* Avoid HImode and its attendant prefix byte. */
16764 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16765 dest = gen_rtx_REG (SImode, REGNO (dest));
16766 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16767
16768 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16769 {
16770 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16771 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16772 }
16773
16774 emit_insn (tmp);
16775 }
16776
16777 /* X is an unchanging MEM. If it is a constant pool reference, return
16778 the constant pool rtx, else NULL. */
16779
16780 rtx
16781 maybe_get_pool_constant (rtx x)
16782 {
16783 x = ix86_delegitimize_address (XEXP (x, 0));
16784
16785 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16786 return get_pool_constant (x);
16787
16788 return NULL_RTX;
16789 }
16790
16791 void
16792 ix86_expand_move (enum machine_mode mode, rtx operands[])
16793 {
16794 rtx op0, op1;
16795 enum tls_model model;
16796
16797 op0 = operands[0];
16798 op1 = operands[1];
16799
16800 if (GET_CODE (op1) == SYMBOL_REF)
16801 {
16802 rtx tmp;
16803
16804 model = SYMBOL_REF_TLS_MODEL (op1);
16805 if (model)
16806 {
16807 op1 = legitimize_tls_address (op1, model, true);
16808 op1 = force_operand (op1, op0);
16809 if (op1 == op0)
16810 return;
16811 op1 = convert_to_mode (mode, op1, 1);
16812 }
16813 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16814 op1 = tmp;
16815 }
16816 else if (GET_CODE (op1) == CONST
16817 && GET_CODE (XEXP (op1, 0)) == PLUS
16818 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16819 {
16820 rtx addend = XEXP (XEXP (op1, 0), 1);
16821 rtx symbol = XEXP (XEXP (op1, 0), 0);
16822 rtx tmp;
16823
16824 model = SYMBOL_REF_TLS_MODEL (symbol);
16825 if (model)
16826 tmp = legitimize_tls_address (symbol, model, true);
16827 else
16828 tmp = legitimize_pe_coff_symbol (symbol, true);
16829
16830 if (tmp)
16831 {
16832 tmp = force_operand (tmp, NULL);
16833 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16834 op0, 1, OPTAB_DIRECT);
16835 if (tmp == op0)
16836 return;
16837 op1 = convert_to_mode (mode, tmp, 1);
16838 }
16839 }
16840
16841 if ((flag_pic || MACHOPIC_INDIRECT)
16842 && symbolic_operand (op1, mode))
16843 {
16844 if (TARGET_MACHO && !TARGET_64BIT)
16845 {
16846 #if TARGET_MACHO
16847 /* dynamic-no-pic */
16848 if (MACHOPIC_INDIRECT)
16849 {
16850 rtx temp = ((reload_in_progress
16851 || ((op0 && REG_P (op0))
16852 && mode == Pmode))
16853 ? op0 : gen_reg_rtx (Pmode));
16854 op1 = machopic_indirect_data_reference (op1, temp);
16855 if (MACHOPIC_PURE)
16856 op1 = machopic_legitimize_pic_address (op1, mode,
16857 temp == op1 ? 0 : temp);
16858 }
16859 if (op0 != op1 && GET_CODE (op0) != MEM)
16860 {
16861 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16862 emit_insn (insn);
16863 return;
16864 }
16865 if (GET_CODE (op0) == MEM)
16866 op1 = force_reg (Pmode, op1);
16867 else
16868 {
16869 rtx temp = op0;
16870 if (GET_CODE (temp) != REG)
16871 temp = gen_reg_rtx (Pmode);
16872 temp = legitimize_pic_address (op1, temp);
16873 if (temp == op0)
16874 return;
16875 op1 = temp;
16876 }
16877 /* dynamic-no-pic */
16878 #endif
16879 }
16880 else
16881 {
16882 if (MEM_P (op0))
16883 op1 = force_reg (mode, op1);
16884 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16885 {
16886 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16887 op1 = legitimize_pic_address (op1, reg);
16888 if (op0 == op1)
16889 return;
16890 op1 = convert_to_mode (mode, op1, 1);
16891 }
16892 }
16893 }
16894 else
16895 {
16896 if (MEM_P (op0)
16897 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16898 || !push_operand (op0, mode))
16899 && MEM_P (op1))
16900 op1 = force_reg (mode, op1);
16901
16902 if (push_operand (op0, mode)
16903 && ! general_no_elim_operand (op1, mode))
16904 op1 = copy_to_mode_reg (mode, op1);
16905
16906       /* Force large constants in 64bit compilation into a register
16907 	 so that they get CSEed.  */
16908 if (can_create_pseudo_p ()
16909 && (mode == DImode) && TARGET_64BIT
16910 && immediate_operand (op1, mode)
16911 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16912 && !register_operand (op0, mode)
16913 && optimize)
16914 op1 = copy_to_mode_reg (mode, op1);
16915
16916 if (can_create_pseudo_p ()
16917 && FLOAT_MODE_P (mode)
16918 && GET_CODE (op1) == CONST_DOUBLE)
16919 {
16920 /* If we are loading a floating point constant to a register,
16921 force the value to memory now, since we'll get better code
16922 out the back end. */
16923
16924 op1 = validize_mem (force_const_mem (mode, op1));
16925 if (!register_operand (op0, mode))
16926 {
16927 rtx temp = gen_reg_rtx (mode);
16928 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16929 emit_move_insn (op0, temp);
16930 return;
16931 }
16932 }
16933 }
16934
16935 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16936 }
16937
16938 void
16939 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16940 {
16941 rtx op0 = operands[0], op1 = operands[1];
16942 unsigned int align = GET_MODE_ALIGNMENT (mode);
16943
16944 if (push_operand (op0, VOIDmode))
16945 op0 = emit_move_resolve_push (mode, op0);
16946
16947 /* Force constants other than zero into memory. We do not know how
16948 the instructions used to build constants modify the upper 64 bits
16949      of the register; once we have that information we may be able
16950 to handle some of them more efficiently. */
16951 if (can_create_pseudo_p ()
16952 && register_operand (op0, mode)
16953 && (CONSTANT_P (op1)
16954 || (GET_CODE (op1) == SUBREG
16955 && CONSTANT_P (SUBREG_REG (op1))))
16956 && !standard_sse_constant_p (op1))
16957 op1 = validize_mem (force_const_mem (mode, op1));
16958
16959 /* We need to check memory alignment for SSE mode since attribute
16960 can make operands unaligned. */
16961 if (can_create_pseudo_p ()
16962 && SSE_REG_MODE_P (mode)
16963 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16964 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16965 {
16966 rtx tmp[2];
16967
16968 /* ix86_expand_vector_move_misalign() does not like constants ... */
16969 if (CONSTANT_P (op1)
16970 || (GET_CODE (op1) == SUBREG
16971 && CONSTANT_P (SUBREG_REG (op1))))
16972 op1 = validize_mem (force_const_mem (mode, op1));
16973
16974 /* ... nor both arguments in memory. */
16975 if (!register_operand (op0, mode)
16976 && !register_operand (op1, mode))
16977 op1 = force_reg (mode, op1);
16978
16979 tmp[0] = op0; tmp[1] = op1;
16980 ix86_expand_vector_move_misalign (mode, tmp);
16981 return;
16982 }
16983
16984 /* Make operand1 a register if it isn't already. */
16985 if (can_create_pseudo_p ()
16986 && !register_operand (op0, mode)
16987 && !register_operand (op1, mode))
16988 {
16989 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16990 return;
16991 }
16992
16993 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16994 }
16995
16996 /* Split 32-byte AVX unaligned load and store if needed. */
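/* When splitting is enabled, a 32-byte unaligned load becomes a 16-byte
   load followed by a VEC_CONCAT with the upper 16 bytes, and a 32-byte
   unaligned store becomes two 128-bit extracts into the two halves of the
   destination.  */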
16997
16998 static void
16999 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17000 {
17001 rtx m;
17002 rtx (*extract) (rtx, rtx, rtx);
17003 rtx (*load_unaligned) (rtx, rtx);
17004 rtx (*store_unaligned) (rtx, rtx);
17005 enum machine_mode mode;
17006
17007 switch (GET_MODE (op0))
17008 {
17009 default:
17010 gcc_unreachable ();
17011 case V32QImode:
17012 extract = gen_avx_vextractf128v32qi;
17013 load_unaligned = gen_avx_loaddquv32qi;
17014 store_unaligned = gen_avx_storedquv32qi;
17015 mode = V16QImode;
17016 break;
17017 case V8SFmode:
17018 extract = gen_avx_vextractf128v8sf;
17019 load_unaligned = gen_avx_loadups256;
17020 store_unaligned = gen_avx_storeups256;
17021 mode = V4SFmode;
17022 break;
17023 case V4DFmode:
17024 extract = gen_avx_vextractf128v4df;
17025 load_unaligned = gen_avx_loadupd256;
17026 store_unaligned = gen_avx_storeupd256;
17027 mode = V2DFmode;
17028 break;
17029 }
17030
17031 if (MEM_P (op1))
17032 {
17033 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17034 {
17035 rtx r = gen_reg_rtx (mode);
17036 m = adjust_address (op1, mode, 0);
17037 emit_move_insn (r, m);
17038 m = adjust_address (op1, mode, 16);
17039 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17040 emit_move_insn (op0, r);
17041 }
17042 /* Normal *mov<mode>_internal pattern will handle
17043 unaligned loads just fine if misaligned_operand
17044 is true, and without the UNSPEC it can be combined
17045 with arithmetic instructions. */
17046 else if (misaligned_operand (op1, GET_MODE (op1)))
17047 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17048 else
17049 emit_insn (load_unaligned (op0, op1));
17050 }
17051 else if (MEM_P (op0))
17052 {
17053 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17054 {
17055 m = adjust_address (op0, mode, 0);
17056 emit_insn (extract (m, op1, const0_rtx));
17057 m = adjust_address (op0, mode, 16);
17058 emit_insn (extract (m, op1, const1_rtx));
17059 }
17060 else
17061 emit_insn (store_unaligned (op0, op1));
17062 }
17063 else
17064 gcc_unreachable ();
17065 }
17066
17067 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17068 straight to ix86_expand_vector_move. */
17069 /* Code generation for scalar reg-reg moves of single and double precision data:
17070 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17071 movaps reg, reg
17072 else
17073 movss reg, reg
17074 if (x86_sse_partial_reg_dependency == true)
17075 movapd reg, reg
17076 else
17077 movsd reg, reg
17078
17079 Code generation for scalar loads of double precision data:
17080 if (x86_sse_split_regs == true)
17081 movlpd mem, reg (gas syntax)
17082 else
17083 movsd mem, reg
17084
17085 Code generation for unaligned packed loads of single precision data
17086 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17087 if (x86_sse_unaligned_move_optimal)
17088 movups mem, reg
17089
17090 if (x86_sse_partial_reg_dependency == true)
17091 {
17092 xorps reg, reg
17093 movlps mem, reg
17094 movhps mem+8, reg
17095 }
17096 else
17097 {
17098 movlps mem, reg
17099 movhps mem+8, reg
17100 }
17101
17102 Code generation for unaligned packed loads of double precision data
17103 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17104 if (x86_sse_unaligned_move_optimal)
17105 movupd mem, reg
17106
17107 if (x86_sse_split_regs == true)
17108 {
17109 movlpd mem, reg
17110 movhpd mem+8, reg
17111 }
17112 else
17113 {
17114 movsd mem, reg
17115 movhpd mem+8, reg
17116 }
17117 */
17118
17119 void
17120 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17121 {
17122 rtx op0, op1, orig_op0 = NULL_RTX, m;
17123 rtx (*load_unaligned) (rtx, rtx);
17124 rtx (*store_unaligned) (rtx, rtx);
17125
17126 op0 = operands[0];
17127 op1 = operands[1];
17128
17129 if (GET_MODE_SIZE (mode) == 64)
17130 {
17131 switch (GET_MODE_CLASS (mode))
17132 {
17133 case MODE_VECTOR_INT:
17134 case MODE_INT:
17135 if (GET_MODE (op0) != V16SImode)
17136 {
17137 if (!MEM_P (op0))
17138 {
17139 orig_op0 = op0;
17140 op0 = gen_reg_rtx (V16SImode);
17141 }
17142 else
17143 op0 = gen_lowpart (V16SImode, op0);
17144 }
17145 op1 = gen_lowpart (V16SImode, op1);
17146 /* FALLTHRU */
17147
17148 case MODE_VECTOR_FLOAT:
17149 switch (GET_MODE (op0))
17150 {
17151 default:
17152 gcc_unreachable ();
17153 case V16SImode:
17154 load_unaligned = gen_avx512f_loaddquv16si;
17155 store_unaligned = gen_avx512f_storedquv16si;
17156 break;
17157 case V16SFmode:
17158 load_unaligned = gen_avx512f_loadups512;
17159 store_unaligned = gen_avx512f_storeups512;
17160 break;
17161 case V8DFmode:
17162 load_unaligned = gen_avx512f_loadupd512;
17163 store_unaligned = gen_avx512f_storeupd512;
17164 break;
17165 }
17166
17167 if (MEM_P (op1))
17168 emit_insn (load_unaligned (op0, op1));
17169 else if (MEM_P (op0))
17170 emit_insn (store_unaligned (op0, op1));
17171 else
17172 gcc_unreachable ();
17173 if (orig_op0)
17174 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17175 break;
17176
17177 default:
17178 gcc_unreachable ();
17179 }
17180
17181 return;
17182 }
17183
17184 if (TARGET_AVX
17185 && GET_MODE_SIZE (mode) == 32)
17186 {
17187 switch (GET_MODE_CLASS (mode))
17188 {
17189 case MODE_VECTOR_INT:
17190 case MODE_INT:
17191 if (GET_MODE (op0) != V32QImode)
17192 {
17193 if (!MEM_P (op0))
17194 {
17195 orig_op0 = op0;
17196 op0 = gen_reg_rtx (V32QImode);
17197 }
17198 else
17199 op0 = gen_lowpart (V32QImode, op0);
17200 }
17201 op1 = gen_lowpart (V32QImode, op1);
17202 /* FALLTHRU */
17203
17204 case MODE_VECTOR_FLOAT:
17205 ix86_avx256_split_vector_move_misalign (op0, op1);
17206 if (orig_op0)
17207 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17208 break;
17209
17210 default:
17211 gcc_unreachable ();
17212 }
17213
17214 return;
17215 }
17216
17217 if (MEM_P (op1))
17218 {
17219 /* Normal *mov<mode>_internal pattern will handle
17220 unaligned loads just fine if misaligned_operand
17221 is true, and without the UNSPEC it can be combined
17222 with arithmetic instructions. */
17223 if (TARGET_AVX
17224 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17225 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17226 && misaligned_operand (op1, GET_MODE (op1)))
17227 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17228 /* ??? If we have typed data, then it would appear that using
17229 movdqu is the only way to get unaligned data loaded with
17230 integer type. */
17231 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17232 {
17233 if (GET_MODE (op0) != V16QImode)
17234 {
17235 orig_op0 = op0;
17236 op0 = gen_reg_rtx (V16QImode);
17237 }
17238 op1 = gen_lowpart (V16QImode, op1);
17239 /* We will eventually emit movups based on insn attributes. */
17240 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17241 if (orig_op0)
17242 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17243 }
17244 else if (TARGET_SSE2 && mode == V2DFmode)
17245 {
17246 rtx zero;
17247
17248 if (TARGET_AVX
17249 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17250 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17251 || optimize_insn_for_size_p ())
17252 {
17253 /* We will eventually emit movups based on insn attributes. */
17254 emit_insn (gen_sse2_loadupd (op0, op1));
17255 return;
17256 }
17257
17258 /* When SSE registers are split into halves, we can avoid
17259 writing to the top half twice. */
17260 if (TARGET_SSE_SPLIT_REGS)
17261 {
17262 emit_clobber (op0);
17263 zero = op0;
17264 }
17265 else
17266 {
17267 /* ??? Not sure about the best option for the Intel chips.
17268 The following would seem to satisfy; the register is
17269 entirely cleared, breaking the dependency chain. We
17270 then store to the upper half, with a dependency depth
17271 of one. A rumor has it that Intel recommends two movsd
17272 followed by an unpacklpd, but this is unconfirmed. And
17273 given that the dependency depth of the unpacklpd would
17274 still be one, I'm not sure why this would be better. */
17275 zero = CONST0_RTX (V2DFmode);
17276 }
17277
17278 m = adjust_address (op1, DFmode, 0);
17279 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17280 m = adjust_address (op1, DFmode, 8);
17281 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17282 }
17283 else
17284 {
17285 rtx t;
17286
17287 if (TARGET_AVX
17288 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17289 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17290 || optimize_insn_for_size_p ())
17291 {
17292 if (GET_MODE (op0) != V4SFmode)
17293 {
17294 orig_op0 = op0;
17295 op0 = gen_reg_rtx (V4SFmode);
17296 }
17297 op1 = gen_lowpart (V4SFmode, op1);
17298 emit_insn (gen_sse_loadups (op0, op1));
17299 if (orig_op0)
17300 emit_move_insn (orig_op0,
17301 gen_lowpart (GET_MODE (orig_op0), op0));
17302 return;
17303 }
17304
17305 if (mode != V4SFmode)
17306 t = gen_reg_rtx (V4SFmode);
17307 else
17308 t = op0;
17309
17310 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17311 emit_move_insn (t, CONST0_RTX (V4SFmode));
17312 else
17313 emit_clobber (t);
17314
17315 m = adjust_address (op1, V2SFmode, 0);
17316 emit_insn (gen_sse_loadlps (t, t, m));
17317 m = adjust_address (op1, V2SFmode, 8);
17318 emit_insn (gen_sse_loadhps (t, t, m));
17319 if (mode != V4SFmode)
17320 emit_move_insn (op0, gen_lowpart (mode, t));
17321 }
17322 }
17323 else if (MEM_P (op0))
17324 {
17325 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17326 {
17327 op0 = gen_lowpart (V16QImode, op0);
17328 op1 = gen_lowpart (V16QImode, op1);
17329 /* We will eventually emit movups based on insn attributes. */
17330 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17331 }
17332 else if (TARGET_SSE2 && mode == V2DFmode)
17333 {
17334 if (TARGET_AVX
17335 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17336 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17337 || optimize_insn_for_size_p ())
17338 /* We will eventually emit movups based on insn attributes. */
17339 emit_insn (gen_sse2_storeupd (op0, op1));
17340 else
17341 {
17342 m = adjust_address (op0, DFmode, 0);
17343 emit_insn (gen_sse2_storelpd (m, op1));
17344 m = adjust_address (op0, DFmode, 8);
17345 emit_insn (gen_sse2_storehpd (m, op1));
17346 }
17347 }
17348 else
17349 {
17350 if (mode != V4SFmode)
17351 op1 = gen_lowpart (V4SFmode, op1);
17352
17353 if (TARGET_AVX
17354 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17355 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17356 || optimize_insn_for_size_p ())
17357 {
17358 op0 = gen_lowpart (V4SFmode, op0);
17359 emit_insn (gen_sse_storeups (op0, op1));
17360 }
17361 else
17362 {
17363 m = adjust_address (op0, V2SFmode, 0);
17364 emit_insn (gen_sse_storelps (m, op1));
17365 m = adjust_address (op0, V2SFmode, 8);
17366 emit_insn (gen_sse_storehps (m, op1));
17367 }
17368 }
17369 }
17370 else
17371 gcc_unreachable ();
17372 }
17373
17374 /* Helper function of ix86_fixup_binary_operands to canonicalize
17375 operand order. Returns true if the operands should be swapped. */
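/* For example, for a commutative operation "dst = 5 + reg" the operands are
   swapped so the immediate ends up second, and "dst = mem + reg" is swapped
   so the memory reference ends up second.  */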
17376
17377 static bool
17378 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17379 rtx operands[])
17380 {
17381 rtx dst = operands[0];
17382 rtx src1 = operands[1];
17383 rtx src2 = operands[2];
17384
17385 /* If the operation is not commutative, we can't do anything. */
17386 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17387 return false;
17388
17389 /* Highest priority is that src1 should match dst. */
17390 if (rtx_equal_p (dst, src1))
17391 return false;
17392 if (rtx_equal_p (dst, src2))
17393 return true;
17394
17395 /* Next highest priority is that immediate constants come second. */
17396 if (immediate_operand (src2, mode))
17397 return false;
17398 if (immediate_operand (src1, mode))
17399 return true;
17400
17401 /* Lowest priority is that memory references should come second. */
17402 if (MEM_P (src2))
17403 return false;
17404 if (MEM_P (src1))
17405 return true;
17406
17407 return false;
17408 }
17409
17410
17411 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17412 destination to use for the operation. If different from the true
17413 destination in operands[0], a copy operation will be required. */
17414
17415 rtx
17416 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17417 rtx operands[])
17418 {
17419 rtx dst = operands[0];
17420 rtx src1 = operands[1];
17421 rtx src2 = operands[2];
17422
17423 /* Canonicalize operand order. */
17424 if (ix86_swap_binary_operands_p (code, mode, operands))
17425 {
17426 rtx temp;
17427
17428 /* It is invalid to swap operands of different modes. */
17429 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17430
17431 temp = src1;
17432 src1 = src2;
17433 src2 = temp;
17434 }
17435
17436 /* Both source operands cannot be in memory. */
17437 if (MEM_P (src1) && MEM_P (src2))
17438 {
17439 /* Optimization: Only read from memory once. */
17440 if (rtx_equal_p (src1, src2))
17441 {
17442 src2 = force_reg (mode, src2);
17443 src1 = src2;
17444 }
17445 else if (rtx_equal_p (dst, src1))
17446 src2 = force_reg (mode, src2);
17447 else
17448 src1 = force_reg (mode, src1);
17449 }
17450
17451 /* If the destination is memory, and we do not have matching source
17452 operands, do things in registers. */
17453 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17454 dst = gen_reg_rtx (mode);
17455
17456 /* Source 1 cannot be a constant. */
17457 if (CONSTANT_P (src1))
17458 src1 = force_reg (mode, src1);
17459
17460 /* Source 1 cannot be a non-matching memory. */
17461 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17462 src1 = force_reg (mode, src1);
17463
17464 /* Improve address combine. */
17465 if (code == PLUS
17466 && GET_MODE_CLASS (mode) == MODE_INT
17467 && MEM_P (src2))
17468 src2 = force_reg (mode, src2);
17469
17470 operands[1] = src1;
17471 operands[2] = src2;
17472 return dst;
17473 }
17474
17475 /* Similarly, but assume that the destination has already been
17476 set up properly. */
17477
17478 void
17479 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17480 enum machine_mode mode, rtx operands[])
17481 {
17482 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17483 gcc_assert (dst == operands[0]);
17484 }
17485
17486 /* Attempt to expand a binary operator. Make the expansion closer to the
17487    actual machine than just general_operand, which would allow 3 separate
17488 memory references (one output, two input) in a single insn. */
17489
17490 void
17491 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17492 rtx operands[])
17493 {
17494 rtx src1, src2, dst, op, clob;
17495
17496 dst = ix86_fixup_binary_operands (code, mode, operands);
17497 src1 = operands[1];
17498 src2 = operands[2];
17499
17500 /* Emit the instruction. */
17501
17502 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17503 if (reload_in_progress)
17504 {
17505 /* Reload doesn't know about the flags register, and doesn't know that
17506 it doesn't want to clobber it. We can only do this with PLUS. */
17507 gcc_assert (code == PLUS);
17508 emit_insn (op);
17509 }
17510 else if (reload_completed
17511 && code == PLUS
17512 && !rtx_equal_p (dst, src1))
17513 {
17514 /* This is going to be an LEA; avoid splitting it later. */
17515 emit_insn (op);
17516 }
17517 else
17518 {
17519 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17520 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17521 }
17522
17523 /* Fix up the destination if needed. */
17524 if (dst != operands[0])
17525 emit_move_insn (operands[0], dst);
17526 }
17527
17528 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17529 the given OPERANDS. */
17530
17531 void
17532 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17533 rtx operands[])
17534 {
17535 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17536 if (GET_CODE (operands[1]) == SUBREG)
17537 {
17538 op1 = operands[1];
17539 op2 = operands[2];
17540 }
17541 else if (GET_CODE (operands[2]) == SUBREG)
17542 {
17543 op1 = operands[2];
17544 op2 = operands[1];
17545 }
17546   /* Optimize (__m128i) d | (__m128i) e and similar code, where d and e
17547      are float vectors, into a float vector logical insn.  In C/C++,
17548      without using intrinsics, there is no other way to express a vector
17549      logical operation on float vectors than to cast them temporarily to
17550      integer vectors.  */
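  /* E.g. with __m128 a, b the expression (__m128) ((__m128i) a & (__m128i) b)
     can then be emitted as a single AND in V4SFmode (an andps-style pattern)
     instead of an integer-vector AND on the casts.  */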
17551 if (op1
17552 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17553 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17554 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17555 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17556 && SUBREG_BYTE (op1) == 0
17557 && (GET_CODE (op2) == CONST_VECTOR
17558 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17559 && SUBREG_BYTE (op2) == 0))
17560 && can_create_pseudo_p ())
17561 {
17562 rtx dst;
17563 switch (GET_MODE (SUBREG_REG (op1)))
17564 {
17565 case V4SFmode:
17566 case V8SFmode:
17567 case V16SFmode:
17568 case V2DFmode:
17569 case V4DFmode:
17570 case V8DFmode:
17571 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17572 if (GET_CODE (op2) == CONST_VECTOR)
17573 {
17574 op2 = gen_lowpart (GET_MODE (dst), op2);
17575 op2 = force_reg (GET_MODE (dst), op2);
17576 }
17577 else
17578 {
17579 op1 = operands[1];
17580 op2 = SUBREG_REG (operands[2]);
17581 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17582 op2 = force_reg (GET_MODE (dst), op2);
17583 }
17584 op1 = SUBREG_REG (op1);
17585 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17586 op1 = force_reg (GET_MODE (dst), op1);
17587 emit_insn (gen_rtx_SET (VOIDmode, dst,
17588 gen_rtx_fmt_ee (code, GET_MODE (dst),
17589 op1, op2)));
17590 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17591 return;
17592 default:
17593 break;
17594 }
17595 }
17596 if (!nonimmediate_operand (operands[1], mode))
17597 operands[1] = force_reg (mode, operands[1]);
17598 if (!nonimmediate_operand (operands[2], mode))
17599 operands[2] = force_reg (mode, operands[2]);
17600 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17601 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17602 gen_rtx_fmt_ee (code, mode, operands[1],
17603 operands[2])));
17604 }
17605
17606 /* Return TRUE or FALSE depending on whether the binary operator meets the
17607 appropriate constraints. */
17608
17609 bool
17610 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17611 rtx operands[3])
17612 {
17613 rtx dst = operands[0];
17614 rtx src1 = operands[1];
17615 rtx src2 = operands[2];
17616
17617 /* Both source operands cannot be in memory. */
17618 if (MEM_P (src1) && MEM_P (src2))
17619 return false;
17620
17621 /* Canonicalize operand order for commutative operators. */
17622 if (ix86_swap_binary_operands_p (code, mode, operands))
17623 {
17624 rtx temp = src1;
17625 src1 = src2;
17626 src2 = temp;
17627 }
17628
17629 /* If the destination is memory, we must have a matching source operand. */
17630 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17631 return false;
17632
17633 /* Source 1 cannot be a constant. */
17634 if (CONSTANT_P (src1))
17635 return false;
17636
17637 /* Source 1 cannot be a non-matching memory. */
17638 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17639 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17640 return (code == AND
17641 && (mode == HImode
17642 || mode == SImode
17643 || (TARGET_64BIT && mode == DImode))
17644 && satisfies_constraint_L (src2));
17645
17646 return true;
17647 }
17648
17649 /* Attempt to expand a unary operator. Make the expansion closer to the
17650 actual machine than just general_operand, which will allow two separate
17651 memory references (one output, one input) in a single insn. */
17652
17653 void
17654 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17655 rtx operands[])
17656 {
17657 int matching_memory;
17658 rtx src, dst, op, clob;
17659
17660 dst = operands[0];
17661 src = operands[1];
17662
17663 /* If the destination is memory, and we do not have matching source
17664 operands, do things in registers. */
17665 matching_memory = 0;
17666 if (MEM_P (dst))
17667 {
17668 if (rtx_equal_p (dst, src))
17669 matching_memory = 1;
17670 else
17671 dst = gen_reg_rtx (mode);
17672 }
17673
17674 /* When the source operand is memory, the destination must match. */
17675 if (MEM_P (src) && !matching_memory)
17676 src = force_reg (mode, src);
17677
17678 /* Emit the instruction. */
17679
17680 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17681 if (reload_in_progress || code == NOT)
17682 {
17683 /* Reload doesn't know about the flags register, and doesn't know that
17684 it doesn't want to clobber it. */
17685 gcc_assert (code == NOT);
17686 emit_insn (op);
17687 }
17688 else
17689 {
17690 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17691 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17692 }
17693
17694 /* Fix up the destination if needed. */
17695 if (dst != operands[0])
17696 emit_move_insn (operands[0], dst);
17697 }
17698
17699 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17700 divisor are within the range [0-255]. */
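/* Illustrative sketch of the emitted sequence (register names arbitrary),
   for a 32-bit division a / b:

       mov    a, scratch
       or     b, scratch
       test   $-0x100, scratch      # do both operands fit in 8 bits?
       je     .Lqimode
       <full 32-bit div/idiv>       # quotient and remainder as usual
       jmp    .Lend
   .Lqimode:
       <8-bit unsigned divide>      # AL = quotient, AH = remainder
       <zero-extend AL into the quotient, copy AH into the remainder>
   .Lend:                                                              */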
17701
17702 void
17703 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17704 bool signed_p)
17705 {
17706 rtx_code_label *end_label, *qimode_label;
17707 rtx insn, div, mod;
17708 rtx scratch, tmp0, tmp1, tmp2;
17709 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17710 rtx (*gen_zero_extend) (rtx, rtx);
17711 rtx (*gen_test_ccno_1) (rtx, rtx);
17712
17713 switch (mode)
17714 {
17715 case SImode:
17716 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17717 gen_test_ccno_1 = gen_testsi_ccno_1;
17718 gen_zero_extend = gen_zero_extendqisi2;
17719 break;
17720 case DImode:
17721 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17722 gen_test_ccno_1 = gen_testdi_ccno_1;
17723 gen_zero_extend = gen_zero_extendqidi2;
17724 break;
17725 default:
17726 gcc_unreachable ();
17727 }
17728
17729 end_label = gen_label_rtx ();
17730 qimode_label = gen_label_rtx ();
17731
17732 scratch = gen_reg_rtx (mode);
17733
17734 /* Use 8bit unsigned divmod if dividend and divisor are within
17735 the range [0-255]. */
17736 emit_move_insn (scratch, operands[2]);
17737 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17738 scratch, 1, OPTAB_DIRECT);
17739 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17740 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17741 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17742 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17743 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17744 pc_rtx);
17745 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17746 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17747 JUMP_LABEL (insn) = qimode_label;
17748
17749 /* Generate original signed/unsigned divmod. */
17750 div = gen_divmod4_1 (operands[0], operands[1],
17751 operands[2], operands[3]);
17752 emit_insn (div);
17753
17754 /* Branch to the end. */
17755 emit_jump_insn (gen_jump (end_label));
17756 emit_barrier ();
17757
17758 /* Generate 8bit unsigned divide. */
17759 emit_label (qimode_label);
17760 /* Don't use operands[0] for result of 8bit divide since not all
17761 registers support QImode ZERO_EXTRACT. */
17762 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17763 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17764 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17765 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17766
17767 if (signed_p)
17768 {
17769 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17770 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17771 }
17772 else
17773 {
17774 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17775 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17776 }
17777
17778 /* Extract remainder from AH. */
17779 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17780 if (REG_P (operands[1]))
17781 insn = emit_move_insn (operands[1], tmp1);
17782 else
17783 {
17784 /* Need a new scratch register since the old one has result
17785 of 8bit divide. */
17786 scratch = gen_reg_rtx (mode);
17787 emit_move_insn (scratch, tmp1);
17788 insn = emit_move_insn (operands[1], scratch);
17789 }
17790 set_unique_reg_note (insn, REG_EQUAL, mod);
17791
17792 /* Zero extend quotient from AL. */
17793 tmp1 = gen_lowpart (QImode, tmp0);
17794 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17795 set_unique_reg_note (insn, REG_EQUAL, div);
17796
17797 emit_label (end_label);
17798 }
17799
17800 #define LEA_MAX_STALL (3)
17801 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17802
17803 /* Increase given DISTANCE in half-cycles according to
17804 dependencies between PREV and NEXT instructions.
17805 Add 1 half-cycle if there is no dependency and
17806 go to the next cycle if there is some dependency. */
17807
17808 static unsigned int
17809 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
17810 {
17811 df_ref def, use;
17812
17813 if (!prev || !next)
17814 return distance + (distance & 1) + 2;
17815
17816 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17817 return distance + 1;
17818
17819 FOR_EACH_INSN_USE (use, next)
17820 FOR_EACH_INSN_DEF (def, prev)
17821 if (!DF_REF_IS_ARTIFICIAL (def)
17822 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17823 return distance + (distance & 1) + 2;
17824
17825 return distance + 1;
17826 }
17827
17828 /* Return true if instruction INSN defines register number
17829 REGNO1 or REGNO2. */
17830
17831 static bool
17832 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17833 rtx insn)
17834 {
17835 df_ref def;
17836
17837 FOR_EACH_INSN_DEF (def, insn)
17838 if (DF_REF_REG_DEF_P (def)
17839 && !DF_REF_IS_ARTIFICIAL (def)
17840 && (regno1 == DF_REF_REGNO (def)
17841 || regno2 == DF_REF_REGNO (def)))
17842 return true;
17843
17844 return false;
17845 }
17846
17847 /* Return true if instruction INSN uses register number
17848 REGNO as a part of an address expression. */
17849
17850 static bool
17851 insn_uses_reg_mem (unsigned int regno, rtx insn)
17852 {
17853 df_ref use;
17854
17855 FOR_EACH_INSN_USE (use, insn)
17856 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17857 return true;
17858
17859 return false;
17860 }
17861
17862 /* Search backward for non-agu definition of register number REGNO1
17863 or register number REGNO2 in basic block starting from instruction
17864 START up to head of basic block or instruction INSN.
17865
17866 Set *FOUND to true if a definition was found and to false
17867 otherwise.
17868
17869 Distance in half-cycles between START and found instruction or head
17870 of BB is added to DISTANCE and returned. */
17871
17872 static int
17873 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17874 rtx_insn *insn, int distance,
17875 rtx_insn *start, bool *found)
17876 {
17877 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17878 rtx_insn *prev = start;
17879 rtx_insn *next = NULL;
17880
17881 *found = false;
17882
17883 while (prev
17884 && prev != insn
17885 && distance < LEA_SEARCH_THRESHOLD)
17886 {
17887 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17888 {
17889 distance = increase_distance (prev, next, distance);
17890 if (insn_defines_reg (regno1, regno2, prev))
17891 {
17892 if (recog_memoized (prev) < 0
17893 || get_attr_type (prev) != TYPE_LEA)
17894 {
17895 *found = true;
17896 return distance;
17897 }
17898 }
17899
17900 next = prev;
17901 }
17902 if (prev == BB_HEAD (bb))
17903 break;
17904
17905 prev = PREV_INSN (prev);
17906 }
17907
17908 return distance;
17909 }
17910
17911 /* Search backward for non-agu definition of register number REGNO1
17912 or register number REGNO2 in INSN's basic block until
17913 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17914 2. Reach a neighbouring BB's boundary, or
17915 3. Reach agu definition.
17916 Returns the distance between the non-agu definition point and INSN.
17917 If no definition point is found, return -1. */
17918
17919 static int
17920 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17921 rtx_insn *insn)
17922 {
17923 basic_block bb = BLOCK_FOR_INSN (insn);
17924 int distance = 0;
17925 bool found = false;
17926
17927 if (insn != BB_HEAD (bb))
17928 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17929 distance, PREV_INSN (insn),
17930 &found);
17931
17932 if (!found && distance < LEA_SEARCH_THRESHOLD)
17933 {
17934 edge e;
17935 edge_iterator ei;
17936 bool simple_loop = false;
17937
17938 FOR_EACH_EDGE (e, ei, bb->preds)
17939 if (e->src == bb)
17940 {
17941 simple_loop = true;
17942 break;
17943 }
17944
17945 if (simple_loop)
17946 distance = distance_non_agu_define_in_bb (regno1, regno2,
17947 insn, distance,
17948 BB_END (bb), &found);
17949 else
17950 {
17951 int shortest_dist = -1;
17952 bool found_in_bb = false;
17953
17954 FOR_EACH_EDGE (e, ei, bb->preds)
17955 {
17956 int bb_dist
17957 = distance_non_agu_define_in_bb (regno1, regno2,
17958 insn, distance,
17959 BB_END (e->src),
17960 &found_in_bb);
17961 if (found_in_bb)
17962 {
17963 if (shortest_dist < 0)
17964 shortest_dist = bb_dist;
17965 else if (bb_dist > 0)
17966 shortest_dist = MIN (bb_dist, shortest_dist);
17967
17968 found = true;
17969 }
17970 }
17971
17972 distance = shortest_dist;
17973 }
17974 }
17975
17976 /* get_attr_type may modify recog data. We want to make sure
17977 that recog data is valid for instruction INSN, on which
17978 distance_non_agu_define is called. INSN is unchanged here. */
17979 extract_insn_cached (insn);
17980
17981 if (!found)
17982 return -1;
17983
17984 return distance >> 1;
17985 }
17986
17987 /* Return the distance in half-cycles between INSN and the next
17988 insn that uses register number REGNO in memory address added
17989 to DISTANCE. Return -1 if REGNO is set.
17990
17991 Put true value into *FOUND if register usage was found and
17992 false otherwise.
17993 Put true value into *REDEFINED if register redefinition was
17994 found and false otherwise. */
17995
17996 static int
17997 distance_agu_use_in_bb (unsigned int regno,
17998 rtx_insn *insn, int distance, rtx_insn *start,
17999 bool *found, bool *redefined)
18000 {
18001 basic_block bb = NULL;
18002 rtx_insn *next = start;
18003 rtx_insn *prev = NULL;
18004
18005 *found = false;
18006 *redefined = false;
18007
18008 if (start != NULL_RTX)
18009 {
18010 bb = BLOCK_FOR_INSN (start);
18011 if (start != BB_HEAD (bb))
18012 /* If insn and start belong to the same bb, set prev to insn,
18013 so the call to increase_distance will increase the distance
18014 between insns by 1. */
18015 prev = insn;
18016 }
18017
18018 while (next
18019 && next != insn
18020 && distance < LEA_SEARCH_THRESHOLD)
18021 {
18022 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18023 {
18024 distance = increase_distance (prev, next, distance);
18025 if (insn_uses_reg_mem (regno, next))
18026 {
18027 /* Return DISTANCE if OP0 is used in memory
18028 address in NEXT. */
18029 *found = true;
18030 return distance;
18031 }
18032
18033 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18034 {
18035 /* Return -1 if OP0 is set in NEXT. */
18036 *redefined = true;
18037 return -1;
18038 }
18039
18040 prev = next;
18041 }
18042
18043 if (next == BB_END (bb))
18044 break;
18045
18046 next = NEXT_INSN (next);
18047 }
18048
18049 return distance;
18050 }
18051
18052 /* Return the distance between INSN and the next insn that uses
18053 register number REGNO0 in memory address. Return -1 if no such
18054 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18055
18056 static int
18057 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18058 {
18059 basic_block bb = BLOCK_FOR_INSN (insn);
18060 int distance = 0;
18061 bool found = false;
18062 bool redefined = false;
18063
18064 if (insn != BB_END (bb))
18065 distance = distance_agu_use_in_bb (regno0, insn, distance,
18066 NEXT_INSN (insn),
18067 &found, &redefined);
18068
18069 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18070 {
18071 edge e;
18072 edge_iterator ei;
18073 bool simple_loop = false;
18074
18075 FOR_EACH_EDGE (e, ei, bb->succs)
18076 if (e->dest == bb)
18077 {
18078 simple_loop = true;
18079 break;
18080 }
18081
18082 if (simple_loop)
18083 distance = distance_agu_use_in_bb (regno0, insn,
18084 distance, BB_HEAD (bb),
18085 &found, &redefined);
18086 else
18087 {
18088 int shortest_dist = -1;
18089 bool found_in_bb = false;
18090 bool redefined_in_bb = false;
18091
18092 FOR_EACH_EDGE (e, ei, bb->succs)
18093 {
18094 int bb_dist
18095 = distance_agu_use_in_bb (regno0, insn,
18096 distance, BB_HEAD (e->dest),
18097 &found_in_bb, &redefined_in_bb);
18098 if (found_in_bb)
18099 {
18100 if (shortest_dist < 0)
18101 shortest_dist = bb_dist;
18102 else if (bb_dist > 0)
18103 shortest_dist = MIN (bb_dist, shortest_dist);
18104
18105 found = true;
18106 }
18107 }
18108
18109 distance = shortest_dist;
18110 }
18111 }
18112
18113 if (!found || redefined)
18114 return -1;
18115
18116 return distance >> 1;
18117 }
18118
18119 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18120 there is a dilemma of choosing LEA or ADD.
18121 Negative value: ADD is preferred over LEA
18122 Zero: Neutral
18123 Positive value: LEA is preferred over ADD */
18124 #define IX86_LEA_PRIORITY 0
18125
18126 /* Return true if using lea INSN has a performance advantage
18127 over a sequence of instructions. The instruction sequence has
18128 SPLIT_COST cycles higher latency than the lea latency. */
18129
18130 static bool
18131 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18132 unsigned int regno2, int split_cost, bool has_scale)
18133 {
18134 int dist_define, dist_use;
18135
18136 /* For Silvermont, if using a 2-source or 3-source LEA for
18137 non-destructive destination purposes, or due to wanting the
18138 ability to use SCALE, the use of LEA is justified. */
18139 if (TARGET_SILVERMONT || TARGET_INTEL)
18140 {
18141 if (has_scale)
18142 return true;
18143 if (split_cost < 1)
18144 return false;
18145 if (regno0 == regno1 || regno0 == regno2)
18146 return false;
18147 return true;
18148 }
18149
18150 dist_define = distance_non_agu_define (regno1, regno2, insn);
18151 dist_use = distance_agu_use (regno0, insn);
18152
18153 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18154 {
18155 /* If there is no non-AGU operand definition, no AGU
18156 operand usage and the split cost is 0, then both the lea
18157 and non-lea variants have the same priority. Currently
18158 we prefer lea for 64-bit code and non-lea for 32-bit
18159 code. */
18160 if (dist_use < 0 && split_cost == 0)
18161 return TARGET_64BIT || IX86_LEA_PRIORITY;
18162 else
18163 return true;
18164 }
18165
18166 /* The longer the definition distance, the more preferable lea is.
18167 Here we adjust it to take into account the splitting cost and
18168 lea priority. */
18169 dist_define += split_cost + IX86_LEA_PRIORITY;
18170
18171 /* If there is no use in a memory address then we just check
18172 that the split cost exceeds the AGU stall. */
18173 if (dist_use < 0)
18174 return dist_define > LEA_MAX_STALL;
18175
18176 /* If this insn has both backward non-agu dependence and forward
18177 agu dependence, the one with the shorter distance takes effect. */
18178 return dist_define >= dist_use;
18179 }
18180
18181 /* Return true if it is legal to clobber flags by INSN and
18182 false otherwise. */
18183
18184 static bool
18185 ix86_ok_to_clobber_flags (rtx_insn *insn)
18186 {
18187 basic_block bb = BLOCK_FOR_INSN (insn);
18188 df_ref use;
18189 bitmap live;
18190
18191 while (insn)
18192 {
18193 if (NONDEBUG_INSN_P (insn))
18194 {
18195 FOR_EACH_INSN_USE (use, insn)
18196 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18197 return false;
18198
18199 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18200 return true;
18201 }
18202
18203 if (insn == BB_END (bb))
18204 break;
18205
18206 insn = NEXT_INSN (insn);
18207 }
18208
18209 live = df_get_live_out (bb);
18210 return !REGNO_REG_SET_P (live, FLAGS_REG);
18211 }
18212
18213 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18214 move and add to avoid AGU stalls. */
18215
18216 bool
18217 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18218 {
18219 unsigned int regno0, regno1, regno2;
18220
18221 /* Check if we need to optimize. */
18222 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18223 return false;
18224
18225 /* Check it is correct to split here. */
18226 if (!ix86_ok_to_clobber_flags (insn))
18227 return false;
18228
18229 regno0 = true_regnum (operands[0]);
18230 regno1 = true_regnum (operands[1]);
18231 regno2 = true_regnum (operands[2]);
18232
18233 /* We only need to split adds with a non-destructive
18234 destination operand. */
18235 if (regno0 == regno1 || regno0 == regno2)
18236 return false;
18237 else
18238 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18239 }
18240
18241 /* Return true if we should emit lea instruction instead of mov
18242 instruction. */
18243
18244 bool
18245 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18246 {
18247 unsigned int regno0, regno1;
18248
18249 /* Check if we need to optimize. */
18250 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18251 return false;
18252
18253 /* Use lea for reg to reg moves only. */
18254 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18255 return false;
18256
18257 regno0 = true_regnum (operands[0]);
18258 regno1 = true_regnum (operands[1]);
18259
18260 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18261 }
18262
18263 /* Return true if we need to split lea into a sequence of
18264 instructions to avoid AGU stalls. */
18265
18266 bool
18267 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18268 {
18269 unsigned int regno0, regno1, regno2;
18270 int split_cost;
18271 struct ix86_address parts;
18272 int ok;
18273
18274 /* Check we need to optimize. */
18275 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18276 return false;
18277
18278 /* The "at least two components" test below might not catch simple
18279 move or zero extension insns if parts.base is non-NULL and parts.disp
18280 is const0_rtx as the only components in the address, e.g. if the
18281 register is %rbp or %r13. As this test is much cheaper and moves or
18282 zero extensions are the common case, do this check first. */
18283 if (REG_P (operands[1])
18284 || (SImode_address_operand (operands[1], VOIDmode)
18285 && REG_P (XEXP (operands[1], 0))))
18286 return false;
18287
18288 /* Check if it is OK to split here. */
18289 if (!ix86_ok_to_clobber_flags (insn))
18290 return false;
18291
18292 ok = ix86_decompose_address (operands[1], &parts);
18293 gcc_assert (ok);
18294
18295 /* There should be at least two components in the address. */
18296 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18297 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18298 return false;
18299
18300 /* We should not split into add if a non-legitimate pic
18301 operand is used as the displacement. */
18302 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18303 return false;
18304
18305 regno0 = true_regnum (operands[0]);
18306 regno1 = INVALID_REGNUM;
18307 regno2 = INVALID_REGNUM;
18308
18309 if (parts.base)
18310 regno1 = true_regnum (parts.base);
18311 if (parts.index)
18312 regno2 = true_regnum (parts.index);
18313
18314 split_cost = 0;
18315
18316 /* Compute how many cycles we will add to the execution time
18317 if we split the lea into a sequence of instructions. */
18318 if (parts.base || parts.index)
18319 {
18320 /* Have to use a mov instruction if the non-destructive
18321 destination form is used. */
18322 if (regno1 != regno0 && regno2 != regno0)
18323 split_cost += 1;
18324
18325 /* Have to add index to base if both exist. */
18326 if (parts.base && parts.index)
18327 split_cost += 1;
18328
18329 /* Have to use shift and adds if scale is 2 or greater. */
18330 if (parts.scale > 1)
18331 {
18332 if (regno0 != regno1)
18333 split_cost += 1;
18334 else if (regno2 == regno0)
18335 split_cost += 4;
18336 else
18337 split_cost += parts.scale;
18338 }
18339
18340 /* Have to use add instruction with immediate if
18341 disp is non zero. */
18342 if (parts.disp && parts.disp != const0_rtx)
18343 split_cost += 1;
18344
18345 /* Subtract the price of lea. */
18346 split_cost -= 1;
18347 }
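  /* Worked example (illustrative only): for
       lea 0x10(%rbx,%rcx,4), %rax
     the split sequence needs a mov (+1), a shift for the scale (+1),
     an add of the base (+1) and an add of the displacement (+1),
     minus the lea being replaced (-1), so split_cost == 3. */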
18348
18349 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18350 parts.scale > 1);
18351 }
18352
18353 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18354 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
18355
18356 static void
18357 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18358 rtx dst, rtx src)
18359 {
18360 rtx op, clob;
18361
18362 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18363 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18364
18365 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18366 }
18367
18368 /* Return true if REGNO1's definition is nearer to INSN than REGNO2's. */
18369
18370 static bool
18371 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18372 {
18373 rtx_insn *prev = insn;
18374 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18375
18376 if (insn == start)
18377 return false;
18378 while (prev && prev != start)
18379 {
18380 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18381 {
18382 prev = PREV_INSN (prev);
18383 continue;
18384 }
18385 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18386 return true;
18387 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18388 return false;
18389 prev = PREV_INSN (prev);
18390 }
18391
18392 /* None of the regs is defined in the bb. */
18393 return false;
18394 }
18395
18396 /* Split lea instructions into a sequence of instructions
18397 which are executed on the ALU to avoid AGU stalls.
18398 It is assumed that the flags register may be clobbered
18399 at the lea position. */
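/* For illustration only (register names arbitrary), a lea such as

       lea (%rbx,%rcx,4), %rax

   is split by this function into roughly

       mov %rcx, %rax
       sal $2, %rax
       add %rbx, %rax

   while "lea 0x8(%rbx,%rcx), %rax" becomes a mov of one source followed
   by adds of the displacement and of the remaining source. */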
18400
18401 void
18402 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], enum machine_mode mode)
18403 {
18404 unsigned int regno0, regno1, regno2;
18405 struct ix86_address parts;
18406 rtx target, tmp;
18407 int ok, adds;
18408
18409 ok = ix86_decompose_address (operands[1], &parts);
18410 gcc_assert (ok);
18411
18412 target = gen_lowpart (mode, operands[0]);
18413
18414 regno0 = true_regnum (target);
18415 regno1 = INVALID_REGNUM;
18416 regno2 = INVALID_REGNUM;
18417
18418 if (parts.base)
18419 {
18420 parts.base = gen_lowpart (mode, parts.base);
18421 regno1 = true_regnum (parts.base);
18422 }
18423
18424 if (parts.index)
18425 {
18426 parts.index = gen_lowpart (mode, parts.index);
18427 regno2 = true_regnum (parts.index);
18428 }
18429
18430 if (parts.disp)
18431 parts.disp = gen_lowpart (mode, parts.disp);
18432
18433 if (parts.scale > 1)
18434 {
18435 /* Case r1 = r1 + ... */
18436 if (regno1 == regno0)
18437 {
18438 /* If we have the case r1 = r1 + C * r2 then we
18439 would have to use multiplication, which is very
18440 expensive. Assume the cost model is wrong if we
18441 reach such a case here. */
18442 gcc_assert (regno2 != regno0);
18443
18444 for (adds = parts.scale; adds > 0; adds--)
18445 ix86_emit_binop (PLUS, mode, target, parts.index);
18446 }
18447 else
18448 {
18449 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18450 if (regno0 != regno2)
18451 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18452
18453 /* Use shift for scaling. */
18454 ix86_emit_binop (ASHIFT, mode, target,
18455 GEN_INT (exact_log2 (parts.scale)));
18456
18457 if (parts.base)
18458 ix86_emit_binop (PLUS, mode, target, parts.base);
18459
18460 if (parts.disp && parts.disp != const0_rtx)
18461 ix86_emit_binop (PLUS, mode, target, parts.disp);
18462 }
18463 }
18464 else if (!parts.base && !parts.index)
18465 {
18466 gcc_assert (parts.disp);
18467 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18468 }
18469 else
18470 {
18471 if (!parts.base)
18472 {
18473 if (regno0 != regno2)
18474 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18475 }
18476 else if (!parts.index)
18477 {
18478 if (regno0 != regno1)
18479 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18480 }
18481 else
18482 {
18483 if (regno0 == regno1)
18484 tmp = parts.index;
18485 else if (regno0 == regno2)
18486 tmp = parts.base;
18487 else
18488 {
18489 rtx tmp1;
18490
18491 /* Find better operand for SET instruction, depending
18492 on which definition is farther from the insn. */
18493 if (find_nearest_reg_def (insn, regno1, regno2))
18494 tmp = parts.index, tmp1 = parts.base;
18495 else
18496 tmp = parts.base, tmp1 = parts.index;
18497
18498 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18499
18500 if (parts.disp && parts.disp != const0_rtx)
18501 ix86_emit_binop (PLUS, mode, target, parts.disp);
18502
18503 ix86_emit_binop (PLUS, mode, target, tmp1);
18504 return;
18505 }
18506
18507 ix86_emit_binop (PLUS, mode, target, tmp);
18508 }
18509
18510 if (parts.disp && parts.disp != const0_rtx)
18511 ix86_emit_binop (PLUS, mode, target, parts.disp);
18512 }
18513 }
18514
18515 /* Return true if it is ok to optimize an ADD operation to an LEA
18516 operation to avoid flag register consumption. For most processors,
18517 ADD is faster than LEA. For processors like BONNELL, if the
18518 destination register of the LEA holds an actual address which will be
18519 used soon, LEA is better; otherwise ADD is better. */
18520
18521 bool
18522 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18523 {
18524 unsigned int regno0 = true_regnum (operands[0]);
18525 unsigned int regno1 = true_regnum (operands[1]);
18526 unsigned int regno2 = true_regnum (operands[2]);
18527
18528 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18529 if (regno0 != regno1 && regno0 != regno2)
18530 return true;
18531
18532 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18533 return false;
18534
18535 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18536 }
18537
18538 /* Return true if destination reg of SET_BODY is shift count of
18539 USE_BODY. */
18540
18541 static bool
18542 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18543 {
18544 rtx set_dest;
18545 rtx shift_rtx;
18546 int i;
18547
18548 /* Retrieve destination of SET_BODY. */
18549 switch (GET_CODE (set_body))
18550 {
18551 case SET:
18552 set_dest = SET_DEST (set_body);
18553 if (!set_dest || !REG_P (set_dest))
18554 return false;
18555 break;
18556 case PARALLEL:
18557 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18558 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18559 use_body))
18560 return true;
18561 default:
18562 return false;
18563 break;
18564 }
18565
18566 /* Retrieve shift count of USE_BODY. */
18567 switch (GET_CODE (use_body))
18568 {
18569 case SET:
18570 shift_rtx = XEXP (use_body, 1);
18571 break;
18572 case PARALLEL:
18573 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18574 if (ix86_dep_by_shift_count_body (set_body,
18575 XVECEXP (use_body, 0, i)))
18576 return true;
18577 default:
18578 return false;
18579 break;
18580 }
18581
18582 if (shift_rtx
18583 && (GET_CODE (shift_rtx) == ASHIFT
18584 || GET_CODE (shift_rtx) == LSHIFTRT
18585 || GET_CODE (shift_rtx) == ASHIFTRT
18586 || GET_CODE (shift_rtx) == ROTATE
18587 || GET_CODE (shift_rtx) == ROTATERT))
18588 {
18589 rtx shift_count = XEXP (shift_rtx, 1);
18590
18591 /* Return true if shift count is dest of SET_BODY. */
18592 if (REG_P (shift_count))
18593 {
18594 /* Add this check since it can be invoked before register
18595 allocation in the pre-reload scheduler. */
18596 if (reload_completed
18597 && true_regnum (set_dest) == true_regnum (shift_count))
18598 return true;
18599 else if (REGNO (set_dest) == REGNO (shift_count))
18600 return true;
18601 }
18602 }
18603
18604 return false;
18605 }
18606
18607 /* Return true if destination reg of SET_INSN is shift count of
18608 USE_INSN. */
18609
18610 bool
18611 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18612 {
18613 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18614 PATTERN (use_insn));
18615 }
18616
18617 /* Return TRUE or FALSE depending on whether the unary operator meets the
18618 appropriate constraints. */
18619
18620 bool
18621 ix86_unary_operator_ok (enum rtx_code,
18622 enum machine_mode,
18623 rtx operands[2])
18624 {
18625 /* If one of operands is memory, source and destination must match. */
18626 if ((MEM_P (operands[0])
18627 || MEM_P (operands[1]))
18628 && ! rtx_equal_p (operands[0], operands[1]))
18629 return false;
18630 return true;
18631 }
18632
18633 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18634 are ok, keeping in mind the possible movddup alternative. */
18635
18636 bool
18637 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18638 {
18639 if (MEM_P (operands[0]))
18640 return rtx_equal_p (operands[0], operands[1 + high]);
18641 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18642 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18643 return true;
18644 }
18645
18646 /* Post-reload splitter for converting an SFmode or DFmode value in an
18647 SSE register into an unsigned SImode value. */
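/* In scalar terms the split computes, branch-free (illustrative only):

     if (x < 0x1p31)
       result = (int) x;
     else
       result = (int) (x - 0x1p31) ^ 0x80000000;

   LARGE becomes an all-ones mask where x >= 2^31, ZERO_OR_TWO31 selects
   the value to subtract, and the mask shifted left by 31 supplies the
   0x80000000 to xor back into the truncated result. */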
18648
18649 void
18650 ix86_split_convert_uns_si_sse (rtx operands[])
18651 {
18652 enum machine_mode vecmode;
18653 rtx value, large, zero_or_two31, input, two31, x;
18654
18655 large = operands[1];
18656 zero_or_two31 = operands[2];
18657 input = operands[3];
18658 two31 = operands[4];
18659 vecmode = GET_MODE (large);
18660 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18661
18662 /* Load up the value into the low element. We must ensure that the other
18663 elements are valid floats -- zero is the easiest such value. */
18664 if (MEM_P (input))
18665 {
18666 if (vecmode == V4SFmode)
18667 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18668 else
18669 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18670 }
18671 else
18672 {
18673 input = gen_rtx_REG (vecmode, REGNO (input));
18674 emit_move_insn (value, CONST0_RTX (vecmode));
18675 if (vecmode == V4SFmode)
18676 emit_insn (gen_sse_movss (value, value, input));
18677 else
18678 emit_insn (gen_sse2_movsd (value, value, input));
18679 }
18680
18681 emit_move_insn (large, two31);
18682 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18683
18684 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18685 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18686
18687 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18688 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18689
18690 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18691 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18692
18693 large = gen_rtx_REG (V4SImode, REGNO (large));
18694 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18695
18696 x = gen_rtx_REG (V4SImode, REGNO (value));
18697 if (vecmode == V4SFmode)
18698 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18699 else
18700 emit_insn (gen_sse2_cvttpd2dq (x, value));
18701 value = x;
18702
18703 emit_insn (gen_xorv4si3 (value, value, large));
18704 }
18705
18706 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18707 Expects the 64-bit DImode to be supplied in a pair of integral
18708 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18709 -mfpmath=sse, !optimize_size only. */
18710
18711 void
18712 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18713 {
18714 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18715 rtx int_xmm, fp_xmm;
18716 rtx biases, exponents;
18717 rtx x;
18718
18719 int_xmm = gen_reg_rtx (V4SImode);
18720 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18721 emit_insn (gen_movdi_to_sse (int_xmm, input));
18722 else if (TARGET_SSE_SPLIT_REGS)
18723 {
18724 emit_clobber (int_xmm);
18725 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18726 }
18727 else
18728 {
18729 x = gen_reg_rtx (V2DImode);
18730 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18731 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18732 }
18733
18734 x = gen_rtx_CONST_VECTOR (V4SImode,
18735 gen_rtvec (4, GEN_INT (0x43300000UL),
18736 GEN_INT (0x45300000UL),
18737 const0_rtx, const0_rtx));
18738 exponents = validize_mem (force_const_mem (V4SImode, x));
18739
18740 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18741 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18742
18743 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18744 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18745 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18746 (0x1.0p84 + double(fp_value_hi_xmm)).
18747 Note these exponents differ by 32. */
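  /* In other words (informal sketch of the arithmetic): after the bias
     subtraction below, the two double lanes hold exactly (double) lo and
     (double) hi * 0x1.0p32, both exactly representable, so the final
     addition yields the correctly rounded DFmode value of the unsigned
     64-bit input hi * 2^32 + lo. */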
18748
18749 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18750
18751 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18752 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18753 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18754 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18755 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18756 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18757 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18758 biases = validize_mem (force_const_mem (V2DFmode, biases));
18759 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18760
18761 /* Add the upper and lower DFmode values together. */
18762 if (TARGET_SSE3)
18763 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18764 else
18765 {
18766 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18767 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18768 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18769 }
18770
18771 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18772 }
18773
18774 /* Not used, but eases macroization of patterns. */
18775 void
18776 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18777 {
18778 gcc_unreachable ();
18779 }
18780
18781 /* Convert an unsigned SImode value into a DFmode. Only currently used
18782 for SSE, but applicable anywhere. */
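/* Informal sketch: the expansion computes (double) U as
   (double) (int) (U - 0x80000000) + 0x1.0p31.  Adding -2147483648 below
   just flips the sign bit, so the signed intermediate equals U - 2^31 for
   every U, and the final DFmode addition of 2^31 is exact. */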
18783
18784 void
18785 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18786 {
18787 REAL_VALUE_TYPE TWO31r;
18788 rtx x, fp;
18789
18790 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18791 NULL, 1, OPTAB_DIRECT);
18792
18793 fp = gen_reg_rtx (DFmode);
18794 emit_insn (gen_floatsidf2 (fp, x));
18795
18796 real_ldexp (&TWO31r, &dconst1, 31);
18797 x = const_double_from_real_value (TWO31r, DFmode);
18798
18799 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18800 if (x != target)
18801 emit_move_insn (target, x);
18802 }
18803
18804 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18805 32-bit mode; otherwise we have a direct convert instruction. */
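/* Informal sketch: the value is split into 32-bit halves and recombined
   as (double) (int) hi * 0x1.0p32 + (double) (unsigned) lo, with the low
   half converted via ix86_expand_convert_uns_sidf_sse.  The scaling of
   the high half and the conversions are exact; only the final addition
   rounds. */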
18806
18807 void
18808 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18809 {
18810 REAL_VALUE_TYPE TWO32r;
18811 rtx fp_lo, fp_hi, x;
18812
18813 fp_lo = gen_reg_rtx (DFmode);
18814 fp_hi = gen_reg_rtx (DFmode);
18815
18816 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18817
18818 real_ldexp (&TWO32r, &dconst1, 32);
18819 x = const_double_from_real_value (TWO32r, DFmode);
18820 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18821
18822 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18823
18824 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18825 0, OPTAB_DIRECT);
18826 if (x != target)
18827 emit_move_insn (target, x);
18828 }
18829
18830 /* Convert an unsigned SImode value into an SFmode value, using only SSE.
18831 For x86_32, -mfpmath=sse, !optimize_size only. */
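/* Informal sketch: the input is split as U = HI * 0x10000 + LO with
   HI = U >> 16 and LO = U & 0xffff, each half is converted exactly to
   SFmode, and the result is formed as HI * 0x1.0p16 + LO, so a value
   with the sign bit set is never mistaken for a negative number. */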
18832 void
18833 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18834 {
18835 REAL_VALUE_TYPE ONE16r;
18836 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18837
18838 real_ldexp (&ONE16r, &dconst1, 16);
18839 x = const_double_from_real_value (ONE16r, SFmode);
18840 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18841 NULL, 0, OPTAB_DIRECT);
18842 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18843 NULL, 0, OPTAB_DIRECT);
18844 fp_hi = gen_reg_rtx (SFmode);
18845 fp_lo = gen_reg_rtx (SFmode);
18846 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18847 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18848 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18849 0, OPTAB_DIRECT);
18850 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18851 0, OPTAB_DIRECT);
18852 if (!rtx_equal_p (target, fp_hi))
18853 emit_move_insn (target, fp_hi);
18854 }
18855
18856 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18857 a vector of unsigned ints VAL to vector of floats TARGET. */
18858
18859 void
18860 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18861 {
18862 rtx tmp[8];
18863 REAL_VALUE_TYPE TWO16r;
18864 enum machine_mode intmode = GET_MODE (val);
18865 enum machine_mode fltmode = GET_MODE (target);
18866 rtx (*cvt) (rtx, rtx);
18867
18868 if (intmode == V4SImode)
18869 cvt = gen_floatv4siv4sf2;
18870 else
18871 cvt = gen_floatv8siv8sf2;
18872 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18873 tmp[0] = force_reg (intmode, tmp[0]);
18874 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18875 OPTAB_DIRECT);
18876 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18877 NULL_RTX, 1, OPTAB_DIRECT);
18878 tmp[3] = gen_reg_rtx (fltmode);
18879 emit_insn (cvt (tmp[3], tmp[1]));
18880 tmp[4] = gen_reg_rtx (fltmode);
18881 emit_insn (cvt (tmp[4], tmp[2]));
18882 real_ldexp (&TWO16r, &dconst1, 16);
18883 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18884 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18885 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18886 OPTAB_DIRECT);
18887 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18888 OPTAB_DIRECT);
18889 if (tmp[7] != target)
18890 emit_move_insn (target, tmp[7]);
18891 }
18892
18893 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18894 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18895 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18896 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18897
18898 rtx
18899 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18900 {
18901 REAL_VALUE_TYPE TWO31r;
18902 rtx two31r, tmp[4];
18903 enum machine_mode mode = GET_MODE (val);
18904 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18905 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18906 rtx (*cmp) (rtx, rtx, rtx, rtx);
18907 int i;
18908
18909 for (i = 0; i < 3; i++)
18910 tmp[i] = gen_reg_rtx (mode);
18911 real_ldexp (&TWO31r, &dconst1, 31);
18912 two31r = const_double_from_real_value (TWO31r, scalarmode);
18913 two31r = ix86_build_const_vector (mode, 1, two31r);
18914 two31r = force_reg (mode, two31r);
18915 switch (mode)
18916 {
18917 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18918 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18919 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18920 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18921 default: gcc_unreachable ();
18922 }
18923 tmp[3] = gen_rtx_LE (mode, two31r, val);
18924 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18925 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18926 0, OPTAB_DIRECT);
18927 if (intmode == V4SImode || TARGET_AVX2)
18928 *xorp = expand_simple_binop (intmode, ASHIFT,
18929 gen_lowpart (intmode, tmp[0]),
18930 GEN_INT (31), NULL_RTX, 0,
18931 OPTAB_DIRECT);
18932 else
18933 {
18934 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18935 two31 = ix86_build_const_vector (intmode, 1, two31);
18936 *xorp = expand_simple_binop (intmode, AND,
18937 gen_lowpart (intmode, tmp[0]),
18938 two31, NULL_RTX, 0,
18939 OPTAB_DIRECT);
18940 }
18941 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18942 0, OPTAB_DIRECT);
18943 }
18944
18945 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18946 then replicate the value for all elements of the vector
18947 register. */
18948
18949 rtx
18950 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18951 {
18952 int i, n_elt;
18953 rtvec v;
18954 enum machine_mode scalar_mode;
18955
18956 switch (mode)
18957 {
18958 case V64QImode:
18959 case V32QImode:
18960 case V16QImode:
18961 case V32HImode:
18962 case V16HImode:
18963 case V8HImode:
18964 case V16SImode:
18965 case V8SImode:
18966 case V4SImode:
18967 case V8DImode:
18968 case V4DImode:
18969 case V2DImode:
18970 gcc_assert (vect);
18971 case V16SFmode:
18972 case V8SFmode:
18973 case V4SFmode:
18974 case V8DFmode:
18975 case V4DFmode:
18976 case V2DFmode:
18977 n_elt = GET_MODE_NUNITS (mode);
18978 v = rtvec_alloc (n_elt);
18979 scalar_mode = GET_MODE_INNER (mode);
18980
18981 RTVEC_ELT (v, 0) = value;
18982
18983 for (i = 1; i < n_elt; ++i)
18984 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18985
18986 return gen_rtx_CONST_VECTOR (mode, v);
18987
18988 default:
18989 gcc_unreachable ();
18990 }
18991 }
18992
18993 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18994 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18995 for an SSE register. If VECT is true, then replicate the mask for
18996 all elements of the vector register. If INVERT is true, then create
18997 a mask excluding the sign bit. */
18998
18999 rtx
19000 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
19001 {
19002 enum machine_mode vec_mode, imode;
19003 HOST_WIDE_INT hi, lo;
19004 int shift = 63;
19005 rtx v;
19006 rtx mask;
19007
19008 /* Find the sign bit, sign extended to 2*HWI. */
19009 switch (mode)
19010 {
19011 case V16SImode:
19012 case V16SFmode:
19013 case V8SImode:
19014 case V4SImode:
19015 case V8SFmode:
19016 case V4SFmode:
19017 vec_mode = mode;
19018 mode = GET_MODE_INNER (mode);
19019 imode = SImode;
19020 lo = 0x80000000, hi = lo < 0;
19021 break;
19022
19023 case V8DImode:
19024 case V4DImode:
19025 case V2DImode:
19026 case V8DFmode:
19027 case V4DFmode:
19028 case V2DFmode:
19029 vec_mode = mode;
19030 mode = GET_MODE_INNER (mode);
19031 imode = DImode;
19032 if (HOST_BITS_PER_WIDE_INT >= 64)
19033 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19034 else
19035 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19036 break;
19037
19038 case TImode:
19039 case TFmode:
19040 vec_mode = VOIDmode;
19041 if (HOST_BITS_PER_WIDE_INT >= 64)
19042 {
19043 imode = TImode;
19044 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19045 }
19046 else
19047 {
19048 rtvec vec;
19049
19050 imode = DImode;
19051 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19052
19053 if (invert)
19054 {
19055 lo = ~lo, hi = ~hi;
19056 v = constm1_rtx;
19057 }
19058 else
19059 v = const0_rtx;
19060
19061 mask = immed_double_const (lo, hi, imode);
19062
19063 vec = gen_rtvec (2, v, mask);
19064 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19065 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19066
19067 return v;
19068 }
19069 break;
19070
19071 default:
19072 gcc_unreachable ();
19073 }
19074
19075 if (invert)
19076 lo = ~lo, hi = ~hi;
19077
19078 /* Force this value into the low part of a fp vector constant. */
19079 mask = immed_double_const (lo, hi, imode);
19080 mask = gen_lowpart (mode, mask);
19081
19082 if (vec_mode == VOIDmode)
19083 return force_reg (mode, mask);
19084
19085 v = ix86_build_const_vector (vec_mode, vect, mask);
19086 return force_reg (vec_mode, v);
19087 }
19088
19089 /* Generate code for floating point ABS or NEG. */
19090
19091 void
19092 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19093 rtx operands[])
19094 {
19095 rtx mask, set, dst, src;
19096 bool use_sse = false;
19097 bool vector_mode = VECTOR_MODE_P (mode);
19098 enum machine_mode vmode = mode;
19099
19100 if (vector_mode)
19101 use_sse = true;
19102 else if (mode == TFmode)
19103 use_sse = true;
19104 else if (TARGET_SSE_MATH)
19105 {
19106 use_sse = SSE_FLOAT_MODE_P (mode);
19107 if (mode == SFmode)
19108 vmode = V4SFmode;
19109 else if (mode == DFmode)
19110 vmode = V2DFmode;
19111 }
19112
19113 /* NEG and ABS performed with SSE use bitwise mask operations.
19114 Create the appropriate mask now. */
19115 if (use_sse)
19116 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19117 else
19118 mask = NULL_RTX;
19119
19120 dst = operands[0];
19121 src = operands[1];
19122
19123 set = gen_rtx_fmt_e (code, mode, src);
19124 set = gen_rtx_SET (VOIDmode, dst, set);
19125
19126 if (mask)
19127 {
19128 rtx use, clob;
19129 rtvec par;
19130
19131 use = gen_rtx_USE (VOIDmode, mask);
19132 if (vector_mode)
19133 par = gen_rtvec (2, set, use);
19134 else
19135 {
19136 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19137 par = gen_rtvec (3, set, use, clob);
19138 }
19139 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19140 }
19141 else
19142 emit_insn (set);
19143 }
19144
19145 /* Expand a copysign operation. Special case operand 0 being a constant. */
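/* As an informal sketch: copysign (OP0, OP1) is computed bitwise as
   (OP0 & ~SIGNMASK) | (OP1 & SIGNMASK), with the sign-bit masks built by
   ix86_build_signbit_mask; the actual and/andn/or sequence is emitted by
   ix86_split_copysign_const and ix86_split_copysign_var below. */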
19146
19147 void
19148 ix86_expand_copysign (rtx operands[])
19149 {
19150 enum machine_mode mode, vmode;
19151 rtx dest, op0, op1, mask, nmask;
19152
19153 dest = operands[0];
19154 op0 = operands[1];
19155 op1 = operands[2];
19156
19157 mode = GET_MODE (dest);
19158
19159 if (mode == SFmode)
19160 vmode = V4SFmode;
19161 else if (mode == DFmode)
19162 vmode = V2DFmode;
19163 else
19164 vmode = mode;
19165
19166 if (GET_CODE (op0) == CONST_DOUBLE)
19167 {
19168 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19169
19170 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19171 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19172
19173 if (mode == SFmode || mode == DFmode)
19174 {
19175 if (op0 == CONST0_RTX (mode))
19176 op0 = CONST0_RTX (vmode);
19177 else
19178 {
19179 rtx v = ix86_build_const_vector (vmode, false, op0);
19180
19181 op0 = force_reg (vmode, v);
19182 }
19183 }
19184 else if (op0 != CONST0_RTX (mode))
19185 op0 = force_reg (mode, op0);
19186
19187 mask = ix86_build_signbit_mask (vmode, 0, 0);
19188
19189 if (mode == SFmode)
19190 copysign_insn = gen_copysignsf3_const;
19191 else if (mode == DFmode)
19192 copysign_insn = gen_copysigndf3_const;
19193 else
19194 copysign_insn = gen_copysigntf3_const;
19195
19196 emit_insn (copysign_insn (dest, op0, op1, mask));
19197 }
19198 else
19199 {
19200 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19201
19202 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19203 mask = ix86_build_signbit_mask (vmode, 0, 0);
19204
19205 if (mode == SFmode)
19206 copysign_insn = gen_copysignsf3_var;
19207 else if (mode == DFmode)
19208 copysign_insn = gen_copysigndf3_var;
19209 else
19210 copysign_insn = gen_copysigntf3_var;
19211
19212 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19213 }
19214 }
19215
19216 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19217 be a constant, and so has already been expanded into a vector constant. */
19218
19219 void
19220 ix86_split_copysign_const (rtx operands[])
19221 {
19222 enum machine_mode mode, vmode;
19223 rtx dest, op0, mask, x;
19224
19225 dest = operands[0];
19226 op0 = operands[1];
19227 mask = operands[3];
19228
19229 mode = GET_MODE (dest);
19230 vmode = GET_MODE (mask);
19231
19232 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19233 x = gen_rtx_AND (vmode, dest, mask);
19234 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19235
19236 if (op0 != CONST0_RTX (vmode))
19237 {
19238 x = gen_rtx_IOR (vmode, dest, op0);
19239 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19240 }
19241 }
19242
19243 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19244 so we have to do two masks. */
19245
19246 void
19247 ix86_split_copysign_var (rtx operands[])
19248 {
19249 enum machine_mode mode, vmode;
19250 rtx dest, scratch, op0, op1, mask, nmask, x;
19251
19252 dest = operands[0];
19253 scratch = operands[1];
19254 op0 = operands[2];
19255 op1 = operands[3];
19256 nmask = operands[4];
19257 mask = operands[5];
19258
19259 mode = GET_MODE (dest);
19260 vmode = GET_MODE (mask);
19261
19262 if (rtx_equal_p (op0, op1))
19263 {
19264 /* Shouldn't happen often (it's useless, obviously), but when it does
19265 we'd generate incorrect code if we continue below. */
19266 emit_move_insn (dest, op0);
19267 return;
19268 }
19269
19270 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19271 {
19272 gcc_assert (REGNO (op1) == REGNO (scratch));
19273
19274 x = gen_rtx_AND (vmode, scratch, mask);
19275 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19276
19277 dest = mask;
19278 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19279 x = gen_rtx_NOT (vmode, dest);
19280 x = gen_rtx_AND (vmode, x, op0);
19281 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19282 }
19283 else
19284 {
19285 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19286 {
19287 x = gen_rtx_AND (vmode, scratch, mask);
19288 }
19289 else /* alternative 2,4 */
19290 {
19291 gcc_assert (REGNO (mask) == REGNO (scratch));
19292 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19293 x = gen_rtx_AND (vmode, scratch, op1);
19294 }
19295 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19296
19297 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19298 {
19299 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19300 x = gen_rtx_AND (vmode, dest, nmask);
19301 }
19302 else /* alternative 3,4 */
19303 {
19304 gcc_assert (REGNO (nmask) == REGNO (dest));
19305 dest = nmask;
19306 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19307 x = gen_rtx_AND (vmode, dest, op0);
19308 }
19309 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19310 }
19311
19312 x = gen_rtx_IOR (vmode, dest, scratch);
19313 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19314 }
19315
19316 /* Return TRUE or FALSE depending on whether the first SET in INSN
19317 has source and destination with matching CC modes, and that the
19318 CC mode is at least as constrained as REQ_MODE. */
19319
19320 bool
19321 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19322 {
19323 rtx set;
19324 enum machine_mode set_mode;
19325
19326 set = PATTERN (insn);
19327 if (GET_CODE (set) == PARALLEL)
19328 set = XVECEXP (set, 0, 0);
19329 gcc_assert (GET_CODE (set) == SET);
19330 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19331
19332 set_mode = GET_MODE (SET_DEST (set));
19333 switch (set_mode)
19334 {
19335 case CCNOmode:
19336 if (req_mode != CCNOmode
19337 && (req_mode != CCmode
19338 || XEXP (SET_SRC (set), 1) != const0_rtx))
19339 return false;
19340 break;
19341 case CCmode:
19342 if (req_mode == CCGCmode)
19343 return false;
19344 /* FALLTHRU */
19345 case CCGCmode:
19346 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19347 return false;
19348 /* FALLTHRU */
19349 case CCGOCmode:
19350 if (req_mode == CCZmode)
19351 return false;
19352 /* FALLTHRU */
19353 case CCZmode:
19354 break;
19355
19356 case CCAmode:
19357 case CCCmode:
19358 case CCOmode:
19359 case CCSmode:
19360 if (set_mode != req_mode)
19361 return false;
19362 break;
19363
19364 default:
19365 gcc_unreachable ();
19366 }
19367
19368 return GET_MODE (SET_SRC (set)) == set_mode;
19369 }
19370
19371 /* Generate insn patterns to do an integer compare of OPERANDS. */
19372
19373 static rtx
19374 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19375 {
19376 enum machine_mode cmpmode;
19377 rtx tmp, flags;
19378
19379 cmpmode = SELECT_CC_MODE (code, op0, op1);
19380 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19381
19382 /* This is very simple, but making the interface the same as in the
19383 FP case makes the rest of the code easier. */
19384 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19385 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19386
19387 /* Return the test that should be put into the flags user, i.e.
19388 the bcc, scc, or cmov instruction. */
19389 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19390 }
19391
19392 /* Figure out whether to use ordered or unordered fp comparisons.
19393 Return the appropriate mode to use. */
19394
19395 enum machine_mode
19396 ix86_fp_compare_mode (enum rtx_code)
19397 {
19398 /* ??? In order to make all comparisons reversible, we do all comparisons
19399 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19400 all forms of trapping and nontrapping comparisons, we can make inequality
19401 comparisons trapping again, since it results in better code when using
19402 FCOM based compares. */
19403 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19404 }
19405
19406 enum machine_mode
19407 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19408 {
19409 enum machine_mode mode = GET_MODE (op0);
19410
19411 if (SCALAR_FLOAT_MODE_P (mode))
19412 {
19413 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19414 return ix86_fp_compare_mode (code);
19415 }
19416
19417 switch (code)
19418 {
19419 /* Only zero flag is needed. */
19420 case EQ: /* ZF=0 */
19421 case NE: /* ZF!=0 */
19422 return CCZmode;
19423 /* Codes needing carry flag. */
19424 case GEU: /* CF=0 */
19425 case LTU: /* CF=1 */
19426 /* Detect overflow checks. They need just the carry flag. */
19427 if (GET_CODE (op0) == PLUS
19428 && rtx_equal_p (op1, XEXP (op0, 0)))
19429 return CCCmode;
19430 else
19431 return CCmode;
19432 case GTU: /* CF=0 & ZF=0 */
19433 case LEU: /* CF=1 | ZF=1 */
19434 return CCmode;
19435 /* Codes possibly doable only with sign flag when
19436 comparing against zero. */
19437 case GE: /* SF=OF or SF=0 */
19438 case LT: /* SF<>OF or SF=1 */
19439 if (op1 == const0_rtx)
19440 return CCGOCmode;
19441 else
19442 /* For other cases Carry flag is not required. */
19443 return CCGCmode;
19444 /* Codes doable only with sign flag when comparing
19445 against zero, but we miss jump instruction for it
19446 so we need to use relational tests against overflow
19447 that thus needs to be zero. */
19448 case GT: /* ZF=0 & SF=OF */
19449 case LE: /* ZF=1 | SF<>OF */
19450 if (op1 == const0_rtx)
19451 return CCNOmode;
19452 else
19453 return CCGCmode;
19454 /* The strcmp pattern does (use flags), and combine may ask us for the
19455 proper mode. */
19456 case USE:
19457 return CCmode;
19458 default:
19459 gcc_unreachable ();
19460 }
19461 }
19462
19463 /* Return the fixed registers used for condition codes. */
19464
19465 static bool
19466 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19467 {
19468 *p1 = FLAGS_REG;
19469 *p2 = FPSR_REG;
19470 return true;
19471 }
19472
19473 /* If two condition code modes are compatible, return a condition code
19474 mode which is compatible with both. Otherwise, return
19475 VOIDmode. */
19476
19477 static enum machine_mode
19478 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19479 {
19480 if (m1 == m2)
19481 return m1;
19482
19483 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19484 return VOIDmode;
19485
19486 if ((m1 == CCGCmode && m2 == CCGOCmode)
19487 || (m1 == CCGOCmode && m2 == CCGCmode))
19488 return CCGCmode;
19489
19490 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19491 return m2;
19492 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19493 return m1;
19494
19495 switch (m1)
19496 {
19497 default:
19498 gcc_unreachable ();
19499
19500 case CCmode:
19501 case CCGCmode:
19502 case CCGOCmode:
19503 case CCNOmode:
19504 case CCAmode:
19505 case CCCmode:
19506 case CCOmode:
19507 case CCSmode:
19508 case CCZmode:
19509 switch (m2)
19510 {
19511 default:
19512 return VOIDmode;
19513
19514 case CCmode:
19515 case CCGCmode:
19516 case CCGOCmode:
19517 case CCNOmode:
19518 case CCAmode:
19519 case CCCmode:
19520 case CCOmode:
19521 case CCSmode:
19522 case CCZmode:
19523 return CCmode;
19524 }
19525
19526 case CCFPmode:
19527 case CCFPUmode:
19528 /* These are only compatible with themselves, which we already
19529 checked above. */
19530 return VOIDmode;
19531 }
19532 }
19533
19534
19535 /* Return a comparison we can do that is equivalent to
19536    swap_condition (code), except possibly for orderedness.
19537    However, never change orderedness if TARGET_IEEE_FP; return
19538    UNKNOWN in that case if necessary.  */
19539
19540 static enum rtx_code
19541 ix86_fp_swap_condition (enum rtx_code code)
19542 {
19543 switch (code)
19544 {
19545 case GT: /* GTU - CF=0 & ZF=0 */
19546 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19547 case GE: /* GEU - CF=0 */
19548 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19549 case UNLT: /* LTU - CF=1 */
19550 return TARGET_IEEE_FP ? UNKNOWN : GT;
19551 case UNLE: /* LEU - CF=1 | ZF=1 */
19552 return TARGET_IEEE_FP ? UNKNOWN : GE;
19553 default:
19554 return swap_condition (code);
19555 }
19556 }
19557
19558 /* Return the cost of comparison CODE using the best strategy for performance.
19559    All of the following functions use the number of instructions as the cost
19560    metric.  In the future this should be tweaked to compute bytes for
19561    optimize_size and take into account the performance of various instructions on various CPUs.  */
19562
19563 static int
19564 ix86_fp_comparison_cost (enum rtx_code code)
19565 {
19566 int arith_cost;
19567
19568 /* The cost of code using bit-twiddling on %ah. */
19569 switch (code)
19570 {
19571 case UNLE:
19572 case UNLT:
19573 case LTGT:
19574 case GT:
19575 case GE:
19576 case UNORDERED:
19577 case ORDERED:
19578 case UNEQ:
19579 arith_cost = 4;
19580 break;
19581 case LT:
19582 case NE:
19583 case EQ:
19584 case UNGE:
19585 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19586 break;
19587 case LE:
19588 case UNGT:
19589 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19590 break;
19591 default:
19592 gcc_unreachable ();
19593 }
19594
19595 switch (ix86_fp_comparison_strategy (code))
19596 {
19597 case IX86_FPCMP_COMI:
19598 return arith_cost > 4 ? 3 : 2;
19599 case IX86_FPCMP_SAHF:
19600 return arith_cost > 4 ? 4 : 3;
19601 default:
19602 return arith_cost;
19603 }
19604 }
19605
19606 /* Return the strategy to use for floating-point compares.  We assume that fcomi
19607    is always preferable where available, since that is also true when looking at size
19608 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19609
19610 enum ix86_fpcmp_strategy
19611 ix86_fp_comparison_strategy (enum rtx_code)
19612 {
19613 /* Do fcomi/sahf based test when profitable. */
19614
19615 if (TARGET_CMOVE)
19616 return IX86_FPCMP_COMI;
19617
19618 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19619 return IX86_FPCMP_SAHF;
19620
19621 return IX86_FPCMP_ARITH;
19622 }
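/* For reference, an illustrative sketch (not lifted from the md patterns) of
   what the three strategies correspond to for an x87 compare:
     IX86_FPCMP_COMI:   fcomi                    (sets ZF/PF/CF directly)
     IX86_FPCMP_SAHF:   fcom; fnstsw %ax; sahf   (copies C3/C2/C0 into flags)
     IX86_FPCMP_ARITH:  fcom; fnstsw %ax; test or and on %ah
   which is where the size estimates in the comment above come from.  */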
19623
19624 /* Swap, force into registers, or otherwise massage the two operands
19625 to a fp comparison. The operands are updated in place; the new
19626 comparison code is returned. */
19627
19628 static enum rtx_code
19629 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19630 {
19631 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19632 rtx op0 = *pop0, op1 = *pop1;
19633 enum machine_mode op_mode = GET_MODE (op0);
19634 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19635
19636 /* All of the unordered compare instructions only work on registers.
19637 The same is true of the fcomi compare instructions. The XFmode
19638 compare instructions require registers except when comparing
19639 against zero or when converting operand 1 from fixed point to
19640 floating point. */
19641
19642 if (!is_sse
19643 && (fpcmp_mode == CCFPUmode
19644 || (op_mode == XFmode
19645 && ! (standard_80387_constant_p (op0) == 1
19646 || standard_80387_constant_p (op1) == 1)
19647 && GET_CODE (op1) != FLOAT)
19648 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19649 {
19650 op0 = force_reg (op_mode, op0);
19651 op1 = force_reg (op_mode, op1);
19652 }
19653 else
19654 {
19655 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19656 things around if they appear profitable, otherwise force op0
19657 into a register. */
19658
19659 if (standard_80387_constant_p (op0) == 0
19660 || (MEM_P (op0)
19661 && ! (standard_80387_constant_p (op1) == 0
19662 || MEM_P (op1))))
19663 {
19664 enum rtx_code new_code = ix86_fp_swap_condition (code);
19665 if (new_code != UNKNOWN)
19666 {
19667 rtx tmp;
19668 tmp = op0, op0 = op1, op1 = tmp;
19669 code = new_code;
19670 }
19671 }
19672
19673 if (!REG_P (op0))
19674 op0 = force_reg (op_mode, op0);
19675
19676 if (CONSTANT_P (op1))
19677 {
19678 int tmp = standard_80387_constant_p (op1);
19679 if (tmp == 0)
19680 op1 = validize_mem (force_const_mem (op_mode, op1));
19681 else if (tmp == 1)
19682 {
19683 if (TARGET_CMOVE)
19684 op1 = force_reg (op_mode, op1);
19685 }
19686 else
19687 op1 = force_reg (op_mode, op1);
19688 }
19689 }
19690
19691 /* Try to rearrange the comparison to make it cheaper. */
19692 if (ix86_fp_comparison_cost (code)
19693 > ix86_fp_comparison_cost (swap_condition (code))
19694 && (REG_P (op1) || can_create_pseudo_p ()))
19695 {
19696 rtx tmp;
19697 tmp = op0, op0 = op1, op1 = tmp;
19698 code = swap_condition (code);
19699 if (!REG_P (op0))
19700 op0 = force_reg (op_mode, op0);
19701 }
19702
19703 *pop0 = op0;
19704 *pop1 = op1;
19705 return code;
19706 }
19707
19708 /* Convert the comparison codes we use to represent FP comparisons to the
19709    integer code that will result in a proper branch.  Return UNKNOWN if no
19710    such code is available.  */
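/* Background for the mapping below (a summary of the hardware behavior, not
   code): fcomi and [u]comis[sd] set ZF, PF and CF much like an unsigned
   integer compare sets ZF and CF, with an unordered result setting all three,
   so a floating-point GT becomes the integer "above" test GTU, UNLT becomes
   "below" LTU, and so on.  */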
19711
19712 enum rtx_code
19713 ix86_fp_compare_code_to_integer (enum rtx_code code)
19714 {
19715 switch (code)
19716 {
19717 case GT:
19718 return GTU;
19719 case GE:
19720 return GEU;
19721 case ORDERED:
19722 case UNORDERED:
19723 return code;
19724 break;
19725 case UNEQ:
19726 return EQ;
19727 break;
19728 case UNLT:
19729 return LTU;
19730 break;
19731 case UNLE:
19732 return LEU;
19733 break;
19734 case LTGT:
19735 return NE;
19736 break;
19737 default:
19738 return UNKNOWN;
19739 }
19740 }
19741
19742 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19743
19744 static rtx
19745 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19746 {
19747 enum machine_mode fpcmp_mode, intcmp_mode;
19748 rtx tmp, tmp2;
19749
19750 fpcmp_mode = ix86_fp_compare_mode (code);
19751 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19752
19753 /* Do fcomi/sahf based test when profitable. */
19754 switch (ix86_fp_comparison_strategy (code))
19755 {
19756 case IX86_FPCMP_COMI:
19757 intcmp_mode = fpcmp_mode;
19758 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19759 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19760 tmp);
19761 emit_insn (tmp);
19762 break;
19763
19764 case IX86_FPCMP_SAHF:
19765 intcmp_mode = fpcmp_mode;
19766 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19767 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19768 tmp);
19769
19770 if (!scratch)
19771 scratch = gen_reg_rtx (HImode);
19772 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19773 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19774 break;
19775
19776 case IX86_FPCMP_ARITH:
19777 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19778 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19779 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19780 if (!scratch)
19781 scratch = gen_reg_rtx (HImode);
19782 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19783
19784       /* In the unordered case, we have to check C2 for NaNs, which
19785 doesn't happen to work out to anything nice combination-wise.
19786 So do some bit twiddling on the value we've got in AH to come
19787 up with an appropriate set of condition codes. */
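      /* A note on the magic constants below, summarizing the FPU status-word
	 layout: after fnstsw the condition bits land in %ah as C0 = 0x01,
	 C2 = 0x04 and C3 = 0x40, so 0x45 masks C0|C2|C3.  fcom sets C3:C2:C0
	 to 000 for >, 001 for <, 100 for equal and 111 for unordered, which
	 is what the tests below decode.  */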
19788
19789 intcmp_mode = CCNOmode;
19790 switch (code)
19791 {
19792 case GT:
19793 case UNGT:
19794 if (code == GT || !TARGET_IEEE_FP)
19795 {
19796 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19797 code = EQ;
19798 }
19799 else
19800 {
19801 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19802 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19803 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19804 intcmp_mode = CCmode;
19805 code = GEU;
19806 }
19807 break;
19808 case LT:
19809 case UNLT:
19810 if (code == LT && TARGET_IEEE_FP)
19811 {
19812 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19813 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19814 intcmp_mode = CCmode;
19815 code = EQ;
19816 }
19817 else
19818 {
19819 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19820 code = NE;
19821 }
19822 break;
19823 case GE:
19824 case UNGE:
19825 if (code == GE || !TARGET_IEEE_FP)
19826 {
19827 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19828 code = EQ;
19829 }
19830 else
19831 {
19832 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19833 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19834 code = NE;
19835 }
19836 break;
19837 case LE:
19838 case UNLE:
19839 if (code == LE && TARGET_IEEE_FP)
19840 {
19841 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19842 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19843 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19844 intcmp_mode = CCmode;
19845 code = LTU;
19846 }
19847 else
19848 {
19849 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19850 code = NE;
19851 }
19852 break;
19853 case EQ:
19854 case UNEQ:
19855 if (code == EQ && TARGET_IEEE_FP)
19856 {
19857 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19858 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19859 intcmp_mode = CCmode;
19860 code = EQ;
19861 }
19862 else
19863 {
19864 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19865 code = NE;
19866 }
19867 break;
19868 case NE:
19869 case LTGT:
19870 if (code == NE && TARGET_IEEE_FP)
19871 {
19872 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19873 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19874 GEN_INT (0x40)));
19875 code = NE;
19876 }
19877 else
19878 {
19879 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19880 code = EQ;
19881 }
19882 break;
19883
19884 case UNORDERED:
19885 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19886 code = NE;
19887 break;
19888 case ORDERED:
19889 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19890 code = EQ;
19891 break;
19892
19893 default:
19894 gcc_unreachable ();
19895 }
19896 break;
19897
19898 default:
19899       gcc_unreachable ();
19900 }
19901
19902 /* Return the test that should be put into the flags user, i.e.
19903 the bcc, scc, or cmov instruction. */
19904 return gen_rtx_fmt_ee (code, VOIDmode,
19905 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19906 const0_rtx);
19907 }
19908
19909 static rtx
19910 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19911 {
19912 rtx ret;
19913
19914 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19915 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19916
19917 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19918 {
19919 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19920 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19921 }
19922 else
19923 ret = ix86_expand_int_compare (code, op0, op1);
19924
19925 return ret;
19926 }
19927
19928 void
19929 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19930 {
19931 enum machine_mode mode = GET_MODE (op0);
19932 rtx tmp;
19933
19934 switch (mode)
19935 {
19936 case SFmode:
19937 case DFmode:
19938 case XFmode:
19939 case QImode:
19940 case HImode:
19941 case SImode:
19942 simple:
19943 tmp = ix86_expand_compare (code, op0, op1);
19944 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19945 gen_rtx_LABEL_REF (VOIDmode, label),
19946 pc_rtx);
19947 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19948 return;
19949
19950 case DImode:
19951 if (TARGET_64BIT)
19952 goto simple;
19953 case TImode:
19954 /* Expand DImode branch into multiple compare+branch. */
19955 {
19956 rtx lo[2], hi[2];
19957 rtx_code_label *label2;
19958 enum rtx_code code1, code2, code3;
19959 enum machine_mode submode;
19960
19961 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19962 {
19963 tmp = op0, op0 = op1, op1 = tmp;
19964 code = swap_condition (code);
19965 }
19966
19967 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19968 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19969
19970 submode = mode == DImode ? SImode : DImode;
19971
19972 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19973 avoid two branches. This costs one extra insn, so disable when
19974 optimizing for size. */
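	/* E.g. a DImode "a == b" on ia32 becomes, per the code below,
	     t = (hi(a) ^ hi(b)) | (lo(a) ^ lo(b));  branch on t == 0;
	   with the XOR against a zero half skipped entirely.  */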
19975
19976 if ((code == EQ || code == NE)
19977 && (!optimize_insn_for_size_p ()
19978 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19979 {
19980 rtx xor0, xor1;
19981
19982 xor1 = hi[0];
19983 if (hi[1] != const0_rtx)
19984 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19985 NULL_RTX, 0, OPTAB_WIDEN);
19986
19987 xor0 = lo[0];
19988 if (lo[1] != const0_rtx)
19989 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19990 NULL_RTX, 0, OPTAB_WIDEN);
19991
19992 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19993 NULL_RTX, 0, OPTAB_WIDEN);
19994
19995 ix86_expand_branch (code, tmp, const0_rtx, label);
19996 return;
19997 }
19998
19999 	/* Otherwise, if we are doing a less-than or greater-or-equal comparison,
20000 	   op1 is a constant, and its low word is zero, then we can just
20001 	   examine the high word.  Similarly for a low word of -1 and
20002 	   less-or-equal or greater-than.  */
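	/* For instance (an illustration of the rule above, with 32-bit
	   halves): "a < 0x300000000" only needs "hi(a) < 3", and
	   "a <= 0x2ffffffff" only needs "hi(a) <= 2".  */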
20003
20004 if (CONST_INT_P (hi[1]))
20005 switch (code)
20006 {
20007 case LT: case LTU: case GE: case GEU:
20008 if (lo[1] == const0_rtx)
20009 {
20010 ix86_expand_branch (code, hi[0], hi[1], label);
20011 return;
20012 }
20013 break;
20014 case LE: case LEU: case GT: case GTU:
20015 if (lo[1] == constm1_rtx)
20016 {
20017 ix86_expand_branch (code, hi[0], hi[1], label);
20018 return;
20019 }
20020 break;
20021 default:
20022 break;
20023 }
20024
20025 /* Otherwise, we need two or three jumps. */
20026
20027 label2 = gen_label_rtx ();
20028
20029 code1 = code;
20030 code2 = swap_condition (code);
20031 code3 = unsigned_condition (code);
20032
20033 switch (code)
20034 {
20035 case LT: case GT: case LTU: case GTU:
20036 break;
20037
20038 case LE: code1 = LT; code2 = GT; break;
20039 case GE: code1 = GT; code2 = LT; break;
20040 case LEU: code1 = LTU; code2 = GTU; break;
20041 case GEU: code1 = GTU; code2 = LTU; break;
20042
20043 case EQ: code1 = UNKNOWN; code2 = NE; break;
20044 case NE: code2 = UNKNOWN; break;
20045
20046 default:
20047 gcc_unreachable ();
20048 }
20049
20050 /*
20051 * a < b =>
20052 * if (hi(a) < hi(b)) goto true;
20053 * if (hi(a) > hi(b)) goto false;
20054 * if (lo(a) < lo(b)) goto true;
20055 * false:
20056 */
20057
20058 if (code1 != UNKNOWN)
20059 ix86_expand_branch (code1, hi[0], hi[1], label);
20060 if (code2 != UNKNOWN)
20061 ix86_expand_branch (code2, hi[0], hi[1], label2);
20062
20063 ix86_expand_branch (code3, lo[0], lo[1], label);
20064
20065 if (code2 != UNKNOWN)
20066 emit_label (label2);
20067 return;
20068 }
20069
20070 default:
20071 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20072 goto simple;
20073 }
20074 }
20075
20076 /* Split branch based on floating point condition. */
20077 void
20078 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20079 rtx target1, rtx target2, rtx tmp)
20080 {
20081 rtx condition;
20082 rtx i;
20083
20084 if (target2 != pc_rtx)
20085 {
20086 rtx tmp = target2;
20087 code = reverse_condition_maybe_unordered (code);
20088 target2 = target1;
20089 target1 = tmp;
20090 }
20091
20092 condition = ix86_expand_fp_compare (code, op1, op2,
20093 tmp);
20094
20095 i = emit_jump_insn (gen_rtx_SET
20096 (VOIDmode, pc_rtx,
20097 gen_rtx_IF_THEN_ELSE (VOIDmode,
20098 condition, target1, target2)));
20099 if (split_branch_probability >= 0)
20100 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20101 }
20102
20103 void
20104 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20105 {
20106 rtx ret;
20107
20108 gcc_assert (GET_MODE (dest) == QImode);
20109
20110 ret = ix86_expand_compare (code, op0, op1);
20111 PUT_MODE (ret, QImode);
20112 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20113 }
20114
20115 /* Expand a comparison setting or clearing the carry flag.  Return true when
20116    successful and store the comparison in *POP.  */
20117 static bool
20118 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20119 {
20120 enum machine_mode mode =
20121 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20122
20123   /* Do not handle double-mode compares that go through the special path.  */
20124 if (mode == (TARGET_64BIT ? TImode : DImode))
20125 return false;
20126
20127 if (SCALAR_FLOAT_MODE_P (mode))
20128 {
20129 rtx compare_op;
20130 rtx_insn *compare_seq;
20131
20132 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20133
20134       /* Shortcut: the following common codes never translate
20135          into carry-flag compares.  */
20136 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20137 || code == ORDERED || code == UNORDERED)
20138 return false;
20139
20140       /* These comparisons require the zero flag; swap operands so they won't.  */
20141 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20142 && !TARGET_IEEE_FP)
20143 {
20144 rtx tmp = op0;
20145 op0 = op1;
20146 op1 = tmp;
20147 code = swap_condition (code);
20148 }
20149
20150       /* Try to expand the comparison and verify that we end up with
20151          a carry-flag-based comparison.  This fails only when we decide
20152          to expand the comparison using arithmetic, which is not a
20153          common scenario.  */
20154 start_sequence ();
20155 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20156 compare_seq = get_insns ();
20157 end_sequence ();
20158
20159 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20160 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20161 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20162 else
20163 code = GET_CODE (compare_op);
20164
20165 if (code != LTU && code != GEU)
20166 return false;
20167
20168 emit_insn (compare_seq);
20169 *pop = compare_op;
20170 return true;
20171 }
20172
20173 if (!INTEGRAL_MODE_P (mode))
20174 return false;
20175
20176 switch (code)
20177 {
20178 case LTU:
20179 case GEU:
20180 break;
20181
20182 /* Convert a==0 into (unsigned)a<1. */
20183 case EQ:
20184 case NE:
20185 if (op1 != const0_rtx)
20186 return false;
20187 op1 = const1_rtx;
20188 code = (code == EQ ? LTU : GEU);
20189 break;
20190
20191     /* Convert a>b into b<a or a>=b+1.  */
20192 case GTU:
20193 case LEU:
20194 if (CONST_INT_P (op1))
20195 {
20196 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20197 	  /* Bail out on overflow.  We could still swap the operands, but that
20198 	     would force loading of the constant into a register.  */
20199 if (op1 == const0_rtx
20200 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20201 return false;
20202 code = (code == GTU ? GEU : LTU);
20203 }
20204 else
20205 {
20206 rtx tmp = op1;
20207 op1 = op0;
20208 op0 = tmp;
20209 code = (code == GTU ? LTU : GEU);
20210 }
20211 break;
20212
20213 /* Convert a>=0 into (unsigned)a<0x80000000. */
20214 case LT:
20215 case GE:
20216 if (mode == DImode || op1 != const0_rtx)
20217 return false;
20218 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20219 code = (code == LT ? GEU : LTU);
20220 break;
20221 case LE:
20222 case GT:
20223 if (mode == DImode || op1 != constm1_rtx)
20224 return false;
20225 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20226 code = (code == LE ? GEU : LTU);
20227 break;
20228
20229 default:
20230 return false;
20231 }
20232   /* Swapping operands may cause a constant to appear as the first operand.  */
20233 if (!nonimmediate_operand (op0, VOIDmode))
20234 {
20235 if (!can_create_pseudo_p ())
20236 return false;
20237 op0 = force_reg (mode, op0);
20238 }
20239 *pop = ix86_expand_compare (code, op0, op1);
20240 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20241 return true;
20242 }
20243
20244 bool
20245 ix86_expand_int_movcc (rtx operands[])
20246 {
20247 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20248 rtx_insn *compare_seq;
20249 rtx compare_op;
20250 enum machine_mode mode = GET_MODE (operands[0]);
20251 bool sign_bit_compare_p = false;
20252 rtx op0 = XEXP (operands[1], 0);
20253 rtx op1 = XEXP (operands[1], 1);
20254
20255 if (GET_MODE (op0) == TImode
20256 || (GET_MODE (op0) == DImode
20257 && !TARGET_64BIT))
20258 return false;
20259
20260 start_sequence ();
20261 compare_op = ix86_expand_compare (code, op0, op1);
20262 compare_seq = get_insns ();
20263 end_sequence ();
20264
20265 compare_code = GET_CODE (compare_op);
20266
20267 if ((op1 == const0_rtx && (code == GE || code == LT))
20268 || (op1 == constm1_rtx && (code == GT || code == LE)))
20269 sign_bit_compare_p = true;
20270
20271 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20272 HImode insns, we'd be swallowed in word prefix ops. */
20273
20274 if ((mode != HImode || TARGET_FAST_PREFIX)
20275 && (mode != (TARGET_64BIT ? TImode : DImode))
20276 && CONST_INT_P (operands[2])
20277 && CONST_INT_P (operands[3]))
20278 {
20279 rtx out = operands[0];
20280 HOST_WIDE_INT ct = INTVAL (operands[2]);
20281 HOST_WIDE_INT cf = INTVAL (operands[3]);
20282 HOST_WIDE_INT diff;
20283
20284 diff = ct - cf;
20285       /* Sign bit compares are better done using shifts than by using
20286 sbb. */
20287 if (sign_bit_compare_p
20288 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20289 {
20290 /* Detect overlap between destination and compare sources. */
20291 rtx tmp = out;
20292
20293 if (!sign_bit_compare_p)
20294 {
20295 rtx flags;
20296 bool fpcmp = false;
20297
20298 compare_code = GET_CODE (compare_op);
20299
20300 flags = XEXP (compare_op, 0);
20301
20302 if (GET_MODE (flags) == CCFPmode
20303 || GET_MODE (flags) == CCFPUmode)
20304 {
20305 fpcmp = true;
20306 compare_code
20307 = ix86_fp_compare_code_to_integer (compare_code);
20308 }
20309
20310 	      /* To simplify the rest of the code, restrict to the GEU case.  */
20311 if (compare_code == LTU)
20312 {
20313 HOST_WIDE_INT tmp = ct;
20314 ct = cf;
20315 cf = tmp;
20316 compare_code = reverse_condition (compare_code);
20317 code = reverse_condition (code);
20318 }
20319 else
20320 {
20321 if (fpcmp)
20322 PUT_CODE (compare_op,
20323 reverse_condition_maybe_unordered
20324 (GET_CODE (compare_op)));
20325 else
20326 PUT_CODE (compare_op,
20327 reverse_condition (GET_CODE (compare_op)));
20328 }
20329 diff = ct - cf;
20330
20331 if (reg_overlap_mentioned_p (out, op0)
20332 || reg_overlap_mentioned_p (out, op1))
20333 tmp = gen_reg_rtx (mode);
20334
20335 if (mode == DImode)
20336 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20337 else
20338 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20339 flags, compare_op));
20340 }
20341 else
20342 {
20343 if (code == GT || code == GE)
20344 code = reverse_condition (code);
20345 else
20346 {
20347 HOST_WIDE_INT tmp = ct;
20348 ct = cf;
20349 cf = tmp;
20350 diff = ct - cf;
20351 }
20352 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20353 }
20354
20355 if (diff == 1)
20356 {
20357 /*
20358 * cmpl op0,op1
20359 * sbbl dest,dest
20360 * [addl dest, ct]
20361 *
20362 * Size 5 - 8.
20363 */
20364 if (ct)
20365 tmp = expand_simple_binop (mode, PLUS,
20366 tmp, GEN_INT (ct),
20367 copy_rtx (tmp), 1, OPTAB_DIRECT);
20368 }
20369 else if (cf == -1)
20370 {
20371 /*
20372 * cmpl op0,op1
20373 * sbbl dest,dest
20374 * orl $ct, dest
20375 *
20376 * Size 8.
20377 */
20378 tmp = expand_simple_binop (mode, IOR,
20379 tmp, GEN_INT (ct),
20380 copy_rtx (tmp), 1, OPTAB_DIRECT);
20381 }
20382 else if (diff == -1 && ct)
20383 {
20384 /*
20385 * cmpl op0,op1
20386 * sbbl dest,dest
20387 * notl dest
20388 * [addl dest, cf]
20389 *
20390 * Size 8 - 11.
20391 */
20392 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20393 if (cf)
20394 tmp = expand_simple_binop (mode, PLUS,
20395 copy_rtx (tmp), GEN_INT (cf),
20396 copy_rtx (tmp), 1, OPTAB_DIRECT);
20397 }
20398 else
20399 {
20400 /*
20401 * cmpl op0,op1
20402 * sbbl dest,dest
20403 * [notl dest]
20404 * andl cf - ct, dest
20405 * [addl dest, ct]
20406 *
20407 * Size 8 - 11.
20408 */
20409
20410 if (cf == 0)
20411 {
20412 cf = ct;
20413 ct = 0;
20414 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20415 }
20416
20417 tmp = expand_simple_binop (mode, AND,
20418 copy_rtx (tmp),
20419 gen_int_mode (cf - ct, mode),
20420 copy_rtx (tmp), 1, OPTAB_DIRECT);
20421 if (ct)
20422 tmp = expand_simple_binop (mode, PLUS,
20423 copy_rtx (tmp), GEN_INT (ct),
20424 copy_rtx (tmp), 1, OPTAB_DIRECT);
20425 }
20426
20427 if (!rtx_equal_p (tmp, out))
20428 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20429
20430 return true;
20431 }
20432
20433 if (diff < 0)
20434 {
20435 enum machine_mode cmp_mode = GET_MODE (op0);
20436
20437 HOST_WIDE_INT tmp;
20438 tmp = ct, ct = cf, cf = tmp;
20439 diff = -diff;
20440
20441 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20442 {
20443 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20444
20445 	      /* We may be reversing an unordered compare to a normal compare, which
20446 	         is not valid in general (we may convert a non-trapping condition
20447 	         to a trapping one); however, on i386 we currently emit all
20448 	         comparisons unordered.  */
20449 compare_code = reverse_condition_maybe_unordered (compare_code);
20450 code = reverse_condition_maybe_unordered (code);
20451 }
20452 else
20453 {
20454 compare_code = reverse_condition (compare_code);
20455 code = reverse_condition (code);
20456 }
20457 }
20458
20459 compare_code = UNKNOWN;
20460 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20461 && CONST_INT_P (op1))
20462 {
20463 if (op1 == const0_rtx
20464 && (code == LT || code == GE))
20465 compare_code = code;
20466 else if (op1 == constm1_rtx)
20467 {
20468 if (code == LE)
20469 compare_code = LT;
20470 else if (code == GT)
20471 compare_code = GE;
20472 }
20473 }
20474
20475 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20476 if (compare_code != UNKNOWN
20477 && GET_MODE (op0) == GET_MODE (out)
20478 && (cf == -1 || ct == -1))
20479 {
20480 /* If lea code below could be used, only optimize
20481 if it results in a 2 insn sequence. */
20482
20483 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20484 || diff == 3 || diff == 5 || diff == 9)
20485 || (compare_code == LT && ct == -1)
20486 || (compare_code == GE && cf == -1))
20487 {
20488 /*
20489 * notl op1 (if necessary)
20490 * sarl $31, op1
20491 * orl cf, op1
20492 */
20493 if (ct != -1)
20494 {
20495 cf = ct;
20496 ct = -1;
20497 code = reverse_condition (code);
20498 }
20499
20500 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20501
20502 out = expand_simple_binop (mode, IOR,
20503 out, GEN_INT (cf),
20504 out, 1, OPTAB_DIRECT);
20505 if (out != operands[0])
20506 emit_move_insn (operands[0], out);
20507
20508 return true;
20509 }
20510 }
20511
20512
20513 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20514 || diff == 3 || diff == 5 || diff == 9)
20515 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20516 && (mode != DImode
20517 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20518 {
20519 /*
20520 * xorl dest,dest
20521 * cmpl op1,op2
20522 * setcc dest
20523 * lea cf(dest*(ct-cf)),dest
20524 *
20525 * Size 14.
20526 *
20527 * This also catches the degenerate setcc-only case.
20528 */
20529
20530 rtx tmp;
20531 int nops;
20532
20533 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20534
20535 nops = 0;
20536 /* On x86_64 the lea instruction operates on Pmode, so we need
20537 	     to get the arithmetic done in the proper mode to match.  */
20538 if (diff == 1)
20539 tmp = copy_rtx (out);
20540 else
20541 {
20542 rtx out1;
20543 out1 = copy_rtx (out);
20544 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20545 nops++;
20546 if (diff & 1)
20547 {
20548 tmp = gen_rtx_PLUS (mode, tmp, out1);
20549 nops++;
20550 }
20551 }
20552 if (cf != 0)
20553 {
20554 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20555 nops++;
20556 }
20557 if (!rtx_equal_p (tmp, out))
20558 {
20559 if (nops == 1)
20560 out = force_operand (tmp, copy_rtx (out));
20561 else
20562 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20563 }
20564 if (!rtx_equal_p (out, operands[0]))
20565 emit_move_insn (operands[0], copy_rtx (out));
20566
20567 return true;
20568 }
20569
20570 /*
20571 * General case: Jumpful:
20572 * xorl dest,dest cmpl op1, op2
20573 * cmpl op1, op2 movl ct, dest
20574 * setcc dest jcc 1f
20575 * decl dest movl cf, dest
20576 * andl (cf-ct),dest 1:
20577 * addl ct,dest
20578 *
20579 * Size 20. Size 14.
20580 *
20581 * This is reasonably steep, but branch mispredict costs are
20582 * high on modern cpus, so consider failing only if optimizing
20583 * for space.
20584 */
20585
20586 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20587 && BRANCH_COST (optimize_insn_for_speed_p (),
20588 false) >= 2)
20589 {
20590 if (cf == 0)
20591 {
20592 enum machine_mode cmp_mode = GET_MODE (op0);
20593
20594 cf = ct;
20595 ct = 0;
20596
20597 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20598 {
20599 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20600
20601 	      /* We may be reversing an unordered compare to a normal compare,
20602 	         which is not valid in general (we may convert a non-trapping
20603 	         condition to a trapping one); however, on i386 we currently
20604 	         emit all comparisons unordered.  */
20605 code = reverse_condition_maybe_unordered (code);
20606 }
20607 else
20608 {
20609 code = reverse_condition (code);
20610 if (compare_code != UNKNOWN)
20611 compare_code = reverse_condition (compare_code);
20612 }
20613 }
20614
20615 if (compare_code != UNKNOWN)
20616 {
20617 /* notl op1 (if needed)
20618 sarl $31, op1
20619 andl (cf-ct), op1
20620 addl ct, op1
20621
20622 For x < 0 (resp. x <= -1) there will be no notl,
20623 so if possible swap the constants to get rid of the
20624 complement.
20625 True/false will be -1/0 while code below (store flag
20626 followed by decrement) is 0/-1, so the constants need
20627 to be exchanged once more. */
20628
20629 if (compare_code == GE || !cf)
20630 {
20631 code = reverse_condition (code);
20632 compare_code = LT;
20633 }
20634 else
20635 {
20636 HOST_WIDE_INT tmp = cf;
20637 cf = ct;
20638 ct = tmp;
20639 }
20640
20641 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20642 }
20643 else
20644 {
20645 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20646
20647 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20648 constm1_rtx,
20649 copy_rtx (out), 1, OPTAB_DIRECT);
20650 }
20651
20652 out = expand_simple_binop (mode, AND, copy_rtx (out),
20653 gen_int_mode (cf - ct, mode),
20654 copy_rtx (out), 1, OPTAB_DIRECT);
20655 if (ct)
20656 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20657 copy_rtx (out), 1, OPTAB_DIRECT);
20658 if (!rtx_equal_p (out, operands[0]))
20659 emit_move_insn (operands[0], copy_rtx (out));
20660
20661 return true;
20662 }
20663 }
20664
20665 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20666 {
20667 /* Try a few things more with specific constants and a variable. */
20668
20669 optab op;
20670 rtx var, orig_out, out, tmp;
20671
20672 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20673 return false;
20674
20675       /* If one of the two operands is an interesting constant (0 or -1), load
20676          a 0/-1 mask by recursing and mask the variable in with a logical operation.  */
20677
20678 if (CONST_INT_P (operands[2]))
20679 {
20680 var = operands[3];
20681 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20682 operands[3] = constm1_rtx, op = and_optab;
20683 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20684 operands[3] = const0_rtx, op = ior_optab;
20685 else
20686 return false;
20687 }
20688 else if (CONST_INT_P (operands[3]))
20689 {
20690 var = operands[2];
20691 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20692 operands[2] = constm1_rtx, op = and_optab;
20693 	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20694 operands[2] = const0_rtx, op = ior_optab;
20695 else
20696 return false;
20697 }
20698 else
20699 return false;
20700
20701 orig_out = operands[0];
20702 tmp = gen_reg_rtx (mode);
20703 operands[0] = tmp;
20704
20705 /* Recurse to get the constant loaded. */
20706 if (ix86_expand_int_movcc (operands) == 0)
20707 return false;
20708
20709 /* Mask in the interesting variable. */
20710 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20711 OPTAB_WIDEN);
20712 if (!rtx_equal_p (out, orig_out))
20713 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20714
20715 return true;
20716 }
20717
20718 /*
20719 * For comparison with above,
20720 *
20721 * movl cf,dest
20722 * movl ct,tmp
20723 * cmpl op1,op2
20724 * cmovcc tmp,dest
20725 *
20726 * Size 15.
20727 */
20728
20729 if (! nonimmediate_operand (operands[2], mode))
20730 operands[2] = force_reg (mode, operands[2]);
20731 if (! nonimmediate_operand (operands[3], mode))
20732 operands[3] = force_reg (mode, operands[3]);
20733
20734 if (! register_operand (operands[2], VOIDmode)
20735 && (mode == QImode
20736 || ! register_operand (operands[3], VOIDmode)))
20737 operands[2] = force_reg (mode, operands[2]);
20738
20739 if (mode == QImode
20740 && ! register_operand (operands[3], VOIDmode))
20741 operands[3] = force_reg (mode, operands[3]);
20742
20743 emit_insn (compare_seq);
20744 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20745 gen_rtx_IF_THEN_ELSE (mode,
20746 compare_op, operands[2],
20747 operands[3])));
20748 return true;
20749 }
20750
20751 /* Swap, force into registers, or otherwise massage the two operands
20752 to an sse comparison with a mask result. Thus we differ a bit from
20753 ix86_prepare_fp_compare_args which expects to produce a flags result.
20754
20755 The DEST operand exists to help determine whether to commute commutative
20756 operators. The POP0/POP1 operands are updated in place. The new
20757 comparison code is returned, or UNKNOWN if not implementable. */
20758
20759 static enum rtx_code
20760 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20761 rtx *pop0, rtx *pop1)
20762 {
20763 rtx tmp;
20764
20765 switch (code)
20766 {
20767 case LTGT:
20768 case UNEQ:
20769 /* AVX supports all the needed comparisons. */
20770 if (TARGET_AVX)
20771 break;
20772 /* We have no LTGT as an operator. We could implement it with
20773 NE & ORDERED, but this requires an extra temporary. It's
20774 not clear that it's worth it. */
20775 return UNKNOWN;
20776
20777 case LT:
20778 case LE:
20779 case UNGT:
20780 case UNGE:
20781 /* These are supported directly. */
20782 break;
20783
20784 case EQ:
20785 case NE:
20786 case UNORDERED:
20787 case ORDERED:
20788 /* AVX has 3 operand comparisons, no need to swap anything. */
20789 if (TARGET_AVX)
20790 break;
20791 /* For commutative operators, try to canonicalize the destination
20792 operand to be first in the comparison - this helps reload to
20793 avoid extra moves. */
20794 if (!dest || !rtx_equal_p (dest, *pop1))
20795 break;
20796 /* FALLTHRU */
20797
20798 case GE:
20799 case GT:
20800 case UNLE:
20801 case UNLT:
20802 /* These are not supported directly before AVX, and furthermore
20803 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20804 comparison operands to transform into something that is
20805 supported. */
20806 tmp = *pop0;
20807 *pop0 = *pop1;
20808 *pop1 = tmp;
20809 code = swap_condition (code);
20810 break;
20811
20812 default:
20813 gcc_unreachable ();
20814 }
20815
20816 return code;
20817 }
20818
20819 /* Detect conditional moves that exactly match min/max operational
20820 semantics. Note that this is IEEE safe, as long as we don't
20821 interchange the operands.
20822
20823 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20824 and TRUE if the operation is successful and instructions are emitted. */
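/* (Why this is IEEE safe, summarizing the minss/minps semantics: the hardware
   min/max return their second operand when the operands are unordered or are
   zeros of opposite sign, and "a < b ? a : b" also yields b in exactly those
   cases, so no operand interchange is needed.)  */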
20825
20826 static bool
20827 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20828 rtx cmp_op1, rtx if_true, rtx if_false)
20829 {
20830 enum machine_mode mode;
20831 bool is_min;
20832 rtx tmp;
20833
20834 if (code == LT)
20835 ;
20836 else if (code == UNGE)
20837 {
20838 tmp = if_true;
20839 if_true = if_false;
20840 if_false = tmp;
20841 }
20842 else
20843 return false;
20844
20845 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20846 is_min = true;
20847 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20848 is_min = false;
20849 else
20850 return false;
20851
20852 mode = GET_MODE (dest);
20853
20854 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20855 but MODE may be a vector mode and thus not appropriate. */
20856 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20857 {
20858 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20859 rtvec v;
20860
20861 if_true = force_reg (mode, if_true);
20862 v = gen_rtvec (2, if_true, if_false);
20863 tmp = gen_rtx_UNSPEC (mode, v, u);
20864 }
20865 else
20866 {
20867 code = is_min ? SMIN : SMAX;
20868 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20869 }
20870
20871 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20872 return true;
20873 }
20874
20875 /* Expand an sse vector comparison. Return the register with the result. */
20876
20877 static rtx
20878 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20879 rtx op_true, rtx op_false)
20880 {
20881 enum machine_mode mode = GET_MODE (dest);
20882 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20883
20884   /* In the general case the result of the comparison can differ from the operands' type.  */
20885 enum machine_mode cmp_mode;
20886
20887 /* In AVX512F the result of comparison is an integer mask. */
20888 bool maskcmp = false;
20889 rtx x;
20890
20891 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20892 {
20893 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20894 gcc_assert (cmp_mode != BLKmode);
20895
20896 maskcmp = true;
20897 }
20898 else
20899 cmp_mode = cmp_ops_mode;
20900
20901
20902 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20903 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20904 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20905
20906 if (optimize
20907 || reg_overlap_mentioned_p (dest, op_true)
20908 || reg_overlap_mentioned_p (dest, op_false))
20909 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20910
20911 /* Compare patterns for int modes are unspec in AVX512F only. */
20912 if (maskcmp && (code == GT || code == EQ))
20913 {
20914 rtx (*gen)(rtx, rtx, rtx);
20915
20916 switch (cmp_ops_mode)
20917 {
20918 case V16SImode:
20919 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20920 break;
20921 case V8DImode:
20922 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20923 break;
20924 default:
20925 gen = NULL;
20926 }
20927
20928 if (gen)
20929 {
20930 emit_insn (gen (dest, cmp_op0, cmp_op1));
20931 return dest;
20932 }
20933 }
20934 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20935
20936 if (cmp_mode != mode && !maskcmp)
20937 {
20938 x = force_reg (cmp_ops_mode, x);
20939 convert_move (dest, x, false);
20940 }
20941 else
20942 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20943
20944 return dest;
20945 }
20946
20947 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20948 operations. This is used for both scalar and vector conditional moves. */
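/* The generic fallback at the end computes, per element,
     dest = (cmp & op_true) | (~cmp & op_false)
   relying on CMP being an all-ones / all-zeros element mask; the earlier
   cases are just cheaper special forms (plain AND, ANDN, IOR or a blend).  */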
20949
20950 static void
20951 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20952 {
20953 enum machine_mode mode = GET_MODE (dest);
20954 enum machine_mode cmpmode = GET_MODE (cmp);
20955
20956 /* In AVX512F the result of comparison is an integer mask. */
20957 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20958
20959 rtx t2, t3, x;
20960
20961 if (vector_all_ones_operand (op_true, mode)
20962 && rtx_equal_p (op_false, CONST0_RTX (mode))
20963 && !maskcmp)
20964 {
20965 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20966 }
20967 else if (op_false == CONST0_RTX (mode)
20968 && !maskcmp)
20969 {
20970 op_true = force_reg (mode, op_true);
20971 x = gen_rtx_AND (mode, cmp, op_true);
20972 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20973 }
20974 else if (op_true == CONST0_RTX (mode)
20975 && !maskcmp)
20976 {
20977 op_false = force_reg (mode, op_false);
20978 x = gen_rtx_NOT (mode, cmp);
20979 x = gen_rtx_AND (mode, x, op_false);
20980 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20981 }
20982 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20983 && !maskcmp)
20984 {
20985 op_false = force_reg (mode, op_false);
20986 x = gen_rtx_IOR (mode, cmp, op_false);
20987 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20988 }
20989 else if (TARGET_XOP
20990 && !maskcmp)
20991 {
20992 op_true = force_reg (mode, op_true);
20993
20994 if (!nonimmediate_operand (op_false, mode))
20995 op_false = force_reg (mode, op_false);
20996
20997 emit_insn (gen_rtx_SET (mode, dest,
20998 gen_rtx_IF_THEN_ELSE (mode, cmp,
20999 op_true,
21000 op_false)));
21001 }
21002 else
21003 {
21004 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21005 rtx d = dest;
21006
21007 if (!nonimmediate_operand (op_true, mode))
21008 op_true = force_reg (mode, op_true);
21009
21010 op_false = force_reg (mode, op_false);
21011
21012 switch (mode)
21013 {
21014 case V4SFmode:
21015 if (TARGET_SSE4_1)
21016 gen = gen_sse4_1_blendvps;
21017 break;
21018 case V2DFmode:
21019 if (TARGET_SSE4_1)
21020 gen = gen_sse4_1_blendvpd;
21021 break;
21022 case V16QImode:
21023 case V8HImode:
21024 case V4SImode:
21025 case V2DImode:
21026 if (TARGET_SSE4_1)
21027 {
21028 gen = gen_sse4_1_pblendvb;
21029 if (mode != V16QImode)
21030 d = gen_reg_rtx (V16QImode);
21031 op_false = gen_lowpart (V16QImode, op_false);
21032 op_true = gen_lowpart (V16QImode, op_true);
21033 cmp = gen_lowpart (V16QImode, cmp);
21034 }
21035 break;
21036 case V8SFmode:
21037 if (TARGET_AVX)
21038 gen = gen_avx_blendvps256;
21039 break;
21040 case V4DFmode:
21041 if (TARGET_AVX)
21042 gen = gen_avx_blendvpd256;
21043 break;
21044 case V32QImode:
21045 case V16HImode:
21046 case V8SImode:
21047 case V4DImode:
21048 if (TARGET_AVX2)
21049 {
21050 gen = gen_avx2_pblendvb;
21051 if (mode != V32QImode)
21052 d = gen_reg_rtx (V32QImode);
21053 op_false = gen_lowpart (V32QImode, op_false);
21054 op_true = gen_lowpart (V32QImode, op_true);
21055 cmp = gen_lowpart (V32QImode, cmp);
21056 }
21057 break;
21058
21059 case V64QImode:
21060 gen = gen_avx512bw_blendmv64qi;
21061 break;
21062 case V32HImode:
21063 gen = gen_avx512bw_blendmv32hi;
21064 break;
21065 case V16SImode:
21066 gen = gen_avx512f_blendmv16si;
21067 break;
21068 case V8DImode:
21069 gen = gen_avx512f_blendmv8di;
21070 break;
21071 case V8DFmode:
21072 gen = gen_avx512f_blendmv8df;
21073 break;
21074 case V16SFmode:
21075 gen = gen_avx512f_blendmv16sf;
21076 break;
21077
21078 default:
21079 break;
21080 }
21081
21082 if (gen != NULL)
21083 {
21084 emit_insn (gen (d, op_false, op_true, cmp));
21085 if (d != dest)
21086 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21087 }
21088 else
21089 {
21090 op_true = force_reg (mode, op_true);
21091
21092 t2 = gen_reg_rtx (mode);
21093 if (optimize)
21094 t3 = gen_reg_rtx (mode);
21095 else
21096 t3 = dest;
21097
21098 x = gen_rtx_AND (mode, op_true, cmp);
21099 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21100
21101 x = gen_rtx_NOT (mode, cmp);
21102 x = gen_rtx_AND (mode, x, op_false);
21103 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21104
21105 x = gen_rtx_IOR (mode, t3, t2);
21106 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21107 }
21108 }
21109 }
21110
21111 /* Expand a floating-point conditional move. Return true if successful. */
21112
21113 bool
21114 ix86_expand_fp_movcc (rtx operands[])
21115 {
21116 enum machine_mode mode = GET_MODE (operands[0]);
21117 enum rtx_code code = GET_CODE (operands[1]);
21118 rtx tmp, compare_op;
21119 rtx op0 = XEXP (operands[1], 0);
21120 rtx op1 = XEXP (operands[1], 1);
21121
21122 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21123 {
21124 enum machine_mode cmode;
21125
21126       /* Since we have no cmove for sse registers, don't force bad register
21127 allocation just to gain access to it. Deny movcc when the
21128 comparison mode doesn't match the move mode. */
21129 cmode = GET_MODE (op0);
21130 if (cmode == VOIDmode)
21131 cmode = GET_MODE (op1);
21132 if (cmode != mode)
21133 return false;
21134
21135 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21136 if (code == UNKNOWN)
21137 return false;
21138
21139 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21140 operands[2], operands[3]))
21141 return true;
21142
21143 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21144 operands[2], operands[3]);
21145 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21146 return true;
21147 }
21148
21149 if (GET_MODE (op0) == TImode
21150 || (GET_MODE (op0) == DImode
21151 && !TARGET_64BIT))
21152 return false;
21153
21154 /* The floating point conditional move instructions don't directly
21155 support conditions resulting from a signed integer comparison. */
21156
21157 compare_op = ix86_expand_compare (code, op0, op1);
21158 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21159 {
21160 tmp = gen_reg_rtx (QImode);
21161 ix86_expand_setcc (tmp, code, op0, op1);
21162
21163 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21164 }
21165
21166 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21167 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21168 operands[2], operands[3])));
21169
21170 return true;
21171 }
21172
21173 /* Expand a floating-point vector conditional move; a vcond operation
21174 rather than a movcc operation. */
21175
21176 bool
21177 ix86_expand_fp_vcond (rtx operands[])
21178 {
21179 enum rtx_code code = GET_CODE (operands[3]);
21180 rtx cmp;
21181
21182 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21183 &operands[4], &operands[5]);
21184 if (code == UNKNOWN)
21185 {
21186 rtx temp;
21187 switch (GET_CODE (operands[3]))
21188 {
21189 case LTGT:
21190 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21191 operands[5], operands[0], operands[0]);
21192 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21193 operands[5], operands[1], operands[2]);
21194 code = AND;
21195 break;
21196 case UNEQ:
21197 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21198 operands[5], operands[0], operands[0]);
21199 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21200 operands[5], operands[1], operands[2]);
21201 code = IOR;
21202 break;
21203 default:
21204 gcc_unreachable ();
21205 }
21206 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21207 OPTAB_DIRECT);
21208 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21209 return true;
21210 }
21211
21212 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21213 operands[5], operands[1], operands[2]))
21214 return true;
21215
21216 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21217 operands[1], operands[2]);
21218 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21219 return true;
21220 }
21221
21222 /* Expand a signed/unsigned integral vector conditional move. */
21223
21224 bool
21225 ix86_expand_int_vcond (rtx operands[])
21226 {
21227 enum machine_mode data_mode = GET_MODE (operands[0]);
21228 enum machine_mode mode = GET_MODE (operands[4]);
21229 enum rtx_code code = GET_CODE (operands[3]);
21230 bool negate = false;
21231 rtx x, cop0, cop1;
21232
21233 cop0 = operands[4];
21234 cop1 = operands[5];
21235
21236 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21237 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21238 if ((code == LT || code == GE)
21239 && data_mode == mode
21240 && cop1 == CONST0_RTX (mode)
21241 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21242 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21243 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21244 && (GET_MODE_SIZE (data_mode) == 16
21245 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21246 {
21247 rtx negop = operands[2 - (code == LT)];
21248 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21249 if (negop == CONST1_RTX (data_mode))
21250 {
21251 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21252 operands[0], 1, OPTAB_DIRECT);
21253 if (res != operands[0])
21254 emit_move_insn (operands[0], res);
21255 return true;
21256 }
21257 else if (GET_MODE_INNER (data_mode) != DImode
21258 && vector_all_ones_operand (negop, data_mode))
21259 {
21260 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21261 operands[0], 0, OPTAB_DIRECT);
21262 if (res != operands[0])
21263 emit_move_insn (operands[0], res);
21264 return true;
21265 }
21266 }
21267
21268 if (!nonimmediate_operand (cop1, mode))
21269 cop1 = force_reg (mode, cop1);
21270 if (!general_operand (operands[1], data_mode))
21271 operands[1] = force_reg (data_mode, operands[1]);
21272 if (!general_operand (operands[2], data_mode))
21273 operands[2] = force_reg (data_mode, operands[2]);
21274
21275 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21276 if (TARGET_XOP
21277 && (mode == V16QImode || mode == V8HImode
21278 || mode == V4SImode || mode == V2DImode))
21279 ;
21280 else
21281 {
21282 /* Canonicalize the comparison to EQ, GT, GTU. */
21283 switch (code)
21284 {
21285 case EQ:
21286 case GT:
21287 case GTU:
21288 break;
21289
21290 case NE:
21291 case LE:
21292 case LEU:
21293 code = reverse_condition (code);
21294 negate = true;
21295 break;
21296
21297 case GE:
21298 case GEU:
21299 code = reverse_condition (code);
21300 negate = true;
21301 /* FALLTHRU */
21302
21303 case LT:
21304 case LTU:
21305 code = swap_condition (code);
21306 x = cop0, cop0 = cop1, cop1 = x;
21307 break;
21308
21309 default:
21310 gcc_unreachable ();
21311 }
21312
21313 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21314 if (mode == V2DImode)
21315 {
21316 switch (code)
21317 {
21318 case EQ:
21319 /* SSE4.1 supports EQ. */
21320 if (!TARGET_SSE4_1)
21321 return false;
21322 break;
21323
21324 case GT:
21325 case GTU:
21326 /* SSE4.2 supports GT/GTU. */
21327 if (!TARGET_SSE4_2)
21328 return false;
21329 break;
21330
21331 default:
21332 gcc_unreachable ();
21333 }
21334 }
21335
21336       /* Unsigned parallel compare is not supported by the hardware.
21337          Play some tricks to turn this into a signed comparison,
21338          or an equality test against zero.  */
21339 if (code == GTU)
21340 {
21341 cop0 = force_reg (mode, cop0);
21342
21343 switch (mode)
21344 {
21345 case V16SImode:
21346 case V8DImode:
21347 case V8SImode:
21348 case V4DImode:
21349 case V4SImode:
21350 case V2DImode:
21351 {
21352 rtx t1, t2, mask;
21353 rtx (*gen_sub3) (rtx, rtx, rtx);
21354
21355 switch (mode)
21356 {
21357 case V16SImode: gen_sub3 = gen_subv16si3; break;
21358 case V8DImode: gen_sub3 = gen_subv8di3; break;
21359 case V8SImode: gen_sub3 = gen_subv8si3; break;
21360 case V4DImode: gen_sub3 = gen_subv4di3; break;
21361 case V4SImode: gen_sub3 = gen_subv4si3; break;
21362 case V2DImode: gen_sub3 = gen_subv2di3; break;
21363 default:
21364 gcc_unreachable ();
21365 }
21366 /* Subtract (-(INT MAX) - 1) from both operands to make
21367 them signed. */
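	      /* I.e. bias both sides by the minimum signed value, which just
		 flips the sign bit: for SImode elements
		   x >u y  iff  (x - 0x80000000) >s (y - 0x80000000)
		 (other element widths are analogous), so a signed GT can be
		 used after the subtraction.  */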
21368 mask = ix86_build_signbit_mask (mode, true, false);
21369 t1 = gen_reg_rtx (mode);
21370 emit_insn (gen_sub3 (t1, cop0, mask));
21371
21372 t2 = gen_reg_rtx (mode);
21373 emit_insn (gen_sub3 (t2, cop1, mask));
21374
21375 cop0 = t1;
21376 cop1 = t2;
21377 code = GT;
21378 }
21379 break;
21380
21381 case V64QImode:
21382 case V32HImode:
21383 case V32QImode:
21384 case V16HImode:
21385 case V16QImode:
21386 case V8HImode:
21387 /* Perform a parallel unsigned saturating subtraction. */
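	  /* Here  x >u y  iff  (x -sat y) != 0,  so compare the saturating
	     difference against zero for equality and flip NEGATE (done just
	     below) to get the != sense.  */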
21388 x = gen_reg_rtx (mode);
21389 emit_insn (gen_rtx_SET (VOIDmode, x,
21390 gen_rtx_US_MINUS (mode, cop0, cop1)));
21391
21392 cop0 = x;
21393 cop1 = CONST0_RTX (mode);
21394 code = EQ;
21395 negate = !negate;
21396 break;
21397
21398 default:
21399 gcc_unreachable ();
21400 }
21401 }
21402 }
21403
21404 /* Allow the comparison to be done in one mode, but the movcc to
21405 happen in another mode. */
21406 if (data_mode == mode)
21407 {
21408 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21409 operands[1+negate], operands[2-negate]);
21410 }
21411 else
21412 {
21413 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21414 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21415 operands[1+negate], operands[2-negate]);
21416 if (GET_MODE (x) == mode)
21417 x = gen_lowpart (data_mode, x);
21418 }
21419
21420 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21421 operands[2-negate]);
21422 return true;
21423 }
21424
21425 /* AVX512F supports 64-byte integer vector operations,
21426    so the longest vector we are faced with is V64QImode.  */
21427 #define MAX_VECT_LEN 64
21428
21429 struct expand_vec_perm_d
21430 {
21431 rtx target, op0, op1;
21432 unsigned char perm[MAX_VECT_LEN];
21433 enum machine_mode vmode;
21434 unsigned char nelt;
21435 bool one_operand_p;
21436 bool testing_p;
21437 };
21438
21439 static bool
21440 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21441 struct expand_vec_perm_d *d)
21442 {
21443 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21444 expander, so args are either in d, or in op0, op1 etc. */
21445 enum machine_mode mode = GET_MODE (d ? d->op0 : op0);
21446 enum machine_mode maskmode = mode;
21447 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21448
21449 switch (mode)
21450 {
21451 case V8HImode:
21452 if (TARGET_AVX512VL && TARGET_AVX512BW)
21453 gen = gen_avx512vl_vpermi2varv8hi3;
21454 break;
21455 case V16HImode:
21456 if (TARGET_AVX512VL && TARGET_AVX512BW)
21457 gen = gen_avx512vl_vpermi2varv16hi3;
21458 break;
21459 case V32HImode:
21460 if (TARGET_AVX512BW)
21461 gen = gen_avx512bw_vpermi2varv32hi3;
21462 break;
21463 case V4SImode:
21464 if (TARGET_AVX512VL)
21465 gen = gen_avx512vl_vpermi2varv4si3;
21466 break;
21467 case V8SImode:
21468 if (TARGET_AVX512VL)
21469 gen = gen_avx512vl_vpermi2varv8si3;
21470 break;
21471 case V16SImode:
21472 if (TARGET_AVX512F)
21473 gen = gen_avx512f_vpermi2varv16si3;
21474 break;
21475 case V4SFmode:
21476 if (TARGET_AVX512VL)
21477 {
21478 gen = gen_avx512vl_vpermi2varv4sf3;
21479 maskmode = V4SImode;
21480 }
21481 break;
21482 case V8SFmode:
21483 if (TARGET_AVX512VL)
21484 {
21485 gen = gen_avx512vl_vpermi2varv8sf3;
21486 maskmode = V8SImode;
21487 }
21488 break;
21489 case V16SFmode:
21490 if (TARGET_AVX512F)
21491 {
21492 gen = gen_avx512f_vpermi2varv16sf3;
21493 maskmode = V16SImode;
21494 }
21495 break;
21496 case V2DImode:
21497 if (TARGET_AVX512VL)
21498 gen = gen_avx512vl_vpermi2varv2di3;
21499 break;
21500 case V4DImode:
21501 if (TARGET_AVX512VL)
21502 gen = gen_avx512vl_vpermi2varv4di3;
21503 break;
21504 case V8DImode:
21505 if (TARGET_AVX512F)
21506 gen = gen_avx512f_vpermi2varv8di3;
21507 break;
21508 case V2DFmode:
21509 if (TARGET_AVX512VL)
21510 {
21511 gen = gen_avx512vl_vpermi2varv2df3;
21512 maskmode = V2DImode;
21513 }
21514 break;
21515 case V4DFmode:
21516 if (TARGET_AVX512VL)
21517 {
21518 gen = gen_avx512vl_vpermi2varv4df3;
21519 maskmode = V4DImode;
21520 }
21521 break;
21522 case V8DFmode:
21523 if (TARGET_AVX512F)
21524 {
21525 gen = gen_avx512f_vpermi2varv8df3;
21526 maskmode = V8DImode;
21527 }
21528 break;
21529 default:
21530 break;
21531 }
21532
21533 if (gen == NULL)
21534 return false;
21535
21536 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21537 expander, so args are either in d, or in op0, op1 etc. */
21538 if (d)
21539 {
21540 rtx vec[64];
21541 target = d->target;
21542 op0 = d->op0;
21543 op1 = d->op1;
21544 for (int i = 0; i < d->nelt; ++i)
21545 vec[i] = GEN_INT (d->perm[i]);
21546 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21547 }
21548
21549 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21550 return true;
21551 }
21552
21553 /* Expand a variable vector permutation. */
21554
21555 void
21556 ix86_expand_vec_perm (rtx operands[])
21557 {
21558 rtx target = operands[0];
21559 rtx op0 = operands[1];
21560 rtx op1 = operands[2];
21561 rtx mask = operands[3];
21562 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21563 enum machine_mode mode = GET_MODE (op0);
21564 enum machine_mode maskmode = GET_MODE (mask);
21565 int w, e, i;
21566 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21567
21568 /* Number of elements in the vector. */
21569 w = GET_MODE_NUNITS (mode);
21570 e = GET_MODE_UNIT_SIZE (mode);
21571 gcc_assert (w <= 64);
21572
21573 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21574 return;
21575
21576 if (TARGET_AVX2)
21577 {
21578 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21579 {
21580 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21581 a constant shuffle operand.  With a tiny bit of effort we can
21582 use VPERMD instead.  A re-interpretation stall for V4DFmode is
21583 unfortunate but there's no avoiding it.
21584 Similarly for V16HImode we don't have instructions for variable
21585 shuffling, while for V32QImode we can, after preparing suitable
21586 masks, use vpshufb; vpshufb; vpermq; vpor.  */
21587
21588 if (mode == V16HImode)
21589 {
21590 maskmode = mode = V32QImode;
21591 w = 32;
21592 e = 1;
21593 }
21594 else
21595 {
21596 maskmode = mode = V8SImode;
21597 w = 8;
21598 e = 4;
21599 }
21600 t1 = gen_reg_rtx (maskmode);
21601
21602 /* Replicate the low bits of the V4DImode mask into V8SImode:
21603 mask = { A B C D }
21604 t1 = { A A B B C C D D }. */
21605 for (i = 0; i < w / 2; ++i)
21606 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21607 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21608 vt = force_reg (maskmode, vt);
21609 mask = gen_lowpart (maskmode, mask);
21610 if (maskmode == V8SImode)
21611 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21612 else
21613 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21614
21615 /* Multiply the shuffle indices by two.  */
21616 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21617 OPTAB_DIRECT);
21618
21619 /* Add one to the odd shuffle indices:
21620 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21621 for (i = 0; i < w / 2; ++i)
21622 {
21623 vec[i * 2] = const0_rtx;
21624 vec[i * 2 + 1] = const1_rtx;
21625 }
21626 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21627 vt = validize_mem (force_const_mem (maskmode, vt));
21628 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21629 OPTAB_DIRECT);
21630
21631 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21632 operands[3] = mask = t1;
21633 target = gen_reg_rtx (mode);
21634 op0 = gen_lowpart (mode, op0);
21635 op1 = gen_lowpart (mode, op1);
21636 }
21637
21638 switch (mode)
21639 {
21640 case V8SImode:
21641 /* The VPERMD and VPERMPS instructions already properly ignore
21642 the high bits of the shuffle elements. No need for us to
21643 perform an AND ourselves. */
21644 if (one_operand_shuffle)
21645 {
21646 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21647 if (target != operands[0])
21648 emit_move_insn (operands[0],
21649 gen_lowpart (GET_MODE (operands[0]), target));
21650 }
21651 else
21652 {
21653 t1 = gen_reg_rtx (V8SImode);
21654 t2 = gen_reg_rtx (V8SImode);
21655 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21656 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21657 goto merge_two;
21658 }
21659 return;
21660
21661 case V8SFmode:
21662 mask = gen_lowpart (V8SImode, mask);
21663 if (one_operand_shuffle)
21664 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21665 else
21666 {
21667 t1 = gen_reg_rtx (V8SFmode);
21668 t2 = gen_reg_rtx (V8SFmode);
21669 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21670 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21671 goto merge_two;
21672 }
21673 return;
21674
21675 case V4SImode:
21676 /* By combining the two 128-bit input vectors into one 256-bit
21677 input vector, we can use VPERMD and VPERMPS for the full
21678 two-operand shuffle. */
21679 t1 = gen_reg_rtx (V8SImode);
21680 t2 = gen_reg_rtx (V8SImode);
21681 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21682 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21683 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21684 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21685 return;
21686
21687 case V4SFmode:
21688 t1 = gen_reg_rtx (V8SFmode);
21689 t2 = gen_reg_rtx (V8SImode);
21690 mask = gen_lowpart (V4SImode, mask);
21691 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21692 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21693 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21694 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21695 return;
21696
21697 case V32QImode:
21698 t1 = gen_reg_rtx (V32QImode);
21699 t2 = gen_reg_rtx (V32QImode);
21700 t3 = gen_reg_rtx (V32QImode);
21701 vt2 = GEN_INT (-128);
21702 for (i = 0; i < 32; i++)
21703 vec[i] = vt2;
21704 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21705 vt = force_reg (V32QImode, vt);
21706 for (i = 0; i < 32; i++)
21707 vec[i] = i < 16 ? vt2 : const0_rtx;
21708 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21709 vt2 = force_reg (V32QImode, vt2);
21710 /* From mask create two adjusted masks, which contain the same
21711 bits as mask in the low 7 bits of each vector element.
21712 The first mask will have the most significant bit clear
21713 if it requests element from the same 128-bit lane
21714 and MSB set if it requests element from the other 128-bit lane.
21715 The second mask will have the opposite values of the MSB,
21716 and additionally will have its 128-bit lanes swapped.
21717 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21718 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21719 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21720 stands for other 12 bytes. */
21721 /* The bit that tells whether an element comes from the same lane or
21722 from the other lane is bit 4, so shift it up by 3 to the MSB position. */
21723 t5 = gen_reg_rtx (V4DImode);
21724 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21725 GEN_INT (3)));
21726 /* Clear MSB bits from the mask just in case it had them set. */
21727 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21728 /* After this t1 will have MSB set for elements from other lane. */
21729 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21730 /* Clear bits other than MSB. */
21731 emit_insn (gen_andv32qi3 (t1, t1, vt));
21732 /* Or in the lower bits from mask into t3. */
21733 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21734 /* And invert MSB bits in t1, so MSB is set for elements from the same
21735 lane. */
21736 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21737 /* Swap 128-bit lanes in t3. */
21738 t6 = gen_reg_rtx (V4DImode);
21739 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21740 const2_rtx, GEN_INT (3),
21741 const0_rtx, const1_rtx));
21742 /* And or in the lower bits from mask into t1. */
21743 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21744 if (one_operand_shuffle)
21745 {
21746 /* Each of these shuffles will put 0s in places where
21747 element from the other 128-bit lane is needed, otherwise
21748 will shuffle in the requested value. */
21749 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21750 gen_lowpart (V32QImode, t6)));
21751 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21752 /* For t3 the 128-bit lanes are swapped again. */
21753 t7 = gen_reg_rtx (V4DImode);
21754 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21755 const2_rtx, GEN_INT (3),
21756 const0_rtx, const1_rtx));
21757 /* And oring both together leads to the result. */
21758 emit_insn (gen_iorv32qi3 (target, t1,
21759 gen_lowpart (V32QImode, t7)));
21760 if (target != operands[0])
21761 emit_move_insn (operands[0],
21762 gen_lowpart (GET_MODE (operands[0]), target));
21763 return;
21764 }
21765
21766 t4 = gen_reg_rtx (V32QImode);
21767 /* Similarly to the above one_operand_shuffle code,
21768 just repeated twice, once for each operand.  The merge_two:
21769 code below will merge the two results together. */
21770 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21771 gen_lowpart (V32QImode, t6)));
21772 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21773 gen_lowpart (V32QImode, t6)));
21774 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21775 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21776 t7 = gen_reg_rtx (V4DImode);
21777 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21778 const2_rtx, GEN_INT (3),
21779 const0_rtx, const1_rtx));
21780 t8 = gen_reg_rtx (V4DImode);
21781 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21782 const2_rtx, GEN_INT (3),
21783 const0_rtx, const1_rtx));
21784 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21785 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21786 t1 = t4;
21787 t2 = t3;
21788 goto merge_two;
21789
21790 default:
21791 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21792 break;
21793 }
21794 }
21795
21796 if (TARGET_XOP)
21797 {
21798 /* The XOP VPPERM insn supports three inputs. By ignoring the
21799 one_operand_shuffle special case, we avoid creating another
21800 set of constant vectors in memory. */
21801 one_operand_shuffle = false;
21802
21803 /* mask = mask & {2*w-1, ...} */
21804 vt = GEN_INT (2*w - 1);
21805 }
21806 else
21807 {
21808 /* mask = mask & {w-1, ...} */
21809 vt = GEN_INT (w - 1);
21810 }
21811
21812 for (i = 0; i < w; i++)
21813 vec[i] = vt;
21814 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21815 mask = expand_simple_binop (maskmode, AND, mask, vt,
21816 NULL_RTX, 0, OPTAB_DIRECT);
21817
21818 /* For non-QImode operations, convert the word permutation control
21819 into a byte permutation control. */
21820 if (mode != V16QImode)
21821 {
21822 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21823 GEN_INT (exact_log2 (e)),
21824 NULL_RTX, 0, OPTAB_DIRECT);
21825
21826 /* Convert mask to vector of chars. */
21827 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21828
21829 /* Replicate each of the input bytes into byte positions:
21830 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21831 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21832 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21833 for (i = 0; i < 16; ++i)
21834 vec[i] = GEN_INT (i/e * e);
21835 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21836 vt = validize_mem (force_const_mem (V16QImode, vt));
21837 if (TARGET_XOP)
21838 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21839 else
21840 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21841
21842 /* Convert it into the byte positions by doing
21843 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21844 for (i = 0; i < 16; ++i)
21845 vec[i] = GEN_INT (i % e);
21846 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21847 vt = validize_mem (force_const_mem (V16QImode, vt));
21848 emit_insn (gen_addv16qi3 (mask, mask, vt));
21849 }
21850
21851 /* The actual shuffle operations all operate on V16QImode. */
21852 op0 = gen_lowpart (V16QImode, op0);
21853 op1 = gen_lowpart (V16QImode, op1);
21854
21855 if (TARGET_XOP)
21856 {
21857 if (GET_MODE (target) != V16QImode)
21858 target = gen_reg_rtx (V16QImode);
21859 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21860 if (target != operands[0])
21861 emit_move_insn (operands[0],
21862 gen_lowpart (GET_MODE (operands[0]), target));
21863 }
21864 else if (one_operand_shuffle)
21865 {
21866 if (GET_MODE (target) != V16QImode)
21867 target = gen_reg_rtx (V16QImode);
21868 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21869 if (target != operands[0])
21870 emit_move_insn (operands[0],
21871 gen_lowpart (GET_MODE (operands[0]), target));
21872 }
21873 else
21874 {
21875 rtx xops[6];
21876 bool ok;
21877
21878 /* Shuffle the two input vectors independently. */
21879 t1 = gen_reg_rtx (V16QImode);
21880 t2 = gen_reg_rtx (V16QImode);
21881 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21882 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21883
21884 merge_two:
21885 /* Then merge them together. The key is whether any given control
21886 element contained a bit set that indicates the second word. */
21887 mask = operands[3];
21888 vt = GEN_INT (w);
21889 if (maskmode == V2DImode && !TARGET_SSE4_1)
21890 {
21891 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21892 more shuffle to convert the V2DI input mask into a V4SI
21893 input mask.  At that point the masking that expand_int_vcond
21894 performs will work as desired. */
21895 rtx t3 = gen_reg_rtx (V4SImode);
21896 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21897 const0_rtx, const0_rtx,
21898 const2_rtx, const2_rtx));
21899 mask = t3;
21900 maskmode = V4SImode;
21901 e = w = 4;
21902 }
21903
21904 for (i = 0; i < w; i++)
21905 vec[i] = vt;
21906 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21907 vt = force_reg (maskmode, vt);
21908 mask = expand_simple_binop (maskmode, AND, mask, vt,
21909 NULL_RTX, 0, OPTAB_DIRECT);
21910
21911 if (GET_MODE (target) != mode)
21912 target = gen_reg_rtx (mode);
21913 xops[0] = target;
21914 xops[1] = gen_lowpart (mode, t2);
21915 xops[2] = gen_lowpart (mode, t1);
21916 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21917 xops[4] = mask;
21918 xops[5] = vt;
21919 ok = ix86_expand_int_vcond (xops);
21920 gcc_assert (ok);
21921 if (target != operands[0])
21922 emit_move_insn (operands[0],
21923 gen_lowpart (GET_MODE (operands[0]), target));
21924 }
21925 }
21926
21927 /* Unpack SRC into DEST as the next wider integer vector type.  UNSIGNED_P is
21928 true if we should do zero extension, else sign extension. HIGH_P is
21929 true if we want the N/2 high elements, else the low elements. */
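/* As a rough sketch of what the code below produces: unpacking the low
   half of a V16QImode SRC with UNSIGNED_P on an SSE4.1 target uses a
   single pmovzxbw-style extension, while without SSE4.1 the same result
   is obtained by interleaving SRC with zero (punpcklbw) in the unsigned
   case, or with a sign mask computed by pcmpgtb in the signed case.  */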
21930
21931 void
21932 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21933 {
21934 enum machine_mode imode = GET_MODE (src);
21935 rtx tmp;
21936
21937 if (TARGET_SSE4_1)
21938 {
21939 rtx (*unpack)(rtx, rtx);
21940 rtx (*extract)(rtx, rtx) = NULL;
21941 enum machine_mode halfmode = BLKmode;
21942
21943 switch (imode)
21944 {
21945 case V64QImode:
21946 if (unsigned_p)
21947 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
21948 else
21949 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
21950 halfmode = V32QImode;
21951 extract
21952 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
21953 break;
21954 case V32QImode:
21955 if (unsigned_p)
21956 unpack = gen_avx2_zero_extendv16qiv16hi2;
21957 else
21958 unpack = gen_avx2_sign_extendv16qiv16hi2;
21959 halfmode = V16QImode;
21960 extract
21961 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21962 break;
21963 case V32HImode:
21964 if (unsigned_p)
21965 unpack = gen_avx512f_zero_extendv16hiv16si2;
21966 else
21967 unpack = gen_avx512f_sign_extendv16hiv16si2;
21968 halfmode = V16HImode;
21969 extract
21970 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21971 break;
21972 case V16HImode:
21973 if (unsigned_p)
21974 unpack = gen_avx2_zero_extendv8hiv8si2;
21975 else
21976 unpack = gen_avx2_sign_extendv8hiv8si2;
21977 halfmode = V8HImode;
21978 extract
21979 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21980 break;
21981 case V16SImode:
21982 if (unsigned_p)
21983 unpack = gen_avx512f_zero_extendv8siv8di2;
21984 else
21985 unpack = gen_avx512f_sign_extendv8siv8di2;
21986 halfmode = V8SImode;
21987 extract
21988 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21989 break;
21990 case V8SImode:
21991 if (unsigned_p)
21992 unpack = gen_avx2_zero_extendv4siv4di2;
21993 else
21994 unpack = gen_avx2_sign_extendv4siv4di2;
21995 halfmode = V4SImode;
21996 extract
21997 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21998 break;
21999 case V16QImode:
22000 if (unsigned_p)
22001 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22002 else
22003 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22004 break;
22005 case V8HImode:
22006 if (unsigned_p)
22007 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22008 else
22009 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22010 break;
22011 case V4SImode:
22012 if (unsigned_p)
22013 unpack = gen_sse4_1_zero_extendv2siv2di2;
22014 else
22015 unpack = gen_sse4_1_sign_extendv2siv2di2;
22016 break;
22017 default:
22018 gcc_unreachable ();
22019 }
22020
22021 if (GET_MODE_SIZE (imode) >= 32)
22022 {
22023 tmp = gen_reg_rtx (halfmode);
22024 emit_insn (extract (tmp, src));
22025 }
22026 else if (high_p)
22027 {
22028 /* Shift higher 8 bytes to lower 8 bytes. */
22029 tmp = gen_reg_rtx (V1TImode);
22030 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22031 GEN_INT (64)));
22032 tmp = gen_lowpart (imode, tmp);
22033 }
22034 else
22035 tmp = src;
22036
22037 emit_insn (unpack (dest, tmp));
22038 }
22039 else
22040 {
22041 rtx (*unpack)(rtx, rtx, rtx);
22042
22043 switch (imode)
22044 {
22045 case V16QImode:
22046 if (high_p)
22047 unpack = gen_vec_interleave_highv16qi;
22048 else
22049 unpack = gen_vec_interleave_lowv16qi;
22050 break;
22051 case V8HImode:
22052 if (high_p)
22053 unpack = gen_vec_interleave_highv8hi;
22054 else
22055 unpack = gen_vec_interleave_lowv8hi;
22056 break;
22057 case V4SImode:
22058 if (high_p)
22059 unpack = gen_vec_interleave_highv4si;
22060 else
22061 unpack = gen_vec_interleave_lowv4si;
22062 break;
22063 default:
22064 gcc_unreachable ();
22065 }
22066
22067 if (unsigned_p)
22068 tmp = force_reg (imode, CONST0_RTX (imode));
22069 else
22070 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22071 src, pc_rtx, pc_rtx);
22072
22073 rtx tmp2 = gen_reg_rtx (imode);
22074 emit_insn (unpack (tmp2, src, tmp));
22075 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22076 }
22077 }
22078
22079 /* Expand conditional increment or decrement using adc/sbb instructions.
22080 The default case using setcc followed by the conditional move can be
22081 done by generic code. */
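/* Illustrative example only: "x += (unsigned) a < (unsigned) b" can be
   expanded (Intel syntax) as

	cmp	a, b		; carry set when a < b unsigned
	adc	x, 0		; add the carry into x

   and the decrement / reversed-condition cases use sbb or a reversed
   comparison in the same way.  */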
22082 bool
22083 ix86_expand_int_addcc (rtx operands[])
22084 {
22085 enum rtx_code code = GET_CODE (operands[1]);
22086 rtx flags;
22087 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22088 rtx compare_op;
22089 rtx val = const0_rtx;
22090 bool fpcmp = false;
22091 enum machine_mode mode;
22092 rtx op0 = XEXP (operands[1], 0);
22093 rtx op1 = XEXP (operands[1], 1);
22094
22095 if (operands[3] != const1_rtx
22096 && operands[3] != constm1_rtx)
22097 return false;
22098 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22099 return false;
22100 code = GET_CODE (compare_op);
22101
22102 flags = XEXP (compare_op, 0);
22103
22104 if (GET_MODE (flags) == CCFPmode
22105 || GET_MODE (flags) == CCFPUmode)
22106 {
22107 fpcmp = true;
22108 code = ix86_fp_compare_code_to_integer (code);
22109 }
22110
22111 if (code != LTU)
22112 {
22113 val = constm1_rtx;
22114 if (fpcmp)
22115 PUT_CODE (compare_op,
22116 reverse_condition_maybe_unordered
22117 (GET_CODE (compare_op)));
22118 else
22119 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22120 }
22121
22122 mode = GET_MODE (operands[0]);
22123
22124 /* Construct either adc or sbb insn. */
22125 if ((code == LTU) == (operands[3] == constm1_rtx))
22126 {
22127 switch (mode)
22128 {
22129 case QImode:
22130 insn = gen_subqi3_carry;
22131 break;
22132 case HImode:
22133 insn = gen_subhi3_carry;
22134 break;
22135 case SImode:
22136 insn = gen_subsi3_carry;
22137 break;
22138 case DImode:
22139 insn = gen_subdi3_carry;
22140 break;
22141 default:
22142 gcc_unreachable ();
22143 }
22144 }
22145 else
22146 {
22147 switch (mode)
22148 {
22149 case QImode:
22150 insn = gen_addqi3_carry;
22151 break;
22152 case HImode:
22153 insn = gen_addhi3_carry;
22154 break;
22155 case SImode:
22156 insn = gen_addsi3_carry;
22157 break;
22158 case DImode:
22159 insn = gen_adddi3_carry;
22160 break;
22161 default:
22162 gcc_unreachable ();
22163 }
22164 }
22165 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22166
22167 return true;
22168 }
22169
22170
22171 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22172 but works for floating point parameters and non-offsettable memories.
22173 For pushes, it returns just stack offsets; the values will be saved
22174 in the right order.  At most four parts are generated. */
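/* For instance, on a 32-bit target a DFmode value yields two SImode
   parts, XFmode three and TFmode four; on a 64-bit target XFmode and
   TFmode both yield two parts (DImode plus SImode or DImode,
   respectively).  */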
22175
22176 static int
22177 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22178 {
22179 int size;
22180
22181 if (!TARGET_64BIT)
22182 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22183 else
22184 size = (GET_MODE_SIZE (mode) + 4) / 8;
22185
22186 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22187 gcc_assert (size >= 2 && size <= 4);
22188
22189 /* Optimize constant pool reference to immediates. This is used by fp
22190 moves, which force all constants to memory to allow combining. */
22191 if (MEM_P (operand) && MEM_READONLY_P (operand))
22192 {
22193 rtx tmp = maybe_get_pool_constant (operand);
22194 if (tmp)
22195 operand = tmp;
22196 }
22197
22198 if (MEM_P (operand) && !offsettable_memref_p (operand))
22199 {
22200 /* The only non-offsettable memories we handle are pushes. */
22201 int ok = push_operand (operand, VOIDmode);
22202
22203 gcc_assert (ok);
22204
22205 operand = copy_rtx (operand);
22206 PUT_MODE (operand, word_mode);
22207 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22208 return size;
22209 }
22210
22211 if (GET_CODE (operand) == CONST_VECTOR)
22212 {
22213 enum machine_mode imode = int_mode_for_mode (mode);
22214 /* Caution: if we looked through a constant pool memory above,
22215 the operand may actually have a different mode now. That's
22216 ok, since we want to pun this all the way back to an integer. */
22217 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22218 gcc_assert (operand != NULL);
22219 mode = imode;
22220 }
22221
22222 if (!TARGET_64BIT)
22223 {
22224 if (mode == DImode)
22225 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22226 else
22227 {
22228 int i;
22229
22230 if (REG_P (operand))
22231 {
22232 gcc_assert (reload_completed);
22233 for (i = 0; i < size; i++)
22234 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22235 }
22236 else if (offsettable_memref_p (operand))
22237 {
22238 operand = adjust_address (operand, SImode, 0);
22239 parts[0] = operand;
22240 for (i = 1; i < size; i++)
22241 parts[i] = adjust_address (operand, SImode, 4 * i);
22242 }
22243 else if (GET_CODE (operand) == CONST_DOUBLE)
22244 {
22245 REAL_VALUE_TYPE r;
22246 long l[4];
22247
22248 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22249 switch (mode)
22250 {
22251 case TFmode:
22252 real_to_target (l, &r, mode);
22253 parts[3] = gen_int_mode (l[3], SImode);
22254 parts[2] = gen_int_mode (l[2], SImode);
22255 break;
22256 case XFmode:
22257 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22258 long double may not be 80-bit. */
22259 real_to_target (l, &r, mode);
22260 parts[2] = gen_int_mode (l[2], SImode);
22261 break;
22262 case DFmode:
22263 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22264 break;
22265 default:
22266 gcc_unreachable ();
22267 }
22268 parts[1] = gen_int_mode (l[1], SImode);
22269 parts[0] = gen_int_mode (l[0], SImode);
22270 }
22271 else
22272 gcc_unreachable ();
22273 }
22274 }
22275 else
22276 {
22277 if (mode == TImode)
22278 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22279 if (mode == XFmode || mode == TFmode)
22280 {
22281 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22282 if (REG_P (operand))
22283 {
22284 gcc_assert (reload_completed);
22285 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22286 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22287 }
22288 else if (offsettable_memref_p (operand))
22289 {
22290 operand = adjust_address (operand, DImode, 0);
22291 parts[0] = operand;
22292 parts[1] = adjust_address (operand, upper_mode, 8);
22293 }
22294 else if (GET_CODE (operand) == CONST_DOUBLE)
22295 {
22296 REAL_VALUE_TYPE r;
22297 long l[4];
22298
22299 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22300 real_to_target (l, &r, mode);
22301
22302 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22303 if (HOST_BITS_PER_WIDE_INT >= 64)
22304 parts[0]
22305 = gen_int_mode
22306 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22307 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22308 DImode);
22309 else
22310 parts[0] = immed_double_const (l[0], l[1], DImode);
22311
22312 if (upper_mode == SImode)
22313 parts[1] = gen_int_mode (l[2], SImode);
22314 else if (HOST_BITS_PER_WIDE_INT >= 64)
22315 parts[1]
22316 = gen_int_mode
22317 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22318 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22319 DImode);
22320 else
22321 parts[1] = immed_double_const (l[2], l[3], DImode);
22322 }
22323 else
22324 gcc_unreachable ();
22325 }
22326 }
22327
22328 return size;
22329 }
22330
22331 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22332 All required insns are emitted directly; nothing is returned.
22333 Operands 2-5 are used to hold the destination parts in the correct
22334 order; operands 6-9 hold the corresponding source parts. */
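/* For instance, a DImode register-to-register move on a 32-bit target
   becomes two SImode moves, emitted in an order (or via an lea of the
   source address) chosen so that no source part is clobbered before it
   has been read.  */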
22335
22336 void
22337 ix86_split_long_move (rtx operands[])
22338 {
22339 rtx part[2][4];
22340 int nparts, i, j;
22341 int push = 0;
22342 int collisions = 0;
22343 enum machine_mode mode = GET_MODE (operands[0]);
22344 bool collisionparts[4];
22345
22346 /* The DFmode expanders may ask us to move double.
22347 For a 64-bit target this is a single move.  By hiding the fact
22348 here we simplify i386.md splitters. */
22349 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22350 {
22351 /* Optimize constant pool reference to immediates. This is used by
22352 fp moves, which force all constants to memory to allow combining. */
22353
22354 if (MEM_P (operands[1])
22355 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22356 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22357 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22358 if (push_operand (operands[0], VOIDmode))
22359 {
22360 operands[0] = copy_rtx (operands[0]);
22361 PUT_MODE (operands[0], word_mode);
22362 }
22363 else
22364 operands[0] = gen_lowpart (DImode, operands[0]);
22365 operands[1] = gen_lowpart (DImode, operands[1]);
22366 emit_move_insn (operands[0], operands[1]);
22367 return;
22368 }
22369
22370 /* The only non-offsettable memory we handle is push. */
22371 if (push_operand (operands[0], VOIDmode))
22372 push = 1;
22373 else
22374 gcc_assert (!MEM_P (operands[0])
22375 || offsettable_memref_p (operands[0]));
22376
22377 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22378 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22379
22380 /* When emitting push, take care for source operands on the stack. */
22381 if (push && MEM_P (operands[1])
22382 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22383 {
22384 rtx src_base = XEXP (part[1][nparts - 1], 0);
22385
22386 /* Compensate for the stack decrement by 4. */
22387 if (!TARGET_64BIT && nparts == 3
22388 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22389 src_base = plus_constant (Pmode, src_base, 4);
22390
22391 /* src_base refers to the stack pointer and is
22392 automatically decreased by emitted push. */
22393 for (i = 0; i < nparts; i++)
22394 part[1][i] = change_address (part[1][i],
22395 GET_MODE (part[1][i]), src_base);
22396 }
22397
22398 /* We need to do the copy in the right order in case an address register
22399 of the source overlaps the destination. */
22400 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22401 {
22402 rtx tmp;
22403
22404 for (i = 0; i < nparts; i++)
22405 {
22406 collisionparts[i]
22407 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22408 if (collisionparts[i])
22409 collisions++;
22410 }
22411
22412 /* Collision in the middle part can be handled by reordering. */
22413 if (collisions == 1 && nparts == 3 && collisionparts [1])
22414 {
22415 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22416 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22417 }
22418 else if (collisions == 1
22419 && nparts == 4
22420 && (collisionparts [1] || collisionparts [2]))
22421 {
22422 if (collisionparts [1])
22423 {
22424 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22425 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22426 }
22427 else
22428 {
22429 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22430 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22431 }
22432 }
22433
22434 /* If there are more collisions, we can't handle it by reordering.
22435 Do an lea to the last part and use only one colliding move. */
22436 else if (collisions > 1)
22437 {
22438 rtx base;
22439
22440 collisions = 1;
22441
22442 base = part[0][nparts - 1];
22443
22444 /* Handle the case when the last part isn't valid for lea.
22445 Happens in 64-bit mode storing the 12-byte XFmode. */
22446 if (GET_MODE (base) != Pmode)
22447 base = gen_rtx_REG (Pmode, REGNO (base));
22448
22449 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22450 part[1][0] = replace_equiv_address (part[1][0], base);
22451 for (i = 1; i < nparts; i++)
22452 {
22453 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22454 part[1][i] = replace_equiv_address (part[1][i], tmp);
22455 }
22456 }
22457 }
22458
22459 if (push)
22460 {
22461 if (!TARGET_64BIT)
22462 {
22463 if (nparts == 3)
22464 {
22465 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22466 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22467 stack_pointer_rtx, GEN_INT (-4)));
22468 emit_move_insn (part[0][2], part[1][2]);
22469 }
22470 else if (nparts == 4)
22471 {
22472 emit_move_insn (part[0][3], part[1][3]);
22473 emit_move_insn (part[0][2], part[1][2]);
22474 }
22475 }
22476 else
22477 {
22478 /* In 64-bit mode we don't have a 32-bit push available.  If this is a
22479 register, that is OK - we will just use the larger counterpart.  We also
22480 retype the memory - this comes from an attempt to avoid a REX prefix on
22481 moving the second half of a TFmode value. */
22482 if (GET_MODE (part[1][1]) == SImode)
22483 {
22484 switch (GET_CODE (part[1][1]))
22485 {
22486 case MEM:
22487 part[1][1] = adjust_address (part[1][1], DImode, 0);
22488 break;
22489
22490 case REG:
22491 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22492 break;
22493
22494 default:
22495 gcc_unreachable ();
22496 }
22497
22498 if (GET_MODE (part[1][0]) == SImode)
22499 part[1][0] = part[1][1];
22500 }
22501 }
22502 emit_move_insn (part[0][1], part[1][1]);
22503 emit_move_insn (part[0][0], part[1][0]);
22504 return;
22505 }
22506
22507 /* Choose correct order to not overwrite the source before it is copied. */
22508 if ((REG_P (part[0][0])
22509 && REG_P (part[1][1])
22510 && (REGNO (part[0][0]) == REGNO (part[1][1])
22511 || (nparts == 3
22512 && REGNO (part[0][0]) == REGNO (part[1][2]))
22513 || (nparts == 4
22514 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22515 || (collisions > 0
22516 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22517 {
22518 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22519 {
22520 operands[2 + i] = part[0][j];
22521 operands[6 + i] = part[1][j];
22522 }
22523 }
22524 else
22525 {
22526 for (i = 0; i < nparts; i++)
22527 {
22528 operands[2 + i] = part[0][i];
22529 operands[6 + i] = part[1][i];
22530 }
22531 }
22532
22533 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22534 if (optimize_insn_for_size_p ())
22535 {
22536 for (j = 0; j < nparts - 1; j++)
22537 if (CONST_INT_P (operands[6 + j])
22538 && operands[6 + j] != const0_rtx
22539 && REG_P (operands[2 + j]))
22540 for (i = j; i < nparts - 1; i++)
22541 if (CONST_INT_P (operands[7 + i])
22542 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22543 operands[7 + i] = operands[2 + j];
22544 }
22545
22546 for (i = 0; i < nparts; i++)
22547 emit_move_insn (operands[2 + i], operands[6 + i]);
22548
22549 return;
22550 }
22551
22552 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22553 left shift by a constant, either using a single shift or
22554 a sequence of add instructions. */
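/* For example, a shift left by 1 is always emitted as a single add of
   the operand to itself; larger counts use repeated adds only while
   COUNT times the add cost stays within the constant-shift cost,
   otherwise a single shift instruction is emitted.  */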
22555
22556 static void
22557 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22558 {
22559 rtx (*insn)(rtx, rtx, rtx);
22560
22561 if (count == 1
22562 || (count * ix86_cost->add <= ix86_cost->shift_const
22563 && !optimize_insn_for_size_p ()))
22564 {
22565 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22566 while (count-- > 0)
22567 emit_insn (insn (operand, operand, operand));
22568 }
22569 else
22570 {
22571 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22572 emit_insn (insn (operand, operand, GEN_INT (count)));
22573 }
22574 }
22575
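/* Split a double-word left shift (DImode on 32-bit targets, TImode on
   64-bit targets) of OPERANDS into operations on the two half-word
   parts, using SCRATCH together with cmove, when available, to handle
   variable counts of half a word or more.  */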
22576 void
22577 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22578 {
22579 rtx (*gen_ashl3)(rtx, rtx, rtx);
22580 rtx (*gen_shld)(rtx, rtx, rtx);
22581 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22582
22583 rtx low[2], high[2];
22584 int count;
22585
22586 if (CONST_INT_P (operands[2]))
22587 {
22588 split_double_mode (mode, operands, 2, low, high);
22589 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22590
22591 if (count >= half_width)
22592 {
22593 emit_move_insn (high[0], low[1]);
22594 emit_move_insn (low[0], const0_rtx);
22595
22596 if (count > half_width)
22597 ix86_expand_ashl_const (high[0], count - half_width, mode);
22598 }
22599 else
22600 {
22601 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22602
22603 if (!rtx_equal_p (operands[0], operands[1]))
22604 emit_move_insn (operands[0], operands[1]);
22605
22606 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22607 ix86_expand_ashl_const (low[0], count, mode);
22608 }
22609 return;
22610 }
22611
22612 split_double_mode (mode, operands, 1, low, high);
22613
22614 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22615
22616 if (operands[1] == const1_rtx)
22617 {
22618 /* Assuming we've chosen QImode-capable registers, 1 << N
22619 can be done with two 32/64-bit shifts, no branches, no cmoves. */
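	  /* As a rough sketch, for DImode on a 32-bit target with a variable
	     count in %cl this expands to:

		xorl	low, low
		xorl	high, high
		testb	$32, %cl
		sete	low_byte		# low  = (count & 32) == 0
		setne	high_byte		# high = (count & 32) != 0
		sall	%cl, low		# shift count taken modulo 32
		sall	%cl, high

	     so exactly one half ends up holding the single set bit.  */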
22620 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22621 {
22622 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22623
22624 ix86_expand_clear (low[0]);
22625 ix86_expand_clear (high[0]);
22626 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22627
22628 d = gen_lowpart (QImode, low[0]);
22629 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22630 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22631 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22632
22633 d = gen_lowpart (QImode, high[0]);
22634 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22635 s = gen_rtx_NE (QImode, flags, const0_rtx);
22636 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22637 }
22638
22639 /* Otherwise, we can get the same results by manually performing
22640 a bit extract operation on bit 5/6, and then performing the two
22641 shifts. The two methods of getting 0/1 into low/high are exactly
22642 the same size. Avoiding the shift in the bit extract case helps
22643 pentium4 a bit; no one else seems to care much either way. */
22644 else
22645 {
22646 enum machine_mode half_mode;
22647 rtx (*gen_lshr3)(rtx, rtx, rtx);
22648 rtx (*gen_and3)(rtx, rtx, rtx);
22649 rtx (*gen_xor3)(rtx, rtx, rtx);
22650 HOST_WIDE_INT bits;
22651 rtx x;
22652
22653 if (mode == DImode)
22654 {
22655 half_mode = SImode;
22656 gen_lshr3 = gen_lshrsi3;
22657 gen_and3 = gen_andsi3;
22658 gen_xor3 = gen_xorsi3;
22659 bits = 5;
22660 }
22661 else
22662 {
22663 half_mode = DImode;
22664 gen_lshr3 = gen_lshrdi3;
22665 gen_and3 = gen_anddi3;
22666 gen_xor3 = gen_xordi3;
22667 bits = 6;
22668 }
22669
22670 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22671 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22672 else
22673 x = gen_lowpart (half_mode, operands[2]);
22674 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22675
22676 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22677 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22678 emit_move_insn (low[0], high[0]);
22679 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22680 }
22681
22682 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22683 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22684 return;
22685 }
22686
22687 if (operands[1] == constm1_rtx)
22688 {
22689 /* For -1 << N, we can avoid the shld instruction, because we
22690 know that we're shifting 0...31/63 ones into a -1. */
22691 emit_move_insn (low[0], constm1_rtx);
22692 if (optimize_insn_for_size_p ())
22693 emit_move_insn (high[0], low[0]);
22694 else
22695 emit_move_insn (high[0], constm1_rtx);
22696 }
22697 else
22698 {
22699 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22700
22701 if (!rtx_equal_p (operands[0], operands[1]))
22702 emit_move_insn (operands[0], operands[1]);
22703
22704 split_double_mode (mode, operands, 1, low, high);
22705 emit_insn (gen_shld (high[0], low[0], operands[2]));
22706 }
22707
22708 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22709
22710 if (TARGET_CMOVE && scratch)
22711 {
22712 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22713 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22714
22715 ix86_expand_clear (scratch);
22716 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22717 }
22718 else
22719 {
22720 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22721 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22722
22723 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22724 }
22725 }
22726
22727 void
22728 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22729 {
22730 rtx (*gen_ashr3)(rtx, rtx, rtx)
22731 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22732 rtx (*gen_shrd)(rtx, rtx, rtx);
22733 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22734
22735 rtx low[2], high[2];
22736 int count;
22737
22738 if (CONST_INT_P (operands[2]))
22739 {
22740 split_double_mode (mode, operands, 2, low, high);
22741 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22742
22743 if (count == GET_MODE_BITSIZE (mode) - 1)
22744 {
22745 emit_move_insn (high[0], high[1]);
22746 emit_insn (gen_ashr3 (high[0], high[0],
22747 GEN_INT (half_width - 1)));
22748 emit_move_insn (low[0], high[0]);
22749
22750 }
22751 else if (count >= half_width)
22752 {
22753 emit_move_insn (low[0], high[1]);
22754 emit_move_insn (high[0], low[0]);
22755 emit_insn (gen_ashr3 (high[0], high[0],
22756 GEN_INT (half_width - 1)));
22757
22758 if (count > half_width)
22759 emit_insn (gen_ashr3 (low[0], low[0],
22760 GEN_INT (count - half_width)));
22761 }
22762 else
22763 {
22764 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22765
22766 if (!rtx_equal_p (operands[0], operands[1]))
22767 emit_move_insn (operands[0], operands[1]);
22768
22769 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22770 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22771 }
22772 }
22773 else
22774 {
22775 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22776
22777 if (!rtx_equal_p (operands[0], operands[1]))
22778 emit_move_insn (operands[0], operands[1]);
22779
22780 split_double_mode (mode, operands, 1, low, high);
22781
22782 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22783 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22784
22785 if (TARGET_CMOVE && scratch)
22786 {
22787 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22788 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22789
22790 emit_move_insn (scratch, high[0]);
22791 emit_insn (gen_ashr3 (scratch, scratch,
22792 GEN_INT (half_width - 1)));
22793 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22794 scratch));
22795 }
22796 else
22797 {
22798 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22799 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22800
22801 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22802 }
22803 }
22804 }
22805
22806 void
22807 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22808 {
22809 rtx (*gen_lshr3)(rtx, rtx, rtx)
22810 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22811 rtx (*gen_shrd)(rtx, rtx, rtx);
22812 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22813
22814 rtx low[2], high[2];
22815 int count;
22816
22817 if (CONST_INT_P (operands[2]))
22818 {
22819 split_double_mode (mode, operands, 2, low, high);
22820 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22821
22822 if (count >= half_width)
22823 {
22824 emit_move_insn (low[0], high[1]);
22825 ix86_expand_clear (high[0]);
22826
22827 if (count > half_width)
22828 emit_insn (gen_lshr3 (low[0], low[0],
22829 GEN_INT (count - half_width)));
22830 }
22831 else
22832 {
22833 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22834
22835 if (!rtx_equal_p (operands[0], operands[1]))
22836 emit_move_insn (operands[0], operands[1]);
22837
22838 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22839 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22840 }
22841 }
22842 else
22843 {
22844 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22845
22846 if (!rtx_equal_p (operands[0], operands[1]))
22847 emit_move_insn (operands[0], operands[1]);
22848
22849 split_double_mode (mode, operands, 1, low, high);
22850
22851 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22852 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22853
22854 if (TARGET_CMOVE && scratch)
22855 {
22856 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22857 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22858
22859 ix86_expand_clear (scratch);
22860 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22861 scratch));
22862 }
22863 else
22864 {
22865 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22866 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22867
22868 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22869 }
22870 }
22871 }
22872
22873 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22874 static void
22875 predict_jump (int prob)
22876 {
22877 rtx insn = get_last_insn ();
22878 gcc_assert (JUMP_P (insn));
22879 add_int_reg_note (insn, REG_BR_PROB, prob);
22880 }
22881
22882 /* Helper function for the string operations below.  Test VARIABLE for
22883 whether it is aligned to VALUE bytes; if so, jump to the returned label. */
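/* For example, ix86_expand_aligntest (ptr, 4, ...) ANDs a copy of PTR
   with 4 and branches to the returned label when the result is zero,
   i.e. when bit 2 of the value is clear.  */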
22884 static rtx_code_label *
22885 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22886 {
22887 rtx_code_label *label = gen_label_rtx ();
22888 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22889 if (GET_MODE (variable) == DImode)
22890 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22891 else
22892 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22893 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22894 1, label);
22895 if (epilogue)
22896 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22897 else
22898 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22899 return label;
22900 }
22901
22902 /* Decrease COUNTREG by VALUE. */
22903 static void
22904 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22905 {
22906 rtx (*gen_add)(rtx, rtx, rtx)
22907 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22908
22909 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22910 }
22911
22912 /* Zero extend EXP, which may be SImode, into a Pmode register. */
22913 rtx
22914 ix86_zero_extend_to_Pmode (rtx exp)
22915 {
22916 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22917 }
22918
22919 /* Divide COUNTREG by SCALE. */
22920 static rtx
22921 scale_counter (rtx countreg, int scale)
22922 {
22923 rtx sc;
22924
22925 if (scale == 1)
22926 return countreg;
22927 if (CONST_INT_P (countreg))
22928 return GEN_INT (INTVAL (countreg) / scale);
22929 gcc_assert (REG_P (countreg));
22930
22931 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22932 GEN_INT (exact_log2 (scale)),
22933 NULL, 1, OPTAB_DIRECT);
22934 return sc;
22935 }
22936
22937 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22938 DImode for constant loop counts. */
22939
22940 static enum machine_mode
22941 counter_mode (rtx count_exp)
22942 {
22943 if (GET_MODE (count_exp) != VOIDmode)
22944 return GET_MODE (count_exp);
22945 if (!CONST_INT_P (count_exp))
22946 return Pmode;
22947 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22948 return DImode;
22949 return SImode;
22950 }
22951
22952 /* Copy the address to a Pmode register. This is used for x32 to
22953 truncate DImode TLS address to a SImode register. */
22954
22955 static rtx
22956 ix86_copy_addr_to_reg (rtx addr)
22957 {
22958 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22959 return copy_addr_to_reg (addr);
22960 else
22961 {
22962 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22963 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22964 }
22965 }
22966
22967 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by SRCPTR
22968 to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size is COUNT,
22969 specified in bytes.  When ISSETMEM is TRUE, output the equivalent loop to set
22970 memory to VALUE (supposed to be in MODE).
22971 
22972 The size is rounded down to a whole number of chunks moved at once.
22973 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
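/* For instance, with MODE == SImode and UNROLL == 4 each loop iteration
   copies (or stores) 16 bytes; COUNT is masked down to a multiple of 16
   and any remainder is left for a separate epilogue (see
   expand_movmem_epilogue below).  */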
22974
22975
22976 static void
22977 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22978 rtx destptr, rtx srcptr, rtx value,
22979 rtx count, enum machine_mode mode, int unroll,
22980 int expected_size, bool issetmem)
22981 {
22982 rtx_code_label *out_label, *top_label;
22983 rtx iter, tmp;
22984 enum machine_mode iter_mode = counter_mode (count);
22985 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22986 rtx piece_size = GEN_INT (piece_size_n);
22987 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22988 rtx size;
22989 int i;
22990
22991 top_label = gen_label_rtx ();
22992 out_label = gen_label_rtx ();
22993 iter = gen_reg_rtx (iter_mode);
22994
22995 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22996 NULL, 1, OPTAB_DIRECT);
22997 /* Those two should combine. */
22998 if (piece_size == const1_rtx)
22999 {
23000 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23001 true, out_label);
23002 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23003 }
23004 emit_move_insn (iter, const0_rtx);
23005
23006 emit_label (top_label);
23007
23008 tmp = convert_modes (Pmode, iter_mode, iter, true);
23009
23010 /* This assert could be relaxed - in that case we'll need to compute
23011 the smallest power of two containing PIECE_SIZE_N and pass it to
23012 offset_address. */
23013 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23014 destmem = offset_address (destmem, tmp, piece_size_n);
23015 destmem = adjust_address (destmem, mode, 0);
23016
23017 if (!issetmem)
23018 {
23019 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23020 srcmem = adjust_address (srcmem, mode, 0);
23021
23022 /* When unrolling for chips that reorder memory reads and writes,
23023 we can save registers by using a single temporary.
23024 Also, using 4 temporaries is overkill in 32-bit mode. */
23025 if (!TARGET_64BIT && 0)
23026 {
23027 for (i = 0; i < unroll; i++)
23028 {
23029 if (i)
23030 {
23031 destmem =
23032 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23033 srcmem =
23034 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23035 }
23036 emit_move_insn (destmem, srcmem);
23037 }
23038 }
23039 else
23040 {
23041 rtx tmpreg[4];
23042 gcc_assert (unroll <= 4);
23043 for (i = 0; i < unroll; i++)
23044 {
23045 tmpreg[i] = gen_reg_rtx (mode);
23046 if (i)
23047 {
23048 srcmem =
23049 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23050 }
23051 emit_move_insn (tmpreg[i], srcmem);
23052 }
23053 for (i = 0; i < unroll; i++)
23054 {
23055 if (i)
23056 {
23057 destmem =
23058 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23059 }
23060 emit_move_insn (destmem, tmpreg[i]);
23061 }
23062 }
23063 }
23064 else
23065 for (i = 0; i < unroll; i++)
23066 {
23067 if (i)
23068 destmem =
23069 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23070 emit_move_insn (destmem, value);
23071 }
23072
23073 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23074 true, OPTAB_LIB_WIDEN);
23075 if (tmp != iter)
23076 emit_move_insn (iter, tmp);
23077
23078 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23079 true, top_label);
23080 if (expected_size != -1)
23081 {
23082 expected_size /= GET_MODE_SIZE (mode) * unroll;
23083 if (expected_size == 0)
23084 predict_jump (0);
23085 else if (expected_size > REG_BR_PROB_BASE)
23086 predict_jump (REG_BR_PROB_BASE - 1);
23087 else
23088 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23089 }
23090 else
23091 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23092 iter = ix86_zero_extend_to_Pmode (iter);
23093 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23094 true, OPTAB_LIB_WIDEN);
23095 if (tmp != destptr)
23096 emit_move_insn (destptr, tmp);
23097 if (!issetmem)
23098 {
23099 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23100 true, OPTAB_LIB_WIDEN);
23101 if (tmp != srcptr)
23102 emit_move_insn (srcptr, tmp);
23103 }
23104 emit_label (out_label);
23105 }
23106
23107 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23108 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23109 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23110 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23111 ORIG_VALUE is the original value passed to memset to fill the memory with.
23112 Other arguments have the same meaning as for the previous function. */
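/* For instance, a QImode request with a compile-time COUNT that is a
   multiple of 4 (and, for memset, a zero fill value) is promoted to
   SImode and emitted as a single "rep movsd" / "rep stosd" with the
   count register holding COUNT / 4.  */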
23113
23114 static void
23115 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23116 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23117 rtx count,
23118 enum machine_mode mode, bool issetmem)
23119 {
23120 rtx destexp;
23121 rtx srcexp;
23122 rtx countreg;
23123 HOST_WIDE_INT rounded_count;
23124
23125 /* If possible, it is shorter to use rep movs.
23126 TODO: Maybe it is better to move this logic to decide_alg. */
23127 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23128 && (!issetmem || orig_value == const0_rtx))
23129 mode = SImode;
23130
23131 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23132 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23133
23134 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23135 GET_MODE_SIZE (mode)));
23136 if (mode != QImode)
23137 {
23138 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23139 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23140 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23141 }
23142 else
23143 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23144 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23145 {
23146 rounded_count = (INTVAL (count)
23147 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23148 destmem = shallow_copy_rtx (destmem);
23149 set_mem_size (destmem, rounded_count);
23150 }
23151 else if (MEM_SIZE_KNOWN_P (destmem))
23152 clear_mem_size (destmem);
23153
23154 if (issetmem)
23155 {
23156 value = force_reg (mode, gen_lowpart (mode, value));
23157 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23158 }
23159 else
23160 {
23161 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23162 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23163 if (mode != QImode)
23164 {
23165 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23166 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23167 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23168 }
23169 else
23170 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23171 if (CONST_INT_P (count))
23172 {
23173 rounded_count = (INTVAL (count)
23174 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23175 srcmem = shallow_copy_rtx (srcmem);
23176 set_mem_size (srcmem, rounded_count);
23177 }
23178 else
23179 {
23180 if (MEM_SIZE_KNOWN_P (srcmem))
23181 clear_mem_size (srcmem);
23182 }
23183 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23184 destexp, srcexp));
23185 }
23186 }
23187
23188 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23189 DESTMEM.
23190 SRCMEM is passed by pointer and is updated on return.
23191 The return value is the updated DESTMEM. */
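/* For instance (assuming 16-byte vector moves are available),
   SIZE_TO_MOVE == 16 on a 64-bit target becomes one V2DImode load into a
   temporary register followed by one V2DImode store, after which DESTPTR
   and SRCPTR are advanced by 16.  */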
23192 static rtx
23193 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23194 HOST_WIDE_INT size_to_move)
23195 {
23196 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23197 enum insn_code code;
23198 enum machine_mode move_mode;
23199 int piece_size, i;
23200
23201 /* Find the widest mode in which we could perform moves.
23202 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23203 it until a move of that size is supported. */
23204 piece_size = 1 << floor_log2 (size_to_move);
23205 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23206 code = optab_handler (mov_optab, move_mode);
23207 while (code == CODE_FOR_nothing && piece_size > 1)
23208 {
23209 piece_size >>= 1;
23210 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23211 code = optab_handler (mov_optab, move_mode);
23212 }
23213
23214 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23215 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23216 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23217 {
23218 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23219 move_mode = mode_for_vector (word_mode, nunits);
23220 code = optab_handler (mov_optab, move_mode);
23221 if (code == CODE_FOR_nothing)
23222 {
23223 move_mode = word_mode;
23224 piece_size = GET_MODE_SIZE (move_mode);
23225 code = optab_handler (mov_optab, move_mode);
23226 }
23227 }
23228 gcc_assert (code != CODE_FOR_nothing);
23229
23230 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23231 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23232
23233 /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23234 gcc_assert (size_to_move % piece_size == 0);
23235 adjust = GEN_INT (piece_size);
23236 for (i = 0; i < size_to_move; i += piece_size)
23237 {
23238 /* We move from memory to memory, so we'll need to do it via
23239 a temporary register. */
23240 tempreg = gen_reg_rtx (move_mode);
23241 emit_insn (GEN_FCN (code) (tempreg, src));
23242 emit_insn (GEN_FCN (code) (dst, tempreg));
23243
23244 emit_move_insn (destptr,
23245 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23246 emit_move_insn (srcptr,
23247 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23248
23249 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23250 piece_size);
23251 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23252 piece_size);
23253 }
23254
23255 /* Update DST and SRC rtx. */
23256 *srcmem = src;
23257 return dst;
23258 }
23259
23260 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
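/* For instance, with a compile-time COUNT of 29 and MAX_SIZE == 16 the
   epilogue copies the remaining 29 % 16 == 13 bytes as one 8-byte, one
   4-byte and one 1-byte move.  */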
23261 static void
23262 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23263 rtx destptr, rtx srcptr, rtx count, int max_size)
23264 {
23265 rtx src, dest;
23266 if (CONST_INT_P (count))
23267 {
23268 HOST_WIDE_INT countval = INTVAL (count);
23269 HOST_WIDE_INT epilogue_size = countval % max_size;
23270 int i;
23271
23272 /* For now MAX_SIZE should be a power of 2. This assert could be
23273 relaxed, but it'll require a bit more complicated epilogue
23274 expanding. */
23275 gcc_assert ((max_size & (max_size - 1)) == 0);
23276 for (i = max_size; i >= 1; i >>= 1)
23277 {
23278 if (epilogue_size & i)
23279 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23280 }
23281 return;
23282 }
23283 if (max_size > 8)
23284 {
23285 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23286 count, 1, OPTAB_DIRECT);
23287 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23288 count, QImode, 1, 4, false);
23289 return;
23290 }
23291
23292 /* When single stringop instructions are available, we can cheaply advance
23293 the dest and src pointers.  Otherwise we save code size by maintaining an
23294 offset (zero is readily available from the preceding rep operation) and using x86 addressing modes.
23295 */
23296 if (TARGET_SINGLE_STRINGOP)
23297 {
23298 if (max_size > 4)
23299 {
23300 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23301 src = change_address (srcmem, SImode, srcptr);
23302 dest = change_address (destmem, SImode, destptr);
23303 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23304 emit_label (label);
23305 LABEL_NUSES (label) = 1;
23306 }
23307 if (max_size > 2)
23308 {
23309 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23310 src = change_address (srcmem, HImode, srcptr);
23311 dest = change_address (destmem, HImode, destptr);
23312 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23313 emit_label (label);
23314 LABEL_NUSES (label) = 1;
23315 }
23316 if (max_size > 1)
23317 {
23318 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23319 src = change_address (srcmem, QImode, srcptr);
23320 dest = change_address (destmem, QImode, destptr);
23321 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23322 emit_label (label);
23323 LABEL_NUSES (label) = 1;
23324 }
23325 }
23326 else
23327 {
23328 rtx offset = force_reg (Pmode, const0_rtx);
23329 rtx tmp;
23330
23331 if (max_size > 4)
23332 {
23333 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23334 src = change_address (srcmem, SImode, srcptr);
23335 dest = change_address (destmem, SImode, destptr);
23336 emit_move_insn (dest, src);
23337 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23338 true, OPTAB_LIB_WIDEN);
23339 if (tmp != offset)
23340 emit_move_insn (offset, tmp);
23341 emit_label (label);
23342 LABEL_NUSES (label) = 1;
23343 }
23344 if (max_size > 2)
23345 {
23346 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23347 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23348 src = change_address (srcmem, HImode, tmp);
23349 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23350 dest = change_address (destmem, HImode, tmp);
23351 emit_move_insn (dest, src);
23352 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23353 true, OPTAB_LIB_WIDEN);
23354 if (tmp != offset)
23355 emit_move_insn (offset, tmp);
23356 emit_label (label);
23357 LABEL_NUSES (label) = 1;
23358 }
23359 if (max_size > 1)
23360 {
23361 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23362 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23363 src = change_address (srcmem, QImode, tmp);
23364 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23365 dest = change_address (destmem, QImode, tmp);
23366 emit_move_insn (dest, src);
23367 emit_label (label);
23368 LABEL_NUSES (label) = 1;
23369 }
23370 }
23371 }
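
/* For instance (numbers are illustrative only): with MAX_SIZE == 16 and a
constant COUNT of 23, EPILOGUE_SIZE is 23 % 16 == 7, so the constant-count
path above emits one 4-byte, one 2-byte and one 1-byte emit_memmov,
matching the set bits of 7. */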
23372
23373 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23374 with value PROMOTED_VAL.
23375 DESTPTR is advanced past the bytes that are stored.
23376 Return value is the updated DST. */
23377 static rtx
23378 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23379 HOST_WIDE_INT size_to_move)
23380 {
23381 rtx dst = destmem, adjust;
23382 enum insn_code code;
23383 enum machine_mode move_mode;
23384 int piece_size, i;
23385
23386 /* Find the widest mode in which we could perform moves.
23387 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
23388 it until a move of that size is supported. */
23389 move_mode = GET_MODE (promoted_val);
23390 if (move_mode == VOIDmode)
23391 move_mode = QImode;
23392 if (size_to_move < GET_MODE_SIZE (move_mode))
23393 {
23394 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23395 promoted_val = gen_lowpart (move_mode, promoted_val);
23396 }
23397 piece_size = GET_MODE_SIZE (move_mode);
23398 code = optab_handler (mov_optab, move_mode);
23399 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23400
23401 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23402
23403 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23404 gcc_assert (size_to_move % piece_size == 0);
23405 adjust = GEN_INT (piece_size);
23406 for (i = 0; i < size_to_move; i += piece_size)
23407 {
23408 if (piece_size <= GET_MODE_SIZE (word_mode))
23409 {
23410 emit_insn (gen_strset (destptr, dst, promoted_val));
23411 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23412 piece_size);
23413 continue;
23414 }
23415
23416 emit_insn (GEN_FCN (code) (dst, promoted_val));
23417
23418 emit_move_insn (destptr,
23419 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23420
23421 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23422 piece_size);
23423 }
23424
23425 /* Update DST rtx. */
23426 return dst;
23427 }
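
/* Illustrative example for emit_memset above: if PROMOTED_VAL lives in
SImode but SIZE_TO_MOVE is 2, MOVE_MODE is narrowed to HImode and
gen_lowpart extracts the low 16 bits of the promoted value, so a single
2-byte store is emitted; pieces up to word size go through gen_strset,
larger (vector) pieces through the move pattern directly. */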
23428 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23429 static void
23430 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23431 rtx count, int max_size)
23432 {
23433 count =
23434 expand_simple_binop (counter_mode (count), AND, count,
23435 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23436 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23437 gen_lowpart (QImode, value), count, QImode,
23438 1, max_size / 2, true);
23439 }
23440
23441 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23442 static void
23443 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23444 rtx count, int max_size)
23445 {
23446 rtx dest;
23447
23448 if (CONST_INT_P (count))
23449 {
23450 HOST_WIDE_INT countval = INTVAL (count);
23451 HOST_WIDE_INT epilogue_size = countval % max_size;
23452 int i;
23453
23454 /* For now MAX_SIZE should be a power of 2. This assert could be
23455 relaxed, but that would require a somewhat more complicated epilogue
23456 expansion. */
23457 gcc_assert ((max_size & (max_size - 1)) == 0);
23458 for (i = max_size; i >= 1; i >>= 1)
23459 {
23460 if (epilogue_size & i)
23461 {
23462 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23463 destmem = emit_memset (destmem, destptr, vec_value, i);
23464 else
23465 destmem = emit_memset (destmem, destptr, value, i);
23466 }
23467 }
23468 return;
23469 }
23470 if (max_size > 32)
23471 {
23472 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23473 return;
23474 }
23475 if (max_size > 16)
23476 {
23477 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23478 if (TARGET_64BIT)
23479 {
23480 dest = change_address (destmem, DImode, destptr);
23481 emit_insn (gen_strset (destptr, dest, value));
23482 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23483 emit_insn (gen_strset (destptr, dest, value));
23484 }
23485 else
23486 {
23487 dest = change_address (destmem, SImode, destptr);
23488 emit_insn (gen_strset (destptr, dest, value));
23489 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23490 emit_insn (gen_strset (destptr, dest, value));
23491 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23492 emit_insn (gen_strset (destptr, dest, value));
23493 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23494 emit_insn (gen_strset (destptr, dest, value));
23495 }
23496 emit_label (label);
23497 LABEL_NUSES (label) = 1;
23498 }
23499 if (max_size > 8)
23500 {
23501 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23502 if (TARGET_64BIT)
23503 {
23504 dest = change_address (destmem, DImode, destptr);
23505 emit_insn (gen_strset (destptr, dest, value));
23506 }
23507 else
23508 {
23509 dest = change_address (destmem, SImode, destptr);
23510 emit_insn (gen_strset (destptr, dest, value));
23511 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23512 emit_insn (gen_strset (destptr, dest, value));
23513 }
23514 emit_label (label);
23515 LABEL_NUSES (label) = 1;
23516 }
23517 if (max_size > 4)
23518 {
23519 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23520 dest = change_address (destmem, SImode, destptr);
23521 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23522 emit_label (label);
23523 LABEL_NUSES (label) = 1;
23524 }
23525 if (max_size > 2)
23526 {
23527 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23528 dest = change_address (destmem, HImode, destptr);
23529 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23530 emit_label (label);
23531 LABEL_NUSES (label) = 1;
23532 }
23533 if (max_size > 1)
23534 {
23535 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23536 dest = change_address (destmem, QImode, destptr);
23537 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23538 emit_label (label);
23539 LABEL_NUSES (label) = 1;
23540 }
23541 }
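
/* Example of the constant-count path above (numbers are illustrative):
with MAX_SIZE == 32, COUNT == 56, a 16-byte VEC_VALUE and an 8-byte VALUE,
EPILOGUE_SIZE is 56 % 32 == 24, so a 16-byte emit_memset uses VEC_VALUE
(16 is larger than VALUE's mode) and the remaining 8-byte emit_memset
uses VALUE. */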
23542
23543 /* Depending on ISSETMEM, either copy enough bytes from SRCMEM to DESTMEM, or
23544 store enough bytes into DESTMEM, to align it to DESIRED_ALIGNMENT. The
23545 original alignment is ALIGN. Depending on ISSETMEM, either the arguments
23546 SRCMEM/SRCPTR or VALUE/VEC_VALUE are ignored.
23547 Return value is the updated DESTMEM. */
23548 static rtx
23549 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23550 rtx destptr, rtx srcptr, rtx value,
23551 rtx vec_value, rtx count, int align,
23552 int desired_alignment, bool issetmem)
23553 {
23554 int i;
23555 for (i = 1; i < desired_alignment; i <<= 1)
23556 {
23557 if (align <= i)
23558 {
23559 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23560 if (issetmem)
23561 {
23562 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23563 destmem = emit_memset (destmem, destptr, vec_value, i);
23564 else
23565 destmem = emit_memset (destmem, destptr, value, i);
23566 }
23567 else
23568 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23569 ix86_adjust_counter (count, i);
23570 emit_label (label);
23571 LABEL_NUSES (label) = 1;
23572 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23573 }
23574 }
23575 return destmem;
23576 }
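
/* For illustration: with ALIGN == 2 and DESIRED_ALIGNMENT == 16, the loop
above emits conditional copies (or stores) of 2, 4 and 8 bytes, each
guarded by an aligntest on DESTPTR and each followed by an
ix86_adjust_counter on COUNT, so that DESTPTR is 16-byte aligned when the
main loop starts. */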
23577
23578 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23579 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23580 and jump to DONE_LABEL. */
23581 static void
23582 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23583 rtx destptr, rtx srcptr,
23584 rtx value, rtx vec_value,
23585 rtx count, int size,
23586 rtx done_label, bool issetmem)
23587 {
23588 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23589 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23590 rtx modesize;
23591 int n;
23592
23593 /* If we do not have a vector value to copy, we must reduce the size. */
23594 if (issetmem)
23595 {
23596 if (!vec_value)
23597 {
23598 if (GET_MODE (value) == VOIDmode && size > 8)
23599 mode = Pmode;
23600 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23601 mode = GET_MODE (value);
23602 }
23603 else
23604 mode = GET_MODE (vec_value), value = vec_value;
23605 }
23606 else
23607 {
23608 /* Choose appropriate vector mode. */
23609 if (size >= 32)
23610 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23611 else if (size >= 16)
23612 mode = TARGET_SSE ? V16QImode : DImode;
23613 srcmem = change_address (srcmem, mode, srcptr);
23614 }
23615 destmem = change_address (destmem, mode, destptr);
23616 modesize = GEN_INT (GET_MODE_SIZE (mode));
23617 gcc_assert (GET_MODE_SIZE (mode) <= size);
23618 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23619 {
23620 if (issetmem)
23621 emit_move_insn (destmem, gen_lowpart (mode, value));
23622 else
23623 {
23624 emit_move_insn (destmem, srcmem);
23625 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23626 }
23627 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23628 }
23629
23630 destmem = offset_address (destmem, count, 1);
23631 destmem = offset_address (destmem, GEN_INT (-2 * size),
23632 GET_MODE_SIZE (mode));
23633 if (!issetmem)
23634 {
23635 srcmem = offset_address (srcmem, count, 1);
23636 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23637 GET_MODE_SIZE (mode));
23638 }
23639 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23640 {
23641 if (issetmem)
23642 emit_move_insn (destmem, gen_lowpart (mode, value));
23643 else
23644 {
23645 emit_move_insn (destmem, srcmem);
23646 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23647 }
23648 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23649 }
23650 emit_jump_insn (gen_jump (done_label));
23651 emit_barrier ();
23652
23653 emit_label (label);
23654 LABEL_NUSES (label) = 1;
23655 }
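
/* Concrete illustration of the overlap trick above: with SIZE == 8 and a
runtime COUNT of 11 (so COUNT & 8 is set), the first loop copies bytes
[0, 8) and, after the offset_address adjustments, the second loop copies
bytes [COUNT - 8, COUNT) == [3, 11); the two possibly overlapping moves
together cover the whole block. */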
23656
23657 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power
23658 of 2) and get ready for the main memcpy loop by copying the initial
23659 DESIRED_ALIGN-ALIGN bytes and the last SIZE bytes, adjusting
23660 DESTPTR/SRCPTR/COUNT so that we can proceed with a loop copying SIZE
23661 bytes at once. Do moves in MODE.
23662 DONE_LABEL is a label after the whole copying sequence. The label is
23663 created on demand if *DONE_LABEL is NULL.
23664 MIN_SIZE is the minimal size of the block copied; it gets adjusted for new bounds after the initial copies.
23665
23666 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23667 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23668 we will dispatch to a library call for large blocks.
23669
23670 In pseudocode we do:
23671
23672 if (COUNT < SIZE)
23673 {
23674 Assume that SIZE is 4. Bigger sizes are handled analogously
23675 if (COUNT & 4)
23676 {
23677 copy 4 bytes from SRCPTR to DESTPTR
23678 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23679 goto done_label
23680 }
23681 if (!COUNT)
23682 goto done_label;
23683 copy 1 byte from SRCPTR to DESTPTR
23684 if (COUNT & 2)
23685 {
23686 copy 2 bytes from SRCPTR to DESTPTR
23687 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23688 }
23689 }
23690 else
23691 {
23692 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23693 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23694
23695 OLD_DESTPTR = DESTPTR;
23696 Align DESTPTR up to DESIRED_ALIGN
23697 SRCPTR += DESTPTR - OLD_DESTPTR
23698 COUNT -= DESTPTR - OLD_DESTPTR
23699 if (DYNAMIC_CHECK)
23700 Round COUNT down to multiple of SIZE
23701 << optional caller supplied zero size guard is here >>
23702 << optional caller supplied dynamic check is here >>
23703 << caller supplied main copy loop is here >>
23704 }
23705 done_label:
23706 */
23707 static void
23708 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23709 rtx *destptr, rtx *srcptr,
23710 enum machine_mode mode,
23711 rtx value, rtx vec_value,
23712 rtx *count,
23713 rtx_code_label **done_label,
23714 int size,
23715 int desired_align,
23716 int align,
23717 unsigned HOST_WIDE_INT *min_size,
23718 bool dynamic_check,
23719 bool issetmem)
23720 {
23721 rtx_code_label *loop_label = NULL, *label;
23722 int n;
23723 rtx modesize;
23724 int prolog_size = 0;
23725 rtx mode_value;
23726
23727 /* Choose the proper value to copy. */
23728 if (issetmem && VECTOR_MODE_P (mode))
23729 mode_value = vec_value;
23730 else
23731 mode_value = value;
23732 gcc_assert (GET_MODE_SIZE (mode) <= size);
23733
23734 /* See if block is big or small, handle small blocks. */
23735 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23736 {
23737 int size2 = size;
23738 loop_label = gen_label_rtx ();
23739
23740 if (!*done_label)
23741 *done_label = gen_label_rtx ();
23742
23743 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23744 1, loop_label);
23745 size2 >>= 1;
23746
23747 /* Handle sizes > 3. */
23748 for (;size2 > 2; size2 >>= 1)
23749 expand_small_movmem_or_setmem (destmem, srcmem,
23750 *destptr, *srcptr,
23751 value, vec_value,
23752 *count,
23753 size2, *done_label, issetmem);
23754 /* Nothing to copy? Jump to DONE_LABEL if so. */
23755 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23756 1, *done_label);
23757
23758 /* Do a byte copy. */
23759 destmem = change_address (destmem, QImode, *destptr);
23760 if (issetmem)
23761 emit_move_insn (destmem, gen_lowpart (QImode, value));
23762 else
23763 {
23764 srcmem = change_address (srcmem, QImode, *srcptr);
23765 emit_move_insn (destmem, srcmem);
23766 }
23767
23768 /* Handle sizes 2 and 3. */
23769 label = ix86_expand_aligntest (*count, 2, false);
23770 destmem = change_address (destmem, HImode, *destptr);
23771 destmem = offset_address (destmem, *count, 1);
23772 destmem = offset_address (destmem, GEN_INT (-2), 2);
23773 if (issetmem)
23774 emit_move_insn (destmem, gen_lowpart (HImode, value));
23775 else
23776 {
23777 srcmem = change_address (srcmem, HImode, *srcptr);
23778 srcmem = offset_address (srcmem, *count, 1);
23779 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23780 emit_move_insn (destmem, srcmem);
23781 }
23782
23783 emit_label (label);
23784 LABEL_NUSES (label) = 1;
23785 emit_jump_insn (gen_jump (*done_label));
23786 emit_barrier ();
23787 }
23788 else
23789 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23790 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23791
23792 /* Start memcpy for COUNT >= SIZE. */
23793 if (loop_label)
23794 {
23795 emit_label (loop_label);
23796 LABEL_NUSES (loop_label) = 1;
23797 }
23798
23799 /* Copy first desired_align bytes. */
23800 if (!issetmem)
23801 srcmem = change_address (srcmem, mode, *srcptr);
23802 destmem = change_address (destmem, mode, *destptr);
23803 modesize = GEN_INT (GET_MODE_SIZE (mode));
23804 for (n = 0; prolog_size < desired_align - align; n++)
23805 {
23806 if (issetmem)
23807 emit_move_insn (destmem, mode_value);
23808 else
23809 {
23810 emit_move_insn (destmem, srcmem);
23811 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23812 }
23813 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23814 prolog_size += GET_MODE_SIZE (mode);
23815 }
23816
23817
23818 /* Copy last SIZE bytes. */
23819 destmem = offset_address (destmem, *count, 1);
23820 destmem = offset_address (destmem,
23821 GEN_INT (-size - prolog_size),
23822 1);
23823 if (issetmem)
23824 emit_move_insn (destmem, mode_value);
23825 else
23826 {
23827 srcmem = offset_address (srcmem, *count, 1);
23828 srcmem = offset_address (srcmem,
23829 GEN_INT (-size - prolog_size),
23830 1);
23831 emit_move_insn (destmem, srcmem);
23832 }
23833 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23834 {
23835 destmem = offset_address (destmem, modesize, 1);
23836 if (issetmem)
23837 emit_move_insn (destmem, mode_value);
23838 else
23839 {
23840 srcmem = offset_address (srcmem, modesize, 1);
23841 emit_move_insn (destmem, srcmem);
23842 }
23843 }
23844
23845 /* Align destination. */
23846 if (desired_align > 1 && desired_align > align)
23847 {
23848 rtx saveddest = *destptr;
23849
23850 gcc_assert (desired_align <= size);
23851 /* Align destptr up, placing it in a new register. */
23852 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23853 GEN_INT (prolog_size),
23854 NULL_RTX, 1, OPTAB_DIRECT);
23855 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23856 GEN_INT (-desired_align),
23857 *destptr, 1, OPTAB_DIRECT);
23858 /* See how many bytes we skipped. */
23859 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23860 *destptr,
23861 saveddest, 1, OPTAB_DIRECT);
23862 /* Adjust srcptr and count. */
23863 if (!issetmem)
23864 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23865 *srcptr, 1, OPTAB_DIRECT);
23866 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23867 saveddest, *count, 1, OPTAB_DIRECT);
23868 /* We copied at most size + prolog_size. */
23869 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23870 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23871 else
23872 *min_size = 0;
23873
23874 /* Our loops always round down the block size, but for dispatch to a
23875 library call we need the precise value. */
23876 if (dynamic_check)
23877 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23878 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23879 }
23880 else
23881 {
23882 gcc_assert (prolog_size == 0);
23883 /* Decrease count, so we won't end up copying the last word twice. */
23884 if (!CONST_INT_P (*count))
23885 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23886 constm1_rtx, *count, 1, OPTAB_DIRECT);
23887 else
23888 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23889 if (*min_size)
23890 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23891 }
23892 }
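
/* Rough illustration of the sequence above (all sizes made up): for a copy
with SIZE == 16, DESIRED_ALIGN == 16, ALIGN == 4 and a COUNT known to be
at least 16, the prologue emits one possibly misaligned 16-byte move from
the start of the block and one 16-byte move ending at DESTPTR + COUNT,
then rounds DESTPTR up to a 16-byte boundary and adjusts SRCPTR, COUNT and
MIN_SIZE by the bytes skipped; the caller-supplied main loop then only has
to handle aligned 16-byte chunks, and anything it rounds away was already
covered by the tail move. */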
23893
23894
23895 /* This function is like the previous one, except here we know how many bytes
23896 need to be copied. That allows us to update alignment not only of DST, which
23897 is returned, but also of SRC, which is passed as a pointer for that
23898 reason. */
23899 static rtx
23900 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23901 rtx srcreg, rtx value, rtx vec_value,
23902 int desired_align, int align_bytes,
23903 bool issetmem)
23904 {
23905 rtx src = NULL;
23906 rtx orig_dst = dst;
23907 rtx orig_src = NULL;
23908 int piece_size = 1;
23909 int copied_bytes = 0;
23910
23911 if (!issetmem)
23912 {
23913 gcc_assert (srcp != NULL);
23914 src = *srcp;
23915 orig_src = src;
23916 }
23917
23918 for (piece_size = 1;
23919 piece_size <= desired_align && copied_bytes < align_bytes;
23920 piece_size <<= 1)
23921 {
23922 if (align_bytes & piece_size)
23923 {
23924 if (issetmem)
23925 {
23926 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23927 dst = emit_memset (dst, destreg, vec_value, piece_size);
23928 else
23929 dst = emit_memset (dst, destreg, value, piece_size);
23930 }
23931 else
23932 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23933 copied_bytes += piece_size;
23934 }
23935 }
23936 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23937 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23938 if (MEM_SIZE_KNOWN_P (orig_dst))
23939 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23940
23941 if (!issetmem)
23942 {
23943 int src_align_bytes = get_mem_align_offset (src, desired_align
23944 * BITS_PER_UNIT);
23945 if (src_align_bytes >= 0)
23946 src_align_bytes = desired_align - src_align_bytes;
23947 if (src_align_bytes >= 0)
23948 {
23949 unsigned int src_align;
23950 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23951 {
23952 if ((src_align_bytes & (src_align - 1))
23953 == (align_bytes & (src_align - 1)))
23954 break;
23955 }
23956 if (src_align > (unsigned int) desired_align)
23957 src_align = desired_align;
23958 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23959 set_mem_align (src, src_align * BITS_PER_UNIT);
23960 }
23961 if (MEM_SIZE_KNOWN_P (orig_src))
23962 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23963 *srcp = src;
23964 }
23965
23966 return dst;
23967 }
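
/* Worked example (illustrative): with DESIRED_ALIGN == 8 and
ALIGN_BYTES == 7, the loop above emits a 1-byte, a 2-byte and a 4-byte
piece (the set bits of 7), after which DST is marked 8-byte aligned and
its MEM_SIZE is reduced by the bytes already handled; for memcpy the
matching alignment bookkeeping is applied to SRC as well. */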
23968
23969 /* Return true if ALG can be used in current context.
23970 Assume we expand memset if MEMSET is true. */
23971 static bool
23972 alg_usable_p (enum stringop_alg alg, bool memset)
23973 {
23974 if (alg == no_stringop)
23975 return false;
23976 if (alg == vector_loop)
23977 return TARGET_SSE || TARGET_AVX;
23978 /* Algorithms using the rep prefix want at least edi and ecx;
23979 additionally, memset wants eax and memcpy wants esi. Don't
23980 consider such algorithms if the user has appropriated those
23981 registers for their own purposes. */
23982 if (alg == rep_prefix_1_byte
23983 || alg == rep_prefix_4_byte
23984 || alg == rep_prefix_8_byte)
23985 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23986 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23987 return true;
23988 }
23989
23990 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23991 static enum stringop_alg
23992 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23993 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23994 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23995 {
23996 const struct stringop_algs * algs;
23997 bool optimize_for_speed;
23998 int max = 0;
23999 const struct processor_costs *cost;
24000 int i;
24001 bool any_alg_usable_p = false;
24002
24003 *noalign = false;
24004 *dynamic_check = -1;
24005
24006 /* Even if the string operation call is cold, we still might spend a lot
24007 of time processing large blocks. */
24008 if (optimize_function_for_size_p (cfun)
24009 || (optimize_insn_for_size_p ()
24010 && (max_size < 256
24011 || (expected_size != -1 && expected_size < 256))))
24012 optimize_for_speed = false;
24013 else
24014 optimize_for_speed = true;
24015
24016 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24017 if (memset)
24018 algs = &cost->memset[TARGET_64BIT != 0];
24019 else
24020 algs = &cost->memcpy[TARGET_64BIT != 0];
24021
24022 /* See maximal size for user defined algorithm. */
24023 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24024 {
24025 enum stringop_alg candidate = algs->size[i].alg;
24026 bool usable = alg_usable_p (candidate, memset);
24027 any_alg_usable_p |= usable;
24028
24029 if (candidate != libcall && candidate && usable)
24030 max = algs->size[i].max;
24031 }
24032
24033 /* If the expected size is not known but the max size is small enough
24034 that the inline version is a win, set the expected size into
24035 the range. */
24036 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24037 && expected_size == -1)
24038 expected_size = min_size / 2 + max_size / 2;
24039
24040 /* If the user specified the algorithm, honor it if possible. */
24041 if (ix86_stringop_alg != no_stringop
24042 && alg_usable_p (ix86_stringop_alg, memset))
24043 return ix86_stringop_alg;
24044 /* rep; movq or rep; movl is the smallest variant. */
24045 else if (!optimize_for_speed)
24046 {
24047 *noalign = true;
24048 if (!count || (count & 3) || (memset && !zero_memset))
24049 return alg_usable_p (rep_prefix_1_byte, memset)
24050 ? rep_prefix_1_byte : loop_1_byte;
24051 else
24052 return alg_usable_p (rep_prefix_4_byte, memset)
24053 ? rep_prefix_4_byte : loop;
24054 }
24055 /* Very tiny blocks are best handled via the loop; REP is expensive to
24056 set up. */
24057 else if (expected_size != -1 && expected_size < 4)
24058 return loop_1_byte;
24059 else if (expected_size != -1)
24060 {
24061 enum stringop_alg alg = libcall;
24062 bool alg_noalign = false;
24063 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24064 {
24065 /* We get here if the algorithms that were not libcall-based
24066 were rep-prefix based and we are unable to use rep prefixes
24067 based on global register usage. Break out of the loop and
24068 use the heuristic below. */
24069 if (algs->size[i].max == 0)
24070 break;
24071 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24072 {
24073 enum stringop_alg candidate = algs->size[i].alg;
24074
24075 if (candidate != libcall && alg_usable_p (candidate, memset))
24076 {
24077 alg = candidate;
24078 alg_noalign = algs->size[i].noalign;
24079 }
24080 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24081 last non-libcall inline algorithm. */
24082 if (TARGET_INLINE_ALL_STRINGOPS)
24083 {
24084 /* When the current size is best copied by a libcall,
24085 but we are still forced to inline, run the heuristic below
24086 that will pick code for medium-sized blocks. */
24087 if (alg != libcall)
24088 {
24089 *noalign = alg_noalign;
24090 return alg;
24091 }
24092 break;
24093 }
24094 else if (alg_usable_p (candidate, memset))
24095 {
24096 *noalign = algs->size[i].noalign;
24097 return candidate;
24098 }
24099 }
24100 }
24101 }
24102 /* When asked to inline the call anyway, try to pick a meaningful choice.
24103 We look for the maximal size of a block that is faster to copy by hand
24104 and take blocks of at most that size, guessing that the average size
24105 will be roughly half of that maximum.
24106
24107 If this turns out to be bad, we might simply specify the preferred
24108 choice in ix86_costs. */
24109 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24110 && (algs->unknown_size == libcall
24111 || !alg_usable_p (algs->unknown_size, memset)))
24112 {
24113 enum stringop_alg alg;
24114
24115 /* If there aren't any usable algorithms, then recursing on
24116 smaller sizes isn't going to find anything. Just return the
24117 simple byte-at-a-time copy loop. */
24118 if (!any_alg_usable_p)
24119 {
24120 /* Pick something reasonable. */
24121 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24122 *dynamic_check = 128;
24123 return loop_1_byte;
24124 }
24125 if (max <= 0)
24126 max = 4096;
24127 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24128 zero_memset, dynamic_check, noalign);
24129 gcc_assert (*dynamic_check == -1);
24130 gcc_assert (alg != libcall);
24131 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24132 *dynamic_check = max;
24133 return alg;
24134 }
24135 return (alg_usable_p (algs->unknown_size, memset)
24136 ? algs->unknown_size : libcall);
24137 }
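
/* Decision example with hypothetical cost-table entries: if the selected
table were {{256, loop, false}, {8192, rep_prefix_4_byte, false},
{-1, libcall, false}} and EXPECTED_SIZE were 1000, the loop above skips
the first entry (256 < 1000) and returns rep_prefix_4_byte, whose max of
8192 covers the expected size; only if every candidate were libcall or
unusable would the TARGET_INLINE_* fallback heuristic run. */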
24138
24139 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24140 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24141 static int
24142 decide_alignment (int align,
24143 enum stringop_alg alg,
24144 int expected_size,
24145 enum machine_mode move_mode)
24146 {
24147 int desired_align = 0;
24148
24149 gcc_assert (alg != no_stringop);
24150
24151 if (alg == libcall)
24152 return 0;
24153 if (move_mode == VOIDmode)
24154 return 0;
24155
24156 desired_align = GET_MODE_SIZE (move_mode);
24157 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
24158 copying a whole cacheline at once. */
24159 if (TARGET_PENTIUMPRO
24160 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24161 desired_align = 8;
24162
24163 if (optimize_size)
24164 desired_align = 1;
24165 if (desired_align < align)
24166 desired_align = align;
24167 if (expected_size != -1 && expected_size < 4)
24168 desired_align = align;
24169
24170 return desired_align;
24171 }
24172
24173
24174 /* Helper function for memset. For the QImode value 0xXY produce
24175 0xXYXYXYXY of the width specified by MODE. This is essentially
24176 a multiplication by 0x01010101, but we can do slightly better than
24177 synth_mult by unwinding the sequence by hand on CPUs with a
24178 slow multiply. */
24179 static rtx
24180 promote_duplicated_reg (enum machine_mode mode, rtx val)
24181 {
24182 enum machine_mode valmode = GET_MODE (val);
24183 rtx tmp;
24184 int nops = mode == DImode ? 3 : 2;
24185
24186 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24187 if (val == const0_rtx)
24188 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24189 if (CONST_INT_P (val))
24190 {
24191 HOST_WIDE_INT v = INTVAL (val) & 255;
24192
24193 v |= v << 8;
24194 v |= v << 16;
24195 if (mode == DImode)
24196 v |= (v << 16) << 16;
24197 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24198 }
24199
24200 if (valmode == VOIDmode)
24201 valmode = QImode;
24202 if (valmode != QImode)
24203 val = gen_lowpart (QImode, val);
24204 if (mode == QImode)
24205 return val;
24206 if (!TARGET_PARTIAL_REG_STALL)
24207 nops--;
24208 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24209 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24210 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24211 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24212 {
24213 rtx reg = convert_modes (mode, QImode, val, true);
24214 tmp = promote_duplicated_reg (mode, const1_rtx);
24215 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24216 OPTAB_DIRECT);
24217 }
24218 else
24219 {
24220 rtx reg = convert_modes (mode, QImode, val, true);
24221
24222 if (!TARGET_PARTIAL_REG_STALL)
24223 if (mode == SImode)
24224 emit_insn (gen_movsi_insv_1 (reg, reg));
24225 else
24226 emit_insn (gen_movdi_insv_1 (reg, reg));
24227 else
24228 {
24229 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24230 NULL, 1, OPTAB_DIRECT);
24231 reg =
24232 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24233 }
24234 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24235 NULL, 1, OPTAB_DIRECT);
24236 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24237 if (mode == SImode)
24238 return reg;
24239 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24240 NULL, 1, OPTAB_DIRECT);
24241 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24242 return reg;
24243 }
24244 }
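
/* For example, promoting the constant byte 0x2A gives 0x2A2A2A2A for
SImode and 0x2A2A2A2A2A2A2A2A for DImode. For a non-constant byte the
same broadcast is done either by multiplying with the promoted constant
0x01010101 (0x0101010101010101 for DImode) or by the insv/shift-and-IOR
sequence above, whichever the cost model prefers. */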
24245
24246 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24247 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24248 alignment from ALIGN to DESIRED_ALIGN. */
24249 static rtx
24250 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24251 int align)
24252 {
24253 rtx promoted_val;
24254
24255 if (TARGET_64BIT
24256 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24257 promoted_val = promote_duplicated_reg (DImode, val);
24258 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24259 promoted_val = promote_duplicated_reg (SImode, val);
24260 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24261 promoted_val = promote_duplicated_reg (HImode, val);
24262 else
24263 promoted_val = val;
24264
24265 return promoted_val;
24266 }
24267
24268 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24269 operations when profitable. The code depends upon architecture, block size
24270 and alignment, but always has one of the following overall structures:
24271
24272 Aligned move sequence:
24273
24274 1) Prologue guard: Conditional that jumps up to epilogues for small
24275 blocks that can be handled by epilogue alone. This is faster
24276 but also needed for correctness, since the prologue assumes the block
24277 is larger than the desired alignment.
24278
24279 Optional dynamic check for size and libcall for large
24280 blocks is emitted here too, with -minline-stringops-dynamically.
24281
24282 2) Prologue: copy first few bytes in order to get destination
24283 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24284 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24285 copied. We emit either a jump tree on power of two sized
24286 blocks, or a byte loop.
24287
24288 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24289 with specified algorithm.
24290
24291 4) Epilogue: code copying tail of the block that is too small to be
24292 handled by main body (or up to size guarded by prologue guard).
24293
24294 Misaligned move sequence
24295
24296 1) Misaligned move prologue/epilogue containing:
24297 a) Prologue handling small memory blocks and jumping to done_label
24298 (skipped if blocks are known to be large enough)
24299 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24300 needed by single possibly misaligned move
24301 (skipped if alignment is not needed)
24302 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24303
24304 2) Zero size guard dispatching to done_label, if needed
24305
24306 3) Dispatch to a library call, if needed.
24307
24308 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24309 with the specified algorithm. */
24310 bool
24311 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24312 rtx align_exp, rtx expected_align_exp,
24313 rtx expected_size_exp, rtx min_size_exp,
24314 rtx max_size_exp, rtx probable_max_size_exp,
24315 bool issetmem)
24316 {
24317 rtx destreg;
24318 rtx srcreg = NULL;
24319 rtx_code_label *label = NULL;
24320 rtx tmp;
24321 rtx_code_label *jump_around_label = NULL;
24322 HOST_WIDE_INT align = 1;
24323 unsigned HOST_WIDE_INT count = 0;
24324 HOST_WIDE_INT expected_size = -1;
24325 int size_needed = 0, epilogue_size_needed;
24326 int desired_align = 0, align_bytes = 0;
24327 enum stringop_alg alg;
24328 rtx promoted_val = NULL;
24329 rtx vec_promoted_val = NULL;
24330 bool force_loopy_epilogue = false;
24331 int dynamic_check;
24332 bool need_zero_guard = false;
24333 bool noalign;
24334 enum machine_mode move_mode = VOIDmode;
24335 int unroll_factor = 1;
24336 /* TODO: Once value ranges are available, fill in proper data. */
24337 unsigned HOST_WIDE_INT min_size = 0;
24338 unsigned HOST_WIDE_INT max_size = -1;
24339 unsigned HOST_WIDE_INT probable_max_size = -1;
24340 bool misaligned_prologue_used = false;
24341
24342 if (CONST_INT_P (align_exp))
24343 align = INTVAL (align_exp);
24344 /* i386 can do misaligned access at reasonably increased cost. */
24345 if (CONST_INT_P (expected_align_exp)
24346 && INTVAL (expected_align_exp) > align)
24347 align = INTVAL (expected_align_exp);
24348 /* ALIGN is the minimum of destination and source alignment, but we care here
24349 just about destination alignment. */
24350 else if (!issetmem
24351 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24352 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24353
24354 if (CONST_INT_P (count_exp))
24355 {
24356 min_size = max_size = probable_max_size = count = expected_size
24357 = INTVAL (count_exp);
24358 /* When COUNT is 0, there is nothing to do. */
24359 if (!count)
24360 return true;
24361 }
24362 else
24363 {
24364 if (min_size_exp)
24365 min_size = INTVAL (min_size_exp);
24366 if (max_size_exp)
24367 max_size = INTVAL (max_size_exp);
24368 if (probable_max_size_exp)
24369 probable_max_size = INTVAL (probable_max_size_exp);
24370 if (CONST_INT_P (expected_size_exp))
24371 expected_size = INTVAL (expected_size_exp);
24372 }
24373
24374 /* Make sure we don't need to care about overflow later on. */
24375 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24376 return false;
24377
24378 /* Step 0: Decide on preferred algorithm, desired alignment and
24379 size of chunks to be copied by main loop. */
24380 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24381 issetmem,
24382 issetmem && val_exp == const0_rtx,
24383 &dynamic_check, &noalign);
24384 if (alg == libcall)
24385 return false;
24386 gcc_assert (alg != no_stringop);
24387
24388 /* For now the vector version of memset is generated only for memory zeroing,
24389 as creating the promoted vector value is very cheap in this case. */
24390 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24391 alg = unrolled_loop;
24392
24393 if (!count)
24394 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24395 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24396 if (!issetmem)
24397 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24398
24399 unroll_factor = 1;
24400 move_mode = word_mode;
24401 switch (alg)
24402 {
24403 case libcall:
24404 case no_stringop:
24405 case last_alg:
24406 gcc_unreachable ();
24407 case loop_1_byte:
24408 need_zero_guard = true;
24409 move_mode = QImode;
24410 break;
24411 case loop:
24412 need_zero_guard = true;
24413 break;
24414 case unrolled_loop:
24415 need_zero_guard = true;
24416 unroll_factor = (TARGET_64BIT ? 4 : 2);
24417 break;
24418 case vector_loop:
24419 need_zero_guard = true;
24420 unroll_factor = 4;
24421 /* Find the widest supported mode. */
24422 move_mode = word_mode;
24423 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24424 != CODE_FOR_nothing)
24425 move_mode = GET_MODE_WIDER_MODE (move_mode);
24426
24427 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24428 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24429 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24430 {
24431 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24432 move_mode = mode_for_vector (word_mode, nunits);
24433 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24434 move_mode = word_mode;
24435 }
24436 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24437 break;
24438 case rep_prefix_8_byte:
24439 move_mode = DImode;
24440 break;
24441 case rep_prefix_4_byte:
24442 move_mode = SImode;
24443 break;
24444 case rep_prefix_1_byte:
24445 move_mode = QImode;
24446 break;
24447 }
24448 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24449 epilogue_size_needed = size_needed;
24450
24451 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24452 if (!TARGET_ALIGN_STRINGOPS || noalign)
24453 align = desired_align;
24454
24455 /* Step 1: Prologue guard. */
24456
24457 /* Alignment code needs count to be in register. */
24458 if (CONST_INT_P (count_exp) && desired_align > align)
24459 {
24460 if (INTVAL (count_exp) > desired_align
24461 && INTVAL (count_exp) > size_needed)
24462 {
24463 align_bytes
24464 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24465 if (align_bytes <= 0)
24466 align_bytes = 0;
24467 else
24468 align_bytes = desired_align - align_bytes;
24469 }
24470 if (align_bytes == 0)
24471 count_exp = force_reg (counter_mode (count_exp), count_exp);
24472 }
24473 gcc_assert (desired_align >= 1 && align >= 1);
24474
24475 /* Misaligned move sequences handle both prologue and epilogue at once.
24476 Default code generation results in smaller code for large alignments
24477 and also avoids redundant work when sizes are known precisely. */
24478 misaligned_prologue_used
24479 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24480 && MAX (desired_align, epilogue_size_needed) <= 32
24481 && desired_align <= epilogue_size_needed
24482 && ((desired_align > align && !align_bytes)
24483 || (!count && epilogue_size_needed > 1)));
24484
24485 /* Do the cheap promotion to allow better CSE across the
24486 main loop and epilogue (i.e. one load of the big constant in
24487 front of all the code).
24488 For now the misaligned move sequences do not have a fast path
24489 without broadcasting. */
24490 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24491 {
24492 if (alg == vector_loop)
24493 {
24494 gcc_assert (val_exp == const0_rtx);
24495 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24496 promoted_val = promote_duplicated_reg_to_size (val_exp,
24497 GET_MODE_SIZE (word_mode),
24498 desired_align, align);
24499 }
24500 else
24501 {
24502 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24503 desired_align, align);
24504 }
24505 }
24506 /* Misaligned move sequences handle both prologues and epilogues at once.
24507 Default code generation results in smaller code for large alignments and
24508 also avoids redundant work when sizes are known precisely. */
24509 if (misaligned_prologue_used)
24510 {
24511 /* The misaligned move prologue handles small blocks by itself. */
24512 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24513 (dst, src, &destreg, &srcreg,
24514 move_mode, promoted_val, vec_promoted_val,
24515 &count_exp,
24516 &jump_around_label,
24517 desired_align < align
24518 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24519 desired_align, align, &min_size, dynamic_check, issetmem);
24520 if (!issetmem)
24521 src = change_address (src, BLKmode, srcreg);
24522 dst = change_address (dst, BLKmode, destreg);
24523 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24524 epilogue_size_needed = 0;
24525 if (need_zero_guard && !min_size)
24526 {
24527 /* It is possible that we copied enough so the main loop will not
24528 execute. */
24529 gcc_assert (size_needed > 1);
24530 if (jump_around_label == NULL_RTX)
24531 jump_around_label = gen_label_rtx ();
24532 emit_cmp_and_jump_insns (count_exp,
24533 GEN_INT (size_needed),
24534 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24535 if (expected_size == -1
24536 || expected_size < (desired_align - align) / 2 + size_needed)
24537 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24538 else
24539 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24540 }
24541 }
24542 /* Ensure that alignment prologue won't copy past end of block. */
24543 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24544 {
24545 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24546 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24547 Make sure it is power of 2. */
24548 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24549
24550 /* To improve performance on small blocks, we jump around the VAL
24551 promoting code. This means that if the promoted VAL is not constant,
24552 we might not use it in the epilogue and have to use the byte
24553 loop variant. */
24554 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24555 force_loopy_epilogue = true;
24556 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24557 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24558 {
24559 /* If main algorithm works on QImode, no epilogue is needed.
24560 For small sizes just don't align anything. */
24561 if (size_needed == 1)
24562 desired_align = align;
24563 else
24564 goto epilogue;
24565 }
24566 else if (!count
24567 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24568 {
24569 label = gen_label_rtx ();
24570 emit_cmp_and_jump_insns (count_exp,
24571 GEN_INT (epilogue_size_needed),
24572 LTU, 0, counter_mode (count_exp), 1, label);
24573 if (expected_size == -1 || expected_size < epilogue_size_needed)
24574 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24575 else
24576 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24577 }
24578 }
24579
24580 /* Emit code to decide at runtime whether a library call or inline code
24581 should be used. */
24582 if (dynamic_check != -1)
24583 {
24584 if (!issetmem && CONST_INT_P (count_exp))
24585 {
24586 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24587 {
24588 emit_block_move_via_libcall (dst, src, count_exp, false);
24589 count_exp = const0_rtx;
24590 goto epilogue;
24591 }
24592 }
24593 else
24594 {
24595 rtx_code_label *hot_label = gen_label_rtx ();
24596 if (jump_around_label == NULL_RTX)
24597 jump_around_label = gen_label_rtx ();
24598 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24599 LEU, 0, counter_mode (count_exp),
24600 1, hot_label);
24601 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24602 if (issetmem)
24603 set_storage_via_libcall (dst, count_exp, val_exp, false);
24604 else
24605 emit_block_move_via_libcall (dst, src, count_exp, false);
24606 emit_jump (jump_around_label);
24607 emit_label (hot_label);
24608 }
24609 }
24610
24611 /* Step 2: Alignment prologue. */
24612 /* Do the expensive promotion once we branched off the small blocks. */
24613 if (issetmem && !promoted_val)
24614 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24615 desired_align, align);
24616
24617 if (desired_align > align && !misaligned_prologue_used)
24618 {
24619 if (align_bytes == 0)
24620 {
24621 /* Except for the first move in the prologue, we no longer know
24622 the constant offset in the aliasing info. It doesn't seem worth
24623 the pain to maintain it for the first move, so throw away
24624 the info early. */
24625 dst = change_address (dst, BLKmode, destreg);
24626 if (!issetmem)
24627 src = change_address (src, BLKmode, srcreg);
24628 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24629 promoted_val, vec_promoted_val,
24630 count_exp, align, desired_align,
24631 issetmem);
24632 /* At most desired_align - align bytes are copied. */
24633 if (min_size < (unsigned)(desired_align - align))
24634 min_size = 0;
24635 else
24636 min_size -= desired_align - align;
24637 }
24638 else
24639 {
24640 /* If we know how many bytes need to be stored before dst is
24641 sufficiently aligned, maintain aliasing info accurately. */
24642 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24643 srcreg,
24644 promoted_val,
24645 vec_promoted_val,
24646 desired_align,
24647 align_bytes,
24648 issetmem);
24649
24650 count_exp = plus_constant (counter_mode (count_exp),
24651 count_exp, -align_bytes);
24652 count -= align_bytes;
24653 min_size -= align_bytes;
24654 max_size -= align_bytes;
24655 }
24656 if (need_zero_guard
24657 && !min_size
24658 && (count < (unsigned HOST_WIDE_INT) size_needed
24659 || (align_bytes == 0
24660 && count < ((unsigned HOST_WIDE_INT) size_needed
24661 + desired_align - align))))
24662 {
24663 /* It is possible that we copied enough so the main loop will not
24664 execute. */
24665 gcc_assert (size_needed > 1);
24666 if (label == NULL_RTX)
24667 label = gen_label_rtx ();
24668 emit_cmp_and_jump_insns (count_exp,
24669 GEN_INT (size_needed),
24670 LTU, 0, counter_mode (count_exp), 1, label);
24671 if (expected_size == -1
24672 || expected_size < (desired_align - align) / 2 + size_needed)
24673 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24674 else
24675 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24676 }
24677 }
24678 if (label && size_needed == 1)
24679 {
24680 emit_label (label);
24681 LABEL_NUSES (label) = 1;
24682 label = NULL;
24683 epilogue_size_needed = 1;
24684 if (issetmem)
24685 promoted_val = val_exp;
24686 }
24687 else if (label == NULL_RTX && !misaligned_prologue_used)
24688 epilogue_size_needed = size_needed;
24689
24690 /* Step 3: Main loop. */
24691
24692 switch (alg)
24693 {
24694 case libcall:
24695 case no_stringop:
24696 case last_alg:
24697 gcc_unreachable ();
24698 case loop_1_byte:
24699 case loop:
24700 case unrolled_loop:
24701 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24702 count_exp, move_mode, unroll_factor,
24703 expected_size, issetmem);
24704 break;
24705 case vector_loop:
24706 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24707 vec_promoted_val, count_exp, move_mode,
24708 unroll_factor, expected_size, issetmem);
24709 break;
24710 case rep_prefix_8_byte:
24711 case rep_prefix_4_byte:
24712 case rep_prefix_1_byte:
24713 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24714 val_exp, count_exp, move_mode, issetmem);
24715 break;
24716 }
24717 /* Properly adjust the offset of src and dest memory for aliasing. */
24718 if (CONST_INT_P (count_exp))
24719 {
24720 if (!issetmem)
24721 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24722 (count / size_needed) * size_needed);
24723 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24724 (count / size_needed) * size_needed);
24725 }
24726 else
24727 {
24728 if (!issetmem)
24729 src = change_address (src, BLKmode, srcreg);
24730 dst = change_address (dst, BLKmode, destreg);
24731 }
24732
24733 /* Step 4: Epilogue to copy the remaining bytes. */
24734 epilogue:
24735 if (label)
24736 {
24737 /* When the main loop is done, COUNT_EXP might hold original count,
24738 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24739 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24740 bytes. Compensate if needed. */
24741
24742 if (size_needed < epilogue_size_needed)
24743 {
24744 tmp =
24745 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24746 GEN_INT (size_needed - 1), count_exp, 1,
24747 OPTAB_DIRECT);
24748 if (tmp != count_exp)
24749 emit_move_insn (count_exp, tmp);
24750 }
24751 emit_label (label);
24752 LABEL_NUSES (label) = 1;
24753 }
24754
24755 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24756 {
24757 if (force_loopy_epilogue)
24758 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24759 epilogue_size_needed);
24760 else
24761 {
24762 if (issetmem)
24763 expand_setmem_epilogue (dst, destreg, promoted_val,
24764 vec_promoted_val, count_exp,
24765 epilogue_size_needed);
24766 else
24767 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24768 epilogue_size_needed);
24769 }
24770 }
24771 if (jump_around_label)
24772 emit_label (jump_around_label);
24773 return true;
24774 }
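
/* End-to-end illustration (all numbers made up): a memcpy of a constant
100 bytes that decide_alg resolves to unrolled_loop on a 64-bit target
uses DImode moves with unroll_factor == 4, so SIZE_NEEDED == 32; assuming
the destination is already sufficiently aligned, the main loop copies
3 * 32 == 96 bytes and the epilogue picks up the remaining
100 % 32 == 4 bytes. */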
24775
24776
24777 /* Expand the appropriate insns for doing strlen if not just doing
24778 repnz; scasb
24779
24780 out = result, initialized with the start address
24781 align_rtx = alignment of the address.
24782 scratch = scratch register, initialized with the start address when
24783 not aligned, otherwise undefined
24784
24785 This is just the body. It needs the initializations mentioned above and
24786 some address computing at the end. These things are done in i386.md. */
24787
24788 static void
24789 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24790 {
24791 int align;
24792 rtx tmp;
24793 rtx_code_label *align_2_label = NULL;
24794 rtx_code_label *align_3_label = NULL;
24795 rtx_code_label *align_4_label = gen_label_rtx ();
24796 rtx_code_label *end_0_label = gen_label_rtx ();
24797 rtx mem;
24798 rtx tmpreg = gen_reg_rtx (SImode);
24799 rtx scratch = gen_reg_rtx (SImode);
24800 rtx cmp;
24801
24802 align = 0;
24803 if (CONST_INT_P (align_rtx))
24804 align = INTVAL (align_rtx);
24805
24806 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24807
24808 /* Is there a known alignment and is it less than 4? */
24809 if (align < 4)
24810 {
24811 rtx scratch1 = gen_reg_rtx (Pmode);
24812 emit_move_insn (scratch1, out);
24813 /* Is there a known alignment and is it not 2? */
24814 if (align != 2)
24815 {
24816 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24817 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24818
24819 /* Leave just the 3 lower bits. */
24820 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24821 NULL_RTX, 0, OPTAB_WIDEN);
24822
24823 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24824 Pmode, 1, align_4_label);
24825 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24826 Pmode, 1, align_2_label);
24827 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24828 Pmode, 1, align_3_label);
24829 }
24830 else
24831 {
24832 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24833 check if it is aligned to 4 bytes. */
24834
24835 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24836 NULL_RTX, 0, OPTAB_WIDEN);
24837
24838 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24839 Pmode, 1, align_4_label);
24840 }
24841
24842 mem = change_address (src, QImode, out);
24843
24844 /* Now compare the bytes. */
24845
24846 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24847 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24848 QImode, 1, end_0_label);
24849
24850 /* Increment the address. */
24851 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24852
24853 /* Not needed with an alignment of 2. */
24854 if (align != 2)
24855 {
24856 emit_label (align_2_label);
24857
24858 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24859 end_0_label);
24860
24861 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24862
24863 emit_label (align_3_label);
24864 }
24865
24866 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24867 end_0_label);
24868
24869 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24870 }
24871
24872 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24873 align this loop: it only makes the program larger and does not help
24874 speed it up. */
24875 emit_label (align_4_label);
24876
24877 mem = change_address (src, SImode, out);
24878 emit_move_insn (scratch, mem);
24879 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24880
24881 /* This formula yields a nonzero result iff one of the bytes is zero.
24882 This saves three branches inside the loop and many cycles. */
24883
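/* Concretely (illustrative values): for SCRATCH == 0x11002233 the sequence
computes (0x11002233 + 0xfefefeff) & ~0x11002233 & 0x80808080
== 0x00800000, which is nonzero because one byte of SCRATCH is zero;
for SCRATCH == 0x11223344, which has no zero byte, the same expression
evaluates to 0 and the loop continues. */
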
24884 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24885 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24886 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24887 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24888 gen_int_mode (0x80808080, SImode)));
24889 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24890 align_4_label);
24891
24892 if (TARGET_CMOVE)
24893 {
24894 rtx reg = gen_reg_rtx (SImode);
24895 rtx reg2 = gen_reg_rtx (Pmode);
24896 emit_move_insn (reg, tmpreg);
24897 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24898
24899 /* If zero is not in the first two bytes, move two bytes forward. */
24900 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24901 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24902 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24903 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24904 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24905 reg,
24906 tmpreg)));
24907 /* Emit lea manually to avoid clobbering of flags. */
24908 emit_insn (gen_rtx_SET (SImode, reg2,
24909 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24910
24911 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24912 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24913 emit_insn (gen_rtx_SET (VOIDmode, out,
24914 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24915 reg2,
24916 out)));
24917 }
24918 else
24919 {
24920 rtx_code_label *end_2_label = gen_label_rtx ();
24921 /* Is zero in the first two bytes? */
24922
24923 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24924 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24925 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24926 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24927 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24928 pc_rtx);
24929 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24930 JUMP_LABEL (tmp) = end_2_label;
24931
24932 /* Not in the first two. Move two bytes forward. */
24933 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24934 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24935
24936 emit_label (end_2_label);
24937
24938 }
24939
24940 /* Avoid branch in fixing the byte. */
24941 tmpreg = gen_lowpart (QImode, tmpreg);
24942 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24943 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24944 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24945 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24946
24947 emit_label (end_0_label);
24948 }
24949
24950 /* Expand strlen. */
24951
24952 bool
24953 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24954 {
24955 rtx addr, scratch1, scratch2, scratch3, scratch4;
24956
24957   /* The generic case of the strlen expander is long.  Avoid expanding it
24958      unless TARGET_INLINE_ALL_STRINGOPS.  */
24959
24960 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24961 && !TARGET_INLINE_ALL_STRINGOPS
24962 && !optimize_insn_for_size_p ()
24963 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24964 return false;
24965
24966 addr = force_reg (Pmode, XEXP (src, 0));
24967 scratch1 = gen_reg_rtx (Pmode);
24968
24969 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24970 && !optimize_insn_for_size_p ())
24971 {
24972       /* It seems that some optimizers do not combine a call like
24973          foo(strlen(bar), strlen(bar));
24974          when the move and the subtraction are done here; the length is
24975          calculated just once when these instructions are emitted inside
24976          output_strlen_unroll().  But since &bar[strlen(bar)] is often used
24977          and this uses one fewer register for the lifetime of
24978          output_strlen_unroll(), this is better.  */
24979
24980 emit_move_insn (out, addr);
24981
24982 ix86_expand_strlensi_unroll_1 (out, src, align);
24983
24984 /* strlensi_unroll_1 returns the address of the zero at the end of
24985 the string, like memchr(), so compute the length by subtracting
24986 the start address. */
24987 emit_insn (ix86_gen_sub3 (out, out, addr));
24988 }
24989 else
24990 {
24991 rtx unspec;
24992
24993 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24994 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24995 return false;
24996
24997 scratch2 = gen_reg_rtx (Pmode);
24998 scratch3 = gen_reg_rtx (Pmode);
24999 scratch4 = force_reg (Pmode, constm1_rtx);
25000
25001 emit_move_insn (scratch3, addr);
25002 eoschar = force_reg (QImode, eoschar);
25003
25004 src = replace_equiv_address_nv (src, scratch3);
25005
25006 /* If .md starts supporting :P, this can be done in .md. */
25007 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25008 scratch4), UNSPEC_SCAS);
25009 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25010 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25011 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25012 }
25013 return true;
25014 }
25015
25016 /* For a given symbol (function), construct code to compute the address of its
25017    PLT entry in the large x86-64 PIC model.  */
25018 static rtx
25019 construct_plt_address (rtx symbol)
25020 {
25021 rtx tmp, unspec;
25022
25023 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25024 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25025 gcc_assert (Pmode == DImode);
25026
25027 tmp = gen_reg_rtx (Pmode);
25028 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25029
25030 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25031 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25032 return tmp;
25033 }
25034
25035 rtx
25036 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25037 rtx callarg2,
25038 rtx pop, bool sibcall)
25039 {
25040 rtx vec[3];
25041 rtx use = NULL, call;
25042 unsigned int vec_len = 0;
25043
25044 if (pop == const0_rtx)
25045 pop = NULL;
25046 gcc_assert (!TARGET_64BIT || !pop);
25047
25048 if (TARGET_MACHO && !TARGET_64BIT)
25049 {
25050 #if TARGET_MACHO
25051 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25052 fnaddr = machopic_indirect_call_target (fnaddr);
25053 #endif
25054 }
25055 else
25056 {
25057 /* Static functions and indirect calls don't need the pic register. */
25058 if (flag_pic
25059 && (!TARGET_64BIT
25060 || (ix86_cmodel == CM_LARGE_PIC
25061 && DEFAULT_ABI != MS_ABI))
25062 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25063 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25064 {
25065 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25066 if (ix86_use_pseudo_pic_reg ())
25067 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25068 pic_offset_table_rtx);
25069 }
25070 }
25071
25072 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
25073 {
25074 rtx al = gen_rtx_REG (QImode, AX_REG);
25075 emit_move_insn (al, callarg2);
25076 use_reg (&use, al);
25077 }
25078
25079 if (ix86_cmodel == CM_LARGE_PIC
25080 && !TARGET_PECOFF
25081 && MEM_P (fnaddr)
25082 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25083 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25084 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25085 else if (sibcall
25086 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25087 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25088 {
25089 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25090 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25091 }
25092
25093 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25094 if (retval)
25095 call = gen_rtx_SET (VOIDmode, retval, call);
25096 vec[vec_len++] = call;
25097
25098 if (pop)
25099 {
25100 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25101 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25102 vec[vec_len++] = pop;
25103 }
25104
25105 if (TARGET_64BIT_MS_ABI
25106 && (!callarg2 || INTVAL (callarg2) != -2))
25107 {
25108 int const cregs_size
25109 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25110 int i;
25111
25112 for (i = 0; i < cregs_size; i++)
25113 {
25114 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25115 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25116
25117 clobber_reg (&use, gen_rtx_REG (mode, regno));
25118 }
25119 }
25120
25121 if (vec_len > 1)
25122 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25123 call = emit_call_insn (call);
25124 if (use)
25125 CALL_INSN_FUNCTION_USAGE (call) = use;
25126
25127 return call;
25128 }
25129
25130 /* Output the assembly for a call instruction. */
25131
25132 const char *
25133 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25134 {
25135 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25136 bool seh_nop_p = false;
25137 const char *xasm;
25138
25139 if (SIBLING_CALL_P (insn))
25140 {
25141 if (direct_p)
25142 xasm = "jmp\t%P0";
25143 /* SEH epilogue detection requires the indirect branch case
25144 to include REX.W. */
25145 else if (TARGET_SEH)
25146 xasm = "rex.W jmp %A0";
25147 else
25148 xasm = "jmp\t%A0";
25149
25150 output_asm_insn (xasm, &call_op);
25151 return "";
25152 }
25153
25154 /* SEH unwinding can require an extra nop to be emitted in several
25155 circumstances. Determine if we have one of those. */
25156 if (TARGET_SEH)
25157 {
25158 rtx_insn *i;
25159
25160 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25161 {
25162 /* If we get to another real insn, we don't need the nop. */
25163 if (INSN_P (i))
25164 break;
25165
25166 /* If we get to the epilogue note, prevent a catch region from
25167 being adjacent to the standard epilogue sequence. If non-
25168 call-exceptions, we'll have done this during epilogue emission. */
25169 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25170 && !flag_non_call_exceptions
25171 && !can_throw_internal (insn))
25172 {
25173 seh_nop_p = true;
25174 break;
25175 }
25176 }
25177
25178 /* If we didn't find a real insn following the call, prevent the
25179 unwinder from looking into the next function. */
25180 if (i == NULL)
25181 seh_nop_p = true;
25182 }
25183
25184 if (direct_p)
25185 xasm = "call\t%P0";
25186 else
25187 xasm = "call\t%A0";
25188
25189 output_asm_insn (xasm, &call_op);
25190
25191 if (seh_nop_p)
25192 return "nop";
25193
25194 return "";
25195 }
25196 \f
25197 /* Clear stack slot assignments remembered from previous functions.
25198 This is called from INIT_EXPANDERS once before RTL is emitted for each
25199 function. */
25200
25201 static struct machine_function *
25202 ix86_init_machine_status (void)
25203 {
25204 struct machine_function *f;
25205
25206 f = ggc_cleared_alloc<machine_function> ();
25207 f->use_fast_prologue_epilogue_nregs = -1;
25208 f->call_abi = ix86_abi;
25209
25210 return f;
25211 }
25212
25213 /* Return a MEM corresponding to a stack slot with mode MODE.
25214 Allocate a new slot if necessary.
25215
25216 The RTL for a function can have several slots available: N is
25217 which slot to use. */
25218
25219 rtx
25220 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25221 {
25222 struct stack_local_entry *s;
25223
25224 gcc_assert (n < MAX_386_STACK_LOCALS);
25225
25226 for (s = ix86_stack_locals; s; s = s->next)
25227 if (s->mode == mode && s->n == n)
25228 return validize_mem (copy_rtx (s->rtl));
25229
25230 s = ggc_alloc<stack_local_entry> ();
25231 s->n = n;
25232 s->mode = mode;
25233 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25234
25235 s->next = ix86_stack_locals;
25236 ix86_stack_locals = s;
25237 return validize_mem (copy_rtx (s->rtl));
25238 }
25239
25240 static void
25241 ix86_instantiate_decls (void)
25242 {
25243 struct stack_local_entry *s;
25244
25245 for (s = ix86_stack_locals; s; s = s->next)
25246 if (s->rtl != NULL_RTX)
25247 instantiate_decl_rtl (s->rtl);
25248 }
25249 \f
25250 /* Check whether x86 address PARTS is a pc-relative address. */
25251
25252 static bool
25253 rip_relative_addr_p (struct ix86_address *parts)
25254 {
25255 rtx base, index, disp;
25256
25257 base = parts->base;
25258 index = parts->index;
25259 disp = parts->disp;
25260
25261 if (disp && !base && !index)
25262 {
25263 if (TARGET_64BIT)
25264 {
25265 rtx symbol = disp;
25266
25267 if (GET_CODE (disp) == CONST)
25268 symbol = XEXP (disp, 0);
25269 if (GET_CODE (symbol) == PLUS
25270 && CONST_INT_P (XEXP (symbol, 1)))
25271 symbol = XEXP (symbol, 0);
25272
25273 if (GET_CODE (symbol) == LABEL_REF
25274 || (GET_CODE (symbol) == SYMBOL_REF
25275 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25276 || (GET_CODE (symbol) == UNSPEC
25277 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25278 || XINT (symbol, 1) == UNSPEC_PCREL
25279 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25280 return true;
25281 }
25282 }
25283 return false;
25284 }
25285
25286 /* Calculate the length of the memory address in the instruction encoding.
25287 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25288 or other prefixes. We never generate addr32 prefix for LEA insn. */
25289
25290 int
25291 memory_address_length (rtx addr, bool lea)
25292 {
25293 struct ix86_address parts;
25294 rtx base, index, disp;
25295 int len;
25296 int ok;
25297
25298 if (GET_CODE (addr) == PRE_DEC
25299 || GET_CODE (addr) == POST_INC
25300 || GET_CODE (addr) == PRE_MODIFY
25301 || GET_CODE (addr) == POST_MODIFY)
25302 return 0;
25303
25304 ok = ix86_decompose_address (addr, &parts);
25305 gcc_assert (ok);
25306
25307 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25308
25309 /* If this is not LEA instruction, add the length of addr32 prefix. */
25310 if (TARGET_64BIT && !lea
25311 && (SImode_address_operand (addr, VOIDmode)
25312 || (parts.base && GET_MODE (parts.base) == SImode)
25313 || (parts.index && GET_MODE (parts.index) == SImode)))
25314 len++;
25315
25316 base = parts.base;
25317 index = parts.index;
25318 disp = parts.disp;
25319
25320 if (base && GET_CODE (base) == SUBREG)
25321 base = SUBREG_REG (base);
25322 if (index && GET_CODE (index) == SUBREG)
25323 index = SUBREG_REG (index);
25324
25325 gcc_assert (base == NULL_RTX || REG_P (base));
25326 gcc_assert (index == NULL_RTX || REG_P (index));
25327
25328 /* Rule of thumb:
25329 - esp as the base always wants an index,
25330 - ebp as the base always wants a displacement,
25331 - r12 as the base always wants an index,
25332 - r13 as the base always wants a displacement. */
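  /* For example, (%esp) and (%r12) can only be encoded with a SIB byte,
     since mod 00 with r/m 100 selects the SIB form; (%ebp) and (%r13) must
     be encoded as 0(%ebp) / 0(%r13) with a one-byte displacement, because
     mod 00 with r/m 101 means disp32 (or RIP-relative in 64-bit mode).  */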
25333
25334 /* Register Indirect. */
25335 if (base && !index && !disp)
25336 {
25337 /* esp (for its index) and ebp (for its displacement) need
25338 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25339 code. */
25340 if (base == arg_pointer_rtx
25341 || base == frame_pointer_rtx
25342 || REGNO (base) == SP_REG
25343 || REGNO (base) == BP_REG
25344 || REGNO (base) == R12_REG
25345 || REGNO (base) == R13_REG)
25346 len++;
25347 }
25348
25349 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25350 is not disp32, but disp32(%rip), so for disp32
25351 SIB byte is needed, unless print_operand_address
25352 optimizes it into disp32(%rip) or (%rip) is implied
25353 by UNSPEC. */
25354 else if (disp && !base && !index)
25355 {
25356 len += 4;
25357 if (rip_relative_addr_p (&parts))
25358 len++;
25359 }
25360 else
25361 {
25362 /* Find the length of the displacement constant. */
25363 if (disp)
25364 {
25365 if (base && satisfies_constraint_K (disp))
25366 len += 1;
25367 else
25368 len += 4;
25369 }
25370 /* ebp always wants a displacement. Similarly r13. */
25371 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25372 len++;
25373
25374 /* An index requires the two-byte modrm form.... */
25375 if (index
25376 /* ...like esp (or r12), which always wants an index. */
25377 || base == arg_pointer_rtx
25378 || base == frame_pointer_rtx
25379 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25380 len++;
25381 }
25382
25383 return len;
25384 }
25385
25386 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
25387    is set, expect that the insn has an 8-bit immediate alternative.  */
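/* For instance, "addl $100, %eax" can use the sign-extended imm8 form
   (opcode 0x83), so with SHORTFORM the immediate contributes 1 byte,
   while "addl $1000, %eax" needs the imm32 form (opcode 0x81) and
   contributes 4 bytes.  */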
25388 int
25389 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25390 {
25391 int len = 0;
25392 int i;
25393 extract_insn_cached (insn);
25394 for (i = recog_data.n_operands - 1; i >= 0; --i)
25395 if (CONSTANT_P (recog_data.operand[i]))
25396 {
25397 enum attr_mode mode = get_attr_mode (insn);
25398
25399 gcc_assert (!len);
25400 if (shortform && CONST_INT_P (recog_data.operand[i]))
25401 {
25402 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25403 switch (mode)
25404 {
25405 case MODE_QI:
25406 len = 1;
25407 continue;
25408 case MODE_HI:
25409 ival = trunc_int_for_mode (ival, HImode);
25410 break;
25411 case MODE_SI:
25412 ival = trunc_int_for_mode (ival, SImode);
25413 break;
25414 default:
25415 break;
25416 }
25417 if (IN_RANGE (ival, -128, 127))
25418 {
25419 len = 1;
25420 continue;
25421 }
25422 }
25423 switch (mode)
25424 {
25425 case MODE_QI:
25426 len = 1;
25427 break;
25428 case MODE_HI:
25429 len = 2;
25430 break;
25431 case MODE_SI:
25432 len = 4;
25433 break;
25434 /* Immediates for DImode instructions are encoded
25435 as 32bit sign extended values. */
25436 case MODE_DI:
25437 len = 4;
25438 break;
25439 default:
25440 fatal_insn ("unknown insn mode", insn);
25441 }
25442 }
25443 return len;
25444 }
25445
25446 /* Compute default value for "length_address" attribute. */
25447 int
25448 ix86_attr_length_address_default (rtx_insn *insn)
25449 {
25450 int i;
25451
25452 if (get_attr_type (insn) == TYPE_LEA)
25453 {
25454 rtx set = PATTERN (insn), addr;
25455
25456 if (GET_CODE (set) == PARALLEL)
25457 set = XVECEXP (set, 0, 0);
25458
25459 gcc_assert (GET_CODE (set) == SET);
25460
25461 addr = SET_SRC (set);
25462
25463 return memory_address_length (addr, true);
25464 }
25465
25466 extract_insn_cached (insn);
25467 for (i = recog_data.n_operands - 1; i >= 0; --i)
25468 if (MEM_P (recog_data.operand[i]))
25469 {
25470 constrain_operands_cached (reload_completed);
25471 if (which_alternative != -1)
25472 {
25473 const char *constraints = recog_data.constraints[i];
25474 int alt = which_alternative;
25475
25476 while (*constraints == '=' || *constraints == '+')
25477 constraints++;
25478 while (alt-- > 0)
25479 while (*constraints++ != ',')
25480 ;
25481 /* Skip ignored operands. */
25482 if (*constraints == 'X')
25483 continue;
25484 }
25485 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25486 }
25487 return 0;
25488 }
25489
25490 /* Compute default value for "length_vex" attribute. It includes
25491 2 or 3 byte VEX prefix and 1 opcode byte. */
25492
25493 int
25494 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25495 bool has_vex_w)
25496 {
25497 int i;
25498
25499   /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W bit
25500      requires the 3-byte VEX prefix.  */
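  /* For example, "vaddps %xmm1, %xmm2, %xmm0" fits the 2-byte (C5) prefix,
     while anything that needs VEX.W, VEX.X or VEX.B, or an opcode map other
     than 0F (i.e. 0F38/0F3A), must use the 3-byte (C4) form.  */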
25501 if (!has_0f_opcode || has_vex_w)
25502 return 3 + 1;
25503
25504 /* We can always use 2 byte VEX prefix in 32bit. */
25505 if (!TARGET_64BIT)
25506 return 2 + 1;
25507
25508 extract_insn_cached (insn);
25509
25510 for (i = recog_data.n_operands - 1; i >= 0; --i)
25511 if (REG_P (recog_data.operand[i]))
25512 {
25513 /* REX.W bit uses 3 byte VEX prefix. */
25514 if (GET_MODE (recog_data.operand[i]) == DImode
25515 && GENERAL_REG_P (recog_data.operand[i]))
25516 return 3 + 1;
25517 }
25518 else
25519 {
25520 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25521 if (MEM_P (recog_data.operand[i])
25522 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25523 return 3 + 1;
25524 }
25525
25526 return 2 + 1;
25527 }
25528 \f
25529 /* Return the maximum number of instructions a cpu can issue. */
25530
25531 static int
25532 ix86_issue_rate (void)
25533 {
25534 switch (ix86_tune)
25535 {
25536 case PROCESSOR_PENTIUM:
25537 case PROCESSOR_BONNELL:
25538 case PROCESSOR_SILVERMONT:
25539 case PROCESSOR_INTEL:
25540 case PROCESSOR_K6:
25541 case PROCESSOR_BTVER2:
25542 case PROCESSOR_PENTIUM4:
25543 case PROCESSOR_NOCONA:
25544 return 2;
25545
25546 case PROCESSOR_PENTIUMPRO:
25547 case PROCESSOR_ATHLON:
25548 case PROCESSOR_K8:
25549 case PROCESSOR_AMDFAM10:
25550 case PROCESSOR_GENERIC:
25551 case PROCESSOR_BTVER1:
25552 return 3;
25553
25554 case PROCESSOR_BDVER1:
25555 case PROCESSOR_BDVER2:
25556 case PROCESSOR_BDVER3:
25557 case PROCESSOR_BDVER4:
25558 case PROCESSOR_CORE2:
25559 case PROCESSOR_NEHALEM:
25560 case PROCESSOR_SANDYBRIDGE:
25561 case PROCESSOR_HASWELL:
25562 return 4;
25563
25564 default:
25565 return 1;
25566 }
25567 }
25568
25569 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
25570    by DEP_INSN and nothing else set by DEP_INSN.  */
25571
25572 static bool
25573 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25574 {
25575 rtx set, set2;
25576
25577 /* Simplify the test for uninteresting insns. */
25578 if (insn_type != TYPE_SETCC
25579 && insn_type != TYPE_ICMOV
25580 && insn_type != TYPE_FCMOV
25581 && insn_type != TYPE_IBR)
25582 return false;
25583
25584 if ((set = single_set (dep_insn)) != 0)
25585 {
25586 set = SET_DEST (set);
25587 set2 = NULL_RTX;
25588 }
25589 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25590 && XVECLEN (PATTERN (dep_insn), 0) == 2
25591 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25592 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25593 {
25594 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25595       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25596 }
25597 else
25598 return false;
25599
25600 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25601 return false;
25602
25603 /* This test is true if the dependent insn reads the flags but
25604 not any other potentially set register. */
25605 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25606 return false;
25607
25608 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25609 return false;
25610
25611 return true;
25612 }
25613
25614 /* Return true iff USE_INSN has a memory address with operands set by
25615 SET_INSN. */
25616
25617 bool
25618 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
25619 {
25620 int i;
25621 extract_insn_cached (use_insn);
25622 for (i = recog_data.n_operands - 1; i >= 0; --i)
25623 if (MEM_P (recog_data.operand[i]))
25624 {
25625 rtx addr = XEXP (recog_data.operand[i], 0);
25626 return modified_in_p (addr, set_insn) != 0;
25627 }
25628 return false;
25629 }
25630
25631 /* Helper function for exact_store_load_dependency.
25632 Return true if addr is found in insn. */
25633 static bool
25634 exact_dependency_1 (rtx addr, rtx insn)
25635 {
25636 enum rtx_code code;
25637 const char *format_ptr;
25638 int i, j;
25639
25640 code = GET_CODE (insn);
25641 switch (code)
25642 {
25643 case MEM:
25644 if (rtx_equal_p (addr, insn))
25645 return true;
25646 break;
25647 case REG:
25648 CASE_CONST_ANY:
25649 case SYMBOL_REF:
25650 case CODE_LABEL:
25651 case PC:
25652 case CC0:
25653 case EXPR_LIST:
25654 return false;
25655 default:
25656 break;
25657 }
25658
25659 format_ptr = GET_RTX_FORMAT (code);
25660 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25661 {
25662 switch (*format_ptr++)
25663 {
25664 case 'e':
25665 if (exact_dependency_1 (addr, XEXP (insn, i)))
25666 return true;
25667 break;
25668 case 'E':
25669 for (j = 0; j < XVECLEN (insn, i); j++)
25670 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25671 return true;
25672 break;
25673 }
25674 }
25675 return false;
25676 }
25677
25678 /* Return true if there exists exact dependency for store & load, i.e.
25679 the same memory address is used in them. */
25680 static bool
25681 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
25682 {
25683 rtx set1, set2;
25684
25685 set1 = single_set (store);
25686 if (!set1)
25687 return false;
25688 if (!MEM_P (SET_DEST (set1)))
25689 return false;
25690 set2 = single_set (load);
25691 if (!set2)
25692 return false;
25693 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25694 return true;
25695 return false;
25696 }
25697
25698 static int
25699 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
25700 {
25701 enum attr_type insn_type, dep_insn_type;
25702 enum attr_memory memory;
25703 rtx set, set2;
25704 int dep_insn_code_number;
25705
25706 /* Anti and output dependencies have zero cost on all CPUs. */
25707 if (REG_NOTE_KIND (link) != 0)
25708 return 0;
25709
25710 dep_insn_code_number = recog_memoized (dep_insn);
25711
25712 /* If we can't recognize the insns, we can't really do anything. */
25713 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25714 return cost;
25715
25716 insn_type = get_attr_type (insn);
25717 dep_insn_type = get_attr_type (dep_insn);
25718
25719 switch (ix86_tune)
25720 {
25721 case PROCESSOR_PENTIUM:
25722 /* Address Generation Interlock adds a cycle of latency. */
25723 if (insn_type == TYPE_LEA)
25724 {
25725 rtx addr = PATTERN (insn);
25726
25727 if (GET_CODE (addr) == PARALLEL)
25728 addr = XVECEXP (addr, 0, 0);
25729
25730 gcc_assert (GET_CODE (addr) == SET);
25731
25732 addr = SET_SRC (addr);
25733 if (modified_in_p (addr, dep_insn))
25734 cost += 1;
25735 }
25736 else if (ix86_agi_dependent (dep_insn, insn))
25737 cost += 1;
25738
25739 /* ??? Compares pair with jump/setcc. */
25740 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25741 cost = 0;
25742
25743 /* Floating point stores require value to be ready one cycle earlier. */
25744 if (insn_type == TYPE_FMOV
25745 && get_attr_memory (insn) == MEMORY_STORE
25746 && !ix86_agi_dependent (dep_insn, insn))
25747 cost += 1;
25748 break;
25749
25750 case PROCESSOR_PENTIUMPRO:
25751 /* INT->FP conversion is expensive. */
25752 if (get_attr_fp_int_src (dep_insn))
25753 cost += 5;
25754
25755 /* There is one cycle extra latency between an FP op and a store. */
25756 if (insn_type == TYPE_FMOV
25757 && (set = single_set (dep_insn)) != NULL_RTX
25758 && (set2 = single_set (insn)) != NULL_RTX
25759 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25760 && MEM_P (SET_DEST (set2)))
25761 cost += 1;
25762
25763 memory = get_attr_memory (insn);
25764
25765 /* Show ability of reorder buffer to hide latency of load by executing
25766 in parallel with previous instruction in case
25767 previous instruction is not needed to compute the address. */
25768 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25769 && !ix86_agi_dependent (dep_insn, insn))
25770 {
25771 	  /* Claim moves to take one cycle, as the core can issue one load
25772 	     at a time and the next load can start a cycle later.  */
25773 if (dep_insn_type == TYPE_IMOV
25774 || dep_insn_type == TYPE_FMOV)
25775 cost = 1;
25776 else if (cost > 1)
25777 cost--;
25778 }
25779 break;
25780
25781 case PROCESSOR_K6:
25782 /* The esp dependency is resolved before
25783 the instruction is really finished. */
25784 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25785 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25786 return 1;
25787
25788 /* INT->FP conversion is expensive. */
25789 if (get_attr_fp_int_src (dep_insn))
25790 cost += 5;
25791
25792 memory = get_attr_memory (insn);
25793
25794 /* Show ability of reorder buffer to hide latency of load by executing
25795 in parallel with previous instruction in case
25796 previous instruction is not needed to compute the address. */
25797 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25798 && !ix86_agi_dependent (dep_insn, insn))
25799 {
25800 	  /* Claim moves to take one cycle, as the core can issue one load
25801 	     at a time and the next load can start a cycle later.  */
25802 if (dep_insn_type == TYPE_IMOV
25803 || dep_insn_type == TYPE_FMOV)
25804 cost = 1;
25805 else if (cost > 2)
25806 cost -= 2;
25807 else
25808 cost = 1;
25809 }
25810 break;
25811
25812 case PROCESSOR_AMDFAM10:
25813 case PROCESSOR_BDVER1:
25814 case PROCESSOR_BDVER2:
25815 case PROCESSOR_BDVER3:
25816 case PROCESSOR_BDVER4:
25817 case PROCESSOR_BTVER1:
25818 case PROCESSOR_BTVER2:
25819 case PROCESSOR_GENERIC:
25820       /* The stack engine allows push and pop instructions to execute in parallel.  */
25821 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25822 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25823 return 0;
25824 /* FALLTHRU */
25825
25826 case PROCESSOR_ATHLON:
25827 case PROCESSOR_K8:
25828 memory = get_attr_memory (insn);
25829
25830 /* Show ability of reorder buffer to hide latency of load by executing
25831 in parallel with previous instruction in case
25832 previous instruction is not needed to compute the address. */
25833 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25834 && !ix86_agi_dependent (dep_insn, insn))
25835 {
25836 enum attr_unit unit = get_attr_unit (insn);
25837 int loadcost = 3;
25838
25839 /* Because of the difference between the length of integer and
25840 floating unit pipeline preparation stages, the memory operands
25841 for floating point are cheaper.
25842
25843 	     ??? For Athlon the difference is most probably 2.  */
25844 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25845 loadcost = 3;
25846 else
25847 loadcost = TARGET_ATHLON ? 2 : 0;
25848
25849 if (cost >= loadcost)
25850 cost -= loadcost;
25851 else
25852 cost = 0;
25853 }
25854 break;
25855
25856 case PROCESSOR_CORE2:
25857 case PROCESSOR_NEHALEM:
25858 case PROCESSOR_SANDYBRIDGE:
25859 case PROCESSOR_HASWELL:
25860       /* The stack engine allows push and pop instructions to execute in parallel.  */
25861 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25862 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25863 return 0;
25864
25865 memory = get_attr_memory (insn);
25866
25867 /* Show ability of reorder buffer to hide latency of load by executing
25868 in parallel with previous instruction in case
25869 previous instruction is not needed to compute the address. */
25870 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25871 && !ix86_agi_dependent (dep_insn, insn))
25872 {
25873 if (cost >= 4)
25874 cost -= 4;
25875 else
25876 cost = 0;
25877 }
25878 break;
25879
25880 case PROCESSOR_SILVERMONT:
25881 case PROCESSOR_INTEL:
25882 if (!reload_completed)
25883 return cost;
25884
25885 /* Increase cost of integer loads. */
25886 memory = get_attr_memory (dep_insn);
25887 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25888 {
25889 enum attr_unit unit = get_attr_unit (dep_insn);
25890 if (unit == UNIT_INTEGER && cost == 1)
25891 {
25892 if (memory == MEMORY_LOAD)
25893 cost = 3;
25894 else
25895 {
25896 /* Increase cost of ld/st for short int types only
25897 because of store forwarding issue. */
25898 rtx set = single_set (dep_insn);
25899 if (set && (GET_MODE (SET_DEST (set)) == QImode
25900 || GET_MODE (SET_DEST (set)) == HImode))
25901 {
25902 /* Increase cost of store/load insn if exact
25903 dependence exists and it is load insn. */
25904 enum attr_memory insn_memory = get_attr_memory (insn);
25905 if (insn_memory == MEMORY_LOAD
25906 && exact_store_load_dependency (dep_insn, insn))
25907 cost = 3;
25908 }
25909 }
25910 }
25911 }
25912
25913 default:
25914 break;
25915 }
25916
25917 return cost;
25918 }
25919
25920 /* How many alternative schedules to try. This should be as wide as the
25921 scheduling freedom in the DFA, but no wider. Making this value too
25922    large results in extra work for the scheduler.  */
25923
25924 static int
25925 ia32_multipass_dfa_lookahead (void)
25926 {
25927 switch (ix86_tune)
25928 {
25929 case PROCESSOR_PENTIUM:
25930 return 2;
25931
25932 case PROCESSOR_PENTIUMPRO:
25933 case PROCESSOR_K6:
25934 return 1;
25935
25936 case PROCESSOR_BDVER1:
25937 case PROCESSOR_BDVER2:
25938 case PROCESSOR_BDVER3:
25939 case PROCESSOR_BDVER4:
25940 /* We use lookahead value 4 for BD both before and after reload
25941 schedules. Plan is to have value 8 included for O3. */
25942 return 4;
25943
25944 case PROCESSOR_CORE2:
25945 case PROCESSOR_NEHALEM:
25946 case PROCESSOR_SANDYBRIDGE:
25947 case PROCESSOR_HASWELL:
25948 case PROCESSOR_BONNELL:
25949 case PROCESSOR_SILVERMONT:
25950 case PROCESSOR_INTEL:
25951       /* Generally, we want haifa-sched:max_issue() to look ahead as far
25952 	 as the number of instructions that can be executed in one cycle,
25953 	 i.e., issue_rate.  I wonder why tuning for many CPUs does not do this.  */
25954 if (reload_completed)
25955 return ix86_issue_rate ();
25956 /* Don't use lookahead for pre-reload schedule to save compile time. */
25957 return 0;
25958
25959 default:
25960 return 0;
25961 }
25962 }
25963
25964 /* Return true if target platform supports macro-fusion. */
25965
25966 static bool
25967 ix86_macro_fusion_p ()
25968 {
25969 return TARGET_FUSE_CMP_AND_BRANCH;
25970 }
25971
25972 /* Check whether the current microarchitecture supports macro fusion
25973    for the insn pair "CONDGEN + CONDJMP".  Refer to
25974 "Intel Architectures Optimization Reference Manual". */
25975
25976 static bool
25977 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
25978 {
25979 rtx src, dest;
25980 enum rtx_code ccode;
25981 rtx compare_set = NULL_RTX, test_if, cond;
25982 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25983
25984 if (!any_condjump_p (condjmp))
25985 return false;
25986
25987 if (get_attr_type (condgen) != TYPE_TEST
25988 && get_attr_type (condgen) != TYPE_ICMP
25989 && get_attr_type (condgen) != TYPE_INCDEC
25990 && get_attr_type (condgen) != TYPE_ALU)
25991 return false;
25992
25993 compare_set = single_set (condgen);
25994 if (compare_set == NULL_RTX
25995 && !TARGET_FUSE_ALU_AND_BRANCH)
25996 return false;
25997
25998 if (compare_set == NULL_RTX)
25999 {
26000 int i;
26001 rtx pat = PATTERN (condgen);
26002 for (i = 0; i < XVECLEN (pat, 0); i++)
26003 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26004 {
26005 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26006 if (GET_CODE (set_src) == COMPARE)
26007 compare_set = XVECEXP (pat, 0, i);
26008 else
26009 alu_set = XVECEXP (pat, 0, i);
26010 }
26011 }
26012 if (compare_set == NULL_RTX)
26013 return false;
26014 src = SET_SRC (compare_set);
26015 if (GET_CODE (src) != COMPARE)
26016 return false;
26017
26018 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26019 supported. */
26020 if ((MEM_P (XEXP (src, 0))
26021 && CONST_INT_P (XEXP (src, 1)))
26022 || (MEM_P (XEXP (src, 1))
26023 && CONST_INT_P (XEXP (src, 0))))
26024 return false;
26025
26026 /* No fusion for RIP-relative address. */
26027 if (MEM_P (XEXP (src, 0)))
26028 addr = XEXP (XEXP (src, 0), 0);
26029 else if (MEM_P (XEXP (src, 1)))
26030 addr = XEXP (XEXP (src, 1), 0);
26031
26032 if (addr) {
26033 ix86_address parts;
26034 int ok = ix86_decompose_address (addr, &parts);
26035 gcc_assert (ok);
26036
26037 if (rip_relative_addr_p (&parts))
26038 return false;
26039 }
26040
26041 test_if = SET_SRC (pc_set (condjmp));
26042 cond = XEXP (test_if, 0);
26043 ccode = GET_CODE (cond);
26044   /* Check whether the conditional jump uses the Sign or Overflow flags.  */
26045 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26046 && (ccode == GE
26047 || ccode == GT
26048 || ccode == LE
26049 || ccode == LT))
26050 return false;
26051
26052 /* Return true for TYPE_TEST and TYPE_ICMP. */
26053 if (get_attr_type (condgen) == TYPE_TEST
26054 || get_attr_type (condgen) == TYPE_ICMP)
26055 return true;
26056
26057   /* The following handles macro-fusion for alu + jmp.  */
26058 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26059 return false;
26060
26061 /* No fusion for alu op with memory destination operand. */
26062 dest = SET_DEST (alu_set);
26063 if (MEM_P (dest))
26064 return false;
26065
26066 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26067 supported. */
26068 if (get_attr_type (condgen) == TYPE_INCDEC
26069 && (ccode == GEU
26070 || ccode == GTU
26071 || ccode == LEU
26072 || ccode == LTU))
26073 return false;
26074
26075 return true;
26076 }
26077
26078 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26079 execution. It is applied if
26080 (1) IMUL instruction is on the top of list;
26081 (2) There exists the only producer of independent IMUL instruction in
26082 ready list.
26083 Return index of IMUL producer if it was found and -1 otherwise. */
26084 static int
26085 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26086 {
26087 rtx_insn *insn;
26088 rtx set, insn1, insn2;
26089 sd_iterator_def sd_it;
26090 dep_t dep;
26091 int index = -1;
26092 int i;
26093
26094 if (!TARGET_BONNELL)
26095 return index;
26096
26097 /* Check that IMUL instruction is on the top of ready list. */
26098 insn = ready[n_ready - 1];
26099 set = single_set (insn);
26100 if (!set)
26101 return index;
26102 if (!(GET_CODE (SET_SRC (set)) == MULT
26103 && GET_MODE (SET_SRC (set)) == SImode))
26104 return index;
26105
26106 /* Search for producer of independent IMUL instruction. */
26107 for (i = n_ready - 2; i >= 0; i--)
26108 {
26109 insn = ready[i];
26110 if (!NONDEBUG_INSN_P (insn))
26111 continue;
26112 /* Skip IMUL instruction. */
26113 insn2 = PATTERN (insn);
26114 if (GET_CODE (insn2) == PARALLEL)
26115 insn2 = XVECEXP (insn2, 0, 0);
26116 if (GET_CODE (insn2) == SET
26117 && GET_CODE (SET_SRC (insn2)) == MULT
26118 && GET_MODE (SET_SRC (insn2)) == SImode)
26119 continue;
26120
26121 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26122 {
26123 rtx con;
26124 con = DEP_CON (dep);
26125 if (!NONDEBUG_INSN_P (con))
26126 continue;
26127 insn1 = PATTERN (con);
26128 if (GET_CODE (insn1) == PARALLEL)
26129 insn1 = XVECEXP (insn1, 0, 0);
26130
26131 if (GET_CODE (insn1) == SET
26132 && GET_CODE (SET_SRC (insn1)) == MULT
26133 && GET_MODE (SET_SRC (insn1)) == SImode)
26134 {
26135 sd_iterator_def sd_it1;
26136 dep_t dep1;
26137 /* Check if there is no other dependee for IMUL. */
26138 index = i;
26139 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26140 {
26141 rtx pro;
26142 pro = DEP_PRO (dep1);
26143 if (!NONDEBUG_INSN_P (pro))
26144 continue;
26145 if (pro != insn)
26146 index = -1;
26147 }
26148 if (index >= 0)
26149 break;
26150 }
26151 }
26152 if (index >= 0)
26153 break;
26154 }
26155 return index;
26156 }
26157
26158 /* Try to find the best candidate on the top of ready list if two insns
26159 have the same priority - candidate is best if its dependees were
26160 scheduled earlier. Applied for Silvermont only.
26161 Return true if top 2 insns must be interchanged. */
26162 static bool
26163 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26164 {
26165 rtx_insn *top = ready[n_ready - 1];
26166 rtx_insn *next = ready[n_ready - 2];
26167 rtx set;
26168 sd_iterator_def sd_it;
26169 dep_t dep;
26170 int clock1 = -1;
26171 int clock2 = -1;
26172 #define INSN_TICK(INSN) (HID (INSN)->tick)
26173
26174 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26175 return false;
26176
26177 if (!NONDEBUG_INSN_P (top))
26178 return false;
26179 if (!NONJUMP_INSN_P (top))
26180 return false;
26181 if (!NONDEBUG_INSN_P (next))
26182 return false;
26183 if (!NONJUMP_INSN_P (next))
26184 return false;
26185 set = single_set (top);
26186 if (!set)
26187 return false;
26188 set = single_set (next);
26189 if (!set)
26190 return false;
26191
26192 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26193 {
26194 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26195 return false;
26196       /* Determine the winner more precisely.  */
26197 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26198 {
26199 rtx pro;
26200 pro = DEP_PRO (dep);
26201 if (!NONDEBUG_INSN_P (pro))
26202 continue;
26203 if (INSN_TICK (pro) > clock1)
26204 clock1 = INSN_TICK (pro);
26205 }
26206 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26207 {
26208 rtx pro;
26209 pro = DEP_PRO (dep);
26210 if (!NONDEBUG_INSN_P (pro))
26211 continue;
26212 if (INSN_TICK (pro) > clock2)
26213 clock2 = INSN_TICK (pro);
26214 }
26215
26216 if (clock1 == clock2)
26217 {
26218 /* Determine winner - load must win. */
26219 enum attr_memory memory1, memory2;
26220 memory1 = get_attr_memory (top);
26221 memory2 = get_attr_memory (next);
26222 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26223 return true;
26224 }
26225 return (bool) (clock2 < clock1);
26226 }
26227 return false;
26228 #undef INSN_TICK
26229 }
26230
26231 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26232 Return issue rate. */
26233 static int
26234 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26235 int *pn_ready, int clock_var)
26236 {
26237 int issue_rate = -1;
26238 int n_ready = *pn_ready;
26239 int i;
26240 rtx_insn *insn;
26241 int index = -1;
26242
26243 /* Set up issue rate. */
26244 issue_rate = ix86_issue_rate ();
26245
26246   /* Do reordering for BONNELL/SILVERMONT only.  */
26247 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26248 return issue_rate;
26249
26250 /* Nothing to do if ready list contains only 1 instruction. */
26251 if (n_ready <= 1)
26252 return issue_rate;
26253
26254   /* Do reordering for the post-reload scheduler only.  */
26255 if (!reload_completed)
26256 return issue_rate;
26257
26258 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26259 {
26260 if (sched_verbose > 1)
26261 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26262 INSN_UID (ready[index]));
26263
26264 /* Put IMUL producer (ready[index]) at the top of ready list. */
26265 insn = ready[index];
26266 for (i = index; i < n_ready - 1; i++)
26267 ready[i] = ready[i + 1];
26268 ready[n_ready - 1] = insn;
26269 return issue_rate;
26270 }
26271 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26272 {
26273 if (sched_verbose > 1)
26274 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26275 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26276 /* Swap 2 top elements of ready list. */
26277 insn = ready[n_ready - 1];
26278 ready[n_ready - 1] = ready[n_ready - 2];
26279 ready[n_ready - 2] = insn;
26280 }
26281 return issue_rate;
26282 }
26283
26284 static bool
26285 ix86_class_likely_spilled_p (reg_class_t);
26286
26287 /* Returns true if the lhs of insn is a HW function argument register, and sets
26288    is_spilled to true if it is a likely-spilled HW register.  */
26289 static bool
26290 insn_is_function_arg (rtx insn, bool* is_spilled)
26291 {
26292 rtx dst;
26293
26294 if (!NONDEBUG_INSN_P (insn))
26295 return false;
26296   /* Call instructions are not movable, ignore them.  */
26297 if (CALL_P (insn))
26298 return false;
26299 insn = PATTERN (insn);
26300 if (GET_CODE (insn) == PARALLEL)
26301 insn = XVECEXP (insn, 0, 0);
26302 if (GET_CODE (insn) != SET)
26303 return false;
26304 dst = SET_DEST (insn);
26305 if (REG_P (dst) && HARD_REGISTER_P (dst)
26306 && ix86_function_arg_regno_p (REGNO (dst)))
26307 {
26308 /* Is it likely spilled HW register? */
26309 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26310 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26311 *is_spilled = true;
26312 return true;
26313 }
26314 return false;
26315 }
26316
26317 /* Add output dependencies for a chain of adjacent function arguments if
26318    there is a move to a likely-spilled HW register.  Return the first argument
26319    if at least one dependence was added, or NULL otherwise.  */
26320 static rtx_insn *
26321 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26322 {
26323 rtx_insn *insn;
26324 rtx_insn *last = call;
26325 rtx_insn *first_arg = NULL;
26326 bool is_spilled = false;
26327
26328 head = PREV_INSN (head);
26329
26330   /* Find the argument-passing instruction nearest to the call.  */
26331 while (true)
26332 {
26333 last = PREV_INSN (last);
26334 if (last == head)
26335 return NULL;
26336 if (!NONDEBUG_INSN_P (last))
26337 continue;
26338 if (insn_is_function_arg (last, &is_spilled))
26339 break;
26340 return NULL;
26341 }
26342
26343 first_arg = last;
26344 while (true)
26345 {
26346 insn = PREV_INSN (last);
26347 if (!INSN_P (insn))
26348 break;
26349 if (insn == head)
26350 break;
26351 if (!NONDEBUG_INSN_P (insn))
26352 {
26353 last = insn;
26354 continue;
26355 }
26356 if (insn_is_function_arg (insn, &is_spilled))
26357 {
26358 	  /* Add an output dependence between two function arguments if the chain
26359 	     of output arguments contains likely-spilled HW registers.  */
26360 if (is_spilled)
26361 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26362 first_arg = last = insn;
26363 }
26364 else
26365 break;
26366 }
26367 if (!is_spilled)
26368 return NULL;
26369 return first_arg;
26370 }
26371
26372 /* Add output or anti dependency from insn to first_arg to restrict its code
26373 motion. */
26374 static void
26375 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26376 {
26377 rtx set;
26378 rtx tmp;
26379
26380 set = single_set (insn);
26381 if (!set)
26382 return;
26383 tmp = SET_DEST (set);
26384 if (REG_P (tmp))
26385 {
26386 /* Add output dependency to the first function argument. */
26387 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26388 return;
26389 }
26390 /* Add anti dependency. */
26391 add_dependence (first_arg, insn, REG_DEP_ANTI);
26392 }
26393
26394 /* Avoid cross-block motion of a function argument by adding a dependency
26395    from the first non-jump instruction in bb.  */
26396 static void
26397 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26398 {
26399 rtx_insn *insn = BB_END (bb);
26400
26401 while (insn)
26402 {
26403 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26404 {
26405 rtx set = single_set (insn);
26406 if (set)
26407 {
26408 avoid_func_arg_motion (arg, insn);
26409 return;
26410 }
26411 }
26412 if (insn == BB_HEAD (bb))
26413 return;
26414 insn = PREV_INSN (insn);
26415 }
26416 }
26417
26418 /* Hook for pre-reload schedule - avoid motion of function arguments
26419 passed in likely spilled HW registers. */
26420 static void
26421 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26422 {
26423 rtx_insn *insn;
26424 rtx_insn *first_arg = NULL;
26425 if (reload_completed)
26426 return;
26427 while (head != tail && DEBUG_INSN_P (head))
26428 head = NEXT_INSN (head);
26429 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26430 if (INSN_P (insn) && CALL_P (insn))
26431 {
26432 first_arg = add_parameter_dependencies (insn, head);
26433 if (first_arg)
26434 {
26435 	  /* Add a dependee for the first argument to predecessors if the
26436 	     region contains more than one block.  */
26437 basic_block bb = BLOCK_FOR_INSN (insn);
26438 int rgn = CONTAINING_RGN (bb->index);
26439 int nr_blks = RGN_NR_BLOCKS (rgn);
26440 /* Skip trivial regions and region head blocks that can have
26441 predecessors outside of region. */
26442 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26443 {
26444 edge e;
26445 edge_iterator ei;
26446
26447 /* Regions are SCCs with the exception of selective
26448 scheduling with pipelining of outer blocks enabled.
26449 So also check that immediate predecessors of a non-head
26450 block are in the same region. */
26451 FOR_EACH_EDGE (e, ei, bb->preds)
26452 {
26453 		  /* Avoid creating loop-carried dependencies by using the
26454 		     topological ordering in the region.  */
26455 if (rgn == CONTAINING_RGN (e->src->index)
26456 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26457 add_dependee_for_func_arg (first_arg, e->src);
26458 }
26459 }
26460 insn = first_arg;
26461 if (insn == head)
26462 break;
26463 }
26464 }
26465 else if (first_arg)
26466 avoid_func_arg_motion (first_arg, insn);
26467 }
26468
26469 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26470    HW registers to the maximum, to schedule them as soon as possible.  These are
26471 moves from function argument registers at the top of the function entry
26472 and moves from function return value registers after call. */
26473 static int
26474 ix86_adjust_priority (rtx_insn *insn, int priority)
26475 {
26476 rtx set;
26477
26478 if (reload_completed)
26479 return priority;
26480
26481 if (!NONDEBUG_INSN_P (insn))
26482 return priority;
26483
26484 set = single_set (insn);
26485 if (set)
26486 {
26487 rtx tmp = SET_SRC (set);
26488 if (REG_P (tmp)
26489 && HARD_REGISTER_P (tmp)
26490 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26491 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26492 return current_sched_info->sched_max_insns_priority;
26493 }
26494
26495 return priority;
26496 }
26497
26498 /* Model decoder of Core 2/i7.
26499 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26500 track the instruction fetch block boundaries and make sure that long
26501 (9+ bytes) instructions are assigned to D0. */
26502
26503 /* Maximum length of an insn that can be handled by
26504 a secondary decoder unit. '8' for Core 2/i7. */
26505 static int core2i7_secondary_decoder_max_insn_size;
26506
26507 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26508 '16' for Core 2/i7. */
26509 static int core2i7_ifetch_block_size;
26510
26511 /* Maximum number of instructions decoder can handle per cycle.
26512 '6' for Core 2/i7. */
26513 static int core2i7_ifetch_block_max_insns;
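/* Example: with a 16-byte ifetch block, ready insns of 7, 6 and 5 bytes
   cannot all issue in one cycle -- after the first two (13 bytes) the
   5-byte insn would overflow the block, so the filter below masks it out
   until the next cycle.  */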
26514
26515 typedef struct ix86_first_cycle_multipass_data_ *
26516 ix86_first_cycle_multipass_data_t;
26517 typedef const struct ix86_first_cycle_multipass_data_ *
26518 const_ix86_first_cycle_multipass_data_t;
26519
26520 /* A variable to store target state across calls to max_issue within
26521 one cycle. */
26522 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26523 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26524
26525 /* Initialize DATA. */
26526 static void
26527 core2i7_first_cycle_multipass_init (void *_data)
26528 {
26529 ix86_first_cycle_multipass_data_t data
26530 = (ix86_first_cycle_multipass_data_t) _data;
26531
26532 data->ifetch_block_len = 0;
26533 data->ifetch_block_n_insns = 0;
26534 data->ready_try_change = NULL;
26535 data->ready_try_change_size = 0;
26536 }
26537
26538 /* Advancing the cycle; reset ifetch block counts. */
26539 static void
26540 core2i7_dfa_post_advance_cycle (void)
26541 {
26542 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26543
26544 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26545
26546 data->ifetch_block_len = 0;
26547 data->ifetch_block_n_insns = 0;
26548 }
26549
26550 static int min_insn_size (rtx_insn *);
26551
26552 /* Filter out insns from ready_try that the core will not be able to issue
26553 on current cycle due to decoder. */
26554 static void
26555 core2i7_first_cycle_multipass_filter_ready_try
26556 (const_ix86_first_cycle_multipass_data_t data,
26557 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26558 {
26559 while (n_ready--)
26560 {
26561 rtx_insn *insn;
26562 int insn_size;
26563
26564 if (ready_try[n_ready])
26565 continue;
26566
26567 insn = get_ready_element (n_ready);
26568 insn_size = min_insn_size (insn);
26569
26570       if (/* If this is too long an insn for a secondary decoder ... */
26571 (!first_cycle_insn_p
26572 && insn_size > core2i7_secondary_decoder_max_insn_size)
26573 /* ... or it would not fit into the ifetch block ... */
26574 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26575 /* ... or the decoder is full already ... */
26576 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26577 /* ... mask the insn out. */
26578 {
26579 ready_try[n_ready] = 1;
26580
26581 if (data->ready_try_change)
26582 bitmap_set_bit (data->ready_try_change, n_ready);
26583 }
26584 }
26585 }
26586
26587 /* Prepare for a new round of multipass lookahead scheduling. */
26588 static void
26589 core2i7_first_cycle_multipass_begin (void *_data,
26590 signed char *ready_try, int n_ready,
26591 bool first_cycle_insn_p)
26592 {
26593 ix86_first_cycle_multipass_data_t data
26594 = (ix86_first_cycle_multipass_data_t) _data;
26595 const_ix86_first_cycle_multipass_data_t prev_data
26596 = ix86_first_cycle_multipass_data;
26597
26598 /* Restore the state from the end of the previous round. */
26599 data->ifetch_block_len = prev_data->ifetch_block_len;
26600 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26601
26602 /* Filter instructions that cannot be issued on current cycle due to
26603 decoder restrictions. */
26604 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26605 first_cycle_insn_p);
26606 }
26607
26608 /* INSN is being issued in current solution. Account for its impact on
26609 the decoder model. */
26610 static void
26611 core2i7_first_cycle_multipass_issue (void *_data,
26612 signed char *ready_try, int n_ready,
26613 rtx_insn *insn, const void *_prev_data)
26614 {
26615 ix86_first_cycle_multipass_data_t data
26616 = (ix86_first_cycle_multipass_data_t) _data;
26617 const_ix86_first_cycle_multipass_data_t prev_data
26618 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26619
26620 int insn_size = min_insn_size (insn);
26621
26622 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26623 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26624 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26625 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26626
26627 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26628 if (!data->ready_try_change)
26629 {
26630 data->ready_try_change = sbitmap_alloc (n_ready);
26631 data->ready_try_change_size = n_ready;
26632 }
26633 else if (data->ready_try_change_size < n_ready)
26634 {
26635 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26636 n_ready, 0);
26637 data->ready_try_change_size = n_ready;
26638 }
26639 bitmap_clear (data->ready_try_change);
26640
26641 /* Filter out insns from ready_try that the core will not be able to issue
26642 on current cycle due to decoder. */
26643 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26644 false);
26645 }
26646
26647 /* Revert the effect on ready_try. */
26648 static void
26649 core2i7_first_cycle_multipass_backtrack (const void *_data,
26650 signed char *ready_try,
26651 int n_ready ATTRIBUTE_UNUSED)
26652 {
26653 const_ix86_first_cycle_multipass_data_t data
26654 = (const_ix86_first_cycle_multipass_data_t) _data;
26655 unsigned int i = 0;
26656 sbitmap_iterator sbi;
26657
26658 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26659 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26660 {
26661 ready_try[i] = 0;
26662 }
26663 }
26664
26665 /* Save the result of multipass lookahead scheduling for the next round. */
26666 static void
26667 core2i7_first_cycle_multipass_end (const void *_data)
26668 {
26669 const_ix86_first_cycle_multipass_data_t data
26670 = (const_ix86_first_cycle_multipass_data_t) _data;
26671 ix86_first_cycle_multipass_data_t next_data
26672 = ix86_first_cycle_multipass_data;
26673
26674 if (data != NULL)
26675 {
26676 next_data->ifetch_block_len = data->ifetch_block_len;
26677 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26678 }
26679 }
26680
26681 /* Deallocate target data. */
26682 static void
26683 core2i7_first_cycle_multipass_fini (void *_data)
26684 {
26685 ix86_first_cycle_multipass_data_t data
26686 = (ix86_first_cycle_multipass_data_t) _data;
26687
26688 if (data->ready_try_change)
26689 {
26690 sbitmap_free (data->ready_try_change);
26691 data->ready_try_change = NULL;
26692 data->ready_try_change_size = 0;
26693 }
26694 }
26695
26696 /* Prepare for scheduling pass. */
26697 static void
26698 ix86_sched_init_global (FILE *, int, int)
26699 {
26700 /* Install scheduling hooks for current CPU. Some of these hooks are used
26701 in time-critical parts of the scheduler, so we only set them up when
26702 they are actually used. */
26703 switch (ix86_tune)
26704 {
26705 case PROCESSOR_CORE2:
26706 case PROCESSOR_NEHALEM:
26707 case PROCESSOR_SANDYBRIDGE:
26708 case PROCESSOR_HASWELL:
26709 /* Do not perform multipass scheduling for pre-reload schedule
26710 to save compile time. */
26711 if (reload_completed)
26712 {
26713 targetm.sched.dfa_post_advance_cycle
26714 = core2i7_dfa_post_advance_cycle;
26715 targetm.sched.first_cycle_multipass_init
26716 = core2i7_first_cycle_multipass_init;
26717 targetm.sched.first_cycle_multipass_begin
26718 = core2i7_first_cycle_multipass_begin;
26719 targetm.sched.first_cycle_multipass_issue
26720 = core2i7_first_cycle_multipass_issue;
26721 targetm.sched.first_cycle_multipass_backtrack
26722 = core2i7_first_cycle_multipass_backtrack;
26723 targetm.sched.first_cycle_multipass_end
26724 = core2i7_first_cycle_multipass_end;
26725 targetm.sched.first_cycle_multipass_fini
26726 = core2i7_first_cycle_multipass_fini;
26727
26728 /* Set decoder parameters. */
26729 core2i7_secondary_decoder_max_insn_size = 8;
26730 core2i7_ifetch_block_size = 16;
26731 core2i7_ifetch_block_max_insns = 6;
26732 break;
26733 }
26734 /* ... Fall through ... */
26735 default:
26736 targetm.sched.dfa_post_advance_cycle = NULL;
26737 targetm.sched.first_cycle_multipass_init = NULL;
26738 targetm.sched.first_cycle_multipass_begin = NULL;
26739 targetm.sched.first_cycle_multipass_issue = NULL;
26740 targetm.sched.first_cycle_multipass_backtrack = NULL;
26741 targetm.sched.first_cycle_multipass_end = NULL;
26742 targetm.sched.first_cycle_multipass_fini = NULL;
26743 break;
26744 }
26745 }
26746
26747 \f
26748 /* Compute the alignment given to a constant that is being placed in memory.
26749 EXP is the constant and ALIGN is the alignment that the object would
26750 ordinarily have.
26751 The value of this function is used instead of that alignment to align
26752 the object. */
26753
26754 int
26755 ix86_constant_alignment (tree exp, int align)
26756 {
26757 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26758 || TREE_CODE (exp) == INTEGER_CST)
26759 {
26760 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26761 return 64;
26762 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26763 return 128;
26764 }
26765 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26766 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26767 return BITS_PER_WORD;
26768
26769 return align;
26770 }
26771
26772 /* Compute the alignment for a static variable.
26773 TYPE is the data type, and ALIGN is the alignment that
26774 the object would ordinarily have. The value of this function is used
26775 instead of that alignment to align the object. */
26776
26777 int
26778 ix86_data_alignment (tree type, int align, bool opt)
26779 {
26780 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26781 for symbols from other compilation units or symbols that don't need
26782 to bind locally. In order to preserve some ABI compatibility with
26783 those compilers, ensure we don't decrease alignment from what we
26784 used to assume. */
26785
26786 int max_align_compat
26787 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
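  /* For example, unless optimizing for size, a 64-byte static array
     (512 bits >= 256) is raised to 256-bit alignment below, matching what
     GCC 4.8 would have assumed.  */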
26788
26789 /* A data structure, equal or greater than the size of a cache line
26790 (64 bytes in the Pentium 4 and other recent Intel processors, including
26791 processors based on Intel Core microarchitecture) should be aligned
26792 so that its base address is a multiple of a cache line size. */
26793
26794 int max_align
26795 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26796
26797 if (max_align < BITS_PER_WORD)
26798 max_align = BITS_PER_WORD;
26799
26800 if (opt
26801 && AGGREGATE_TYPE_P (type)
26802 && TYPE_SIZE (type)
26803 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26804 {
26805 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26806 && align < max_align_compat)
26807 align = max_align_compat;
26808 if (wi::geu_p (TYPE_SIZE (type), max_align)
26809 && align < max_align)
26810 align = max_align;
26811 }
26812
26813   /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26814      to a 16-byte boundary.  */
26815 if (TARGET_64BIT)
26816 {
26817 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26818 && TYPE_SIZE (type)
26819 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26820 && wi::geu_p (TYPE_SIZE (type), 128)
26821 && align < 128)
26822 return 128;
26823 }
26824
26825 if (!opt)
26826 return align;
26827
26828 if (TREE_CODE (type) == ARRAY_TYPE)
26829 {
26830 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26831 return 64;
26832 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26833 return 128;
26834 }
26835 else if (TREE_CODE (type) == COMPLEX_TYPE)
26836 {
26837
26838 if (TYPE_MODE (type) == DCmode && align < 64)
26839 return 64;
26840 if ((TYPE_MODE (type) == XCmode
26841 || TYPE_MODE (type) == TCmode) && align < 128)
26842 return 128;
26843 }
26844 else if ((TREE_CODE (type) == RECORD_TYPE
26845 || TREE_CODE (type) == UNION_TYPE
26846 || TREE_CODE (type) == QUAL_UNION_TYPE)
26847 && TYPE_FIELDS (type))
26848 {
26849 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26850 return 64;
26851 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26852 return 128;
26853 }
26854 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26855 || TREE_CODE (type) == INTEGER_TYPE)
26856 {
26857 if (TYPE_MODE (type) == DFmode && align < 64)
26858 return 64;
26859 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26860 return 128;
26861 }
26862
26863 return align;
26864 }
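/* Illustrative examples for ix86_data_alignment (a sketch, assuming the
   typical 64-byte prefetch_block and OPT set):

     static char small[16];   alignment raised to 128 bits by the x86-64
                              ABI rule for arrays of 16 bytes or more;
     static char big[4096];   alignment raised first to max_align_compat
                              (256 bits) and then to a full cache line
                              (512 bits) by the max_align rule above.  */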
26865
26866 /* Compute the alignment for a local variable or a stack slot. EXP is
26867 the data type or decl itself, MODE is the widest mode available and
26868 ALIGN is the alignment that the object would ordinarily have. The
26869    value of this function is used instead of that alignment to align the
26870 object. */
26871
26872 unsigned int
26873 ix86_local_alignment (tree exp, enum machine_mode mode,
26874 unsigned int align)
26875 {
26876 tree type, decl;
26877
26878 if (exp && DECL_P (exp))
26879 {
26880 type = TREE_TYPE (exp);
26881 decl = exp;
26882 }
26883 else
26884 {
26885 type = exp;
26886 decl = NULL;
26887 }
26888
26889 /* Don't do dynamic stack realignment for long long objects with
26890 -mpreferred-stack-boundary=2. */
26891 if (!TARGET_64BIT
26892 && align == 64
26893 && ix86_preferred_stack_boundary < 64
26894 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26895 && (!type || !TYPE_USER_ALIGN (type))
26896 && (!decl || !DECL_USER_ALIGN (decl)))
26897 align = 32;
26898
26899   /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26900      register in MODE.  We will return the larger of the XFmode and
26901      DFmode alignments.  */
26902 if (!type)
26903 {
26904 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26905 align = GET_MODE_ALIGNMENT (DFmode);
26906 return align;
26907 }
26908
26909   /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26910      to a 16-byte boundary.  The exact wording is:
26911
26912        An array uses the same alignment as its elements, except that a local or
26913        global array variable of length at least 16 bytes or
26914        a C99 variable-length array variable always has alignment of at least 16 bytes.
26915
26916      This was added to allow the use of aligned SSE instructions on arrays.  The
26917      rule is meant for static storage (where the compiler cannot do the analysis
26918      by itself).  We follow it for automatic variables only when convenient:
26919      we fully control everything in the compiled function, and functions from
26920      other units cannot rely on the alignment.
26921
26922      Exclude the va_list type; it is the common case of a local array where
26923      we cannot benefit from the alignment.
26924
26925      TODO: We should probably optimize for size only when the variable does not escape.  */
26926 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26927 && TARGET_SSE)
26928 {
26929 if (AGGREGATE_TYPE_P (type)
26930 && (va_list_type_node == NULL_TREE
26931 || (TYPE_MAIN_VARIANT (type)
26932 != TYPE_MAIN_VARIANT (va_list_type_node)))
26933 && TYPE_SIZE (type)
26934 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26935 && wi::geu_p (TYPE_SIZE (type), 16)
26936 && align < 128)
26937 return 128;
26938 }
26939 if (TREE_CODE (type) == ARRAY_TYPE)
26940 {
26941 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26942 return 64;
26943 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26944 return 128;
26945 }
26946 else if (TREE_CODE (type) == COMPLEX_TYPE)
26947 {
26948 if (TYPE_MODE (type) == DCmode && align < 64)
26949 return 64;
26950 if ((TYPE_MODE (type) == XCmode
26951 || TYPE_MODE (type) == TCmode) && align < 128)
26952 return 128;
26953 }
26954 else if ((TREE_CODE (type) == RECORD_TYPE
26955 || TREE_CODE (type) == UNION_TYPE
26956 || TREE_CODE (type) == QUAL_UNION_TYPE)
26957 && TYPE_FIELDS (type))
26958 {
26959 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26960 return 64;
26961 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26962 return 128;
26963 }
26964 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26965 || TREE_CODE (type) == INTEGER_TYPE)
26966 {
26967
26968 if (TYPE_MODE (type) == DFmode && align < 64)
26969 return 64;
26970 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26971 return 128;
26972 }
26973 return align;
26974 }
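/* Illustrative example for ix86_local_alignment (a sketch): on x86-64 with
   SSE enabled, in a function optimized for speed, a local aggregate of at
   least 16 bytes that is not a va_list, e.g.

     unsigned char key[16];

   is given 128-bit stack alignment so it can be accessed with aligned SSE
   moves, while the same array keeps its ordinary alignment when the
   function is optimized for size.  */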
26975
26976 /* Compute the minimum required alignment for dynamic stack realignment
26977 purposes for a local variable, parameter or a stack slot. EXP is
26978 the data type or decl itself, MODE is its mode and ALIGN is the
26979 alignment that the object would ordinarily have. */
26980
26981 unsigned int
26982 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26983 unsigned int align)
26984 {
26985 tree type, decl;
26986
26987 if (exp && DECL_P (exp))
26988 {
26989 type = TREE_TYPE (exp);
26990 decl = exp;
26991 }
26992 else
26993 {
26994 type = exp;
26995 decl = NULL;
26996 }
26997
26998 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26999 return align;
27000
27001 /* Don't do dynamic stack realignment for long long objects with
27002 -mpreferred-stack-boundary=2. */
27003 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27004 && (!type || !TYPE_USER_ALIGN (type))
27005 && (!decl || !DECL_USER_ALIGN (decl)))
27006 return 32;
27007
27008 return align;
27009 }
27010 \f
27011 /* Find a location for the static chain incoming to a nested function.
27012 This is a register, unless all free registers are used by arguments. */
27013
27014 static rtx
27015 ix86_static_chain (const_tree fndecl, bool incoming_p)
27016 {
27017 unsigned regno;
27018
27019 if (!DECL_STATIC_CHAIN (fndecl))
27020 return NULL;
27021
27022 if (TARGET_64BIT)
27023 {
27024 /* We always use R10 in 64-bit mode. */
27025 regno = R10_REG;
27026 }
27027 else
27028 {
27029 tree fntype;
27030 unsigned int ccvt;
27031
27032 /* By default in 32-bit mode we use ECX to pass the static chain. */
27033 regno = CX_REG;
27034
27035 fntype = TREE_TYPE (fndecl);
27036 ccvt = ix86_get_callcvt (fntype);
27037 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27038 {
27039 /* Fastcall functions use ecx/edx for arguments, which leaves
27040 us with EAX for the static chain.
27041 Thiscall functions use ecx for arguments, which also
27042 leaves us with EAX for the static chain. */
27043 regno = AX_REG;
27044 }
27045 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27046 {
27047 	  /* Thiscall functions use ecx for arguments, which leaves
27048 	     us with EAX and EDX for the static chain.
27049 	     For ABI compatibility we use EAX.  */
27050 regno = AX_REG;
27051 }
27052 else if (ix86_function_regparm (fntype, fndecl) == 3)
27053 {
27054 /* For regparm 3, we have no free call-clobbered registers in
27055 which to store the static chain. In order to implement this,
27056 we have the trampoline push the static chain to the stack.
27057 However, we can't push a value below the return address when
27058 we call the nested function directly, so we have to use an
27059 alternate entry point. For this we use ESI, and have the
27060 alternate entry point push ESI, so that things appear the
27061 same once we're executing the nested function. */
27062 if (incoming_p)
27063 {
27064 if (fndecl == current_function_decl)
27065 ix86_static_chain_on_stack = true;
27066 return gen_frame_mem (SImode,
27067 plus_constant (Pmode,
27068 arg_pointer_rtx, -8));
27069 }
27070 regno = SI_REG;
27071 }
27072 }
27073
27074 return gen_rtx_REG (Pmode, regno);
27075 }
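/* Summary of the static chain locations chosen above (for illustration,
   mirroring the code): 64-bit always uses R10; 32-bit uses ECX by default,
   EAX for fastcall/thiscall functions, and for regparm(3) functions the
   incoming chain is read from the stack at arg_pointer - 8 while direct
   callers pass it in ESI via the alternate entry point.  */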
27076
27077 /* Emit RTL insns to initialize the variable parts of a trampoline.
27078 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27079 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27080 to be passed to the target function. */
27081
27082 static void
27083 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27084 {
27085 rtx mem, fnaddr;
27086 int opcode;
27087 int offset = 0;
27088
27089 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27090
27091 if (TARGET_64BIT)
27092 {
27093 int size;
27094
27095       /* Load the function address into r11.  Try to load the address
27096 	 using the shorter movl instead of movabs.  We may want to support
27097 	 movq for kernel mode, but the kernel does not use trampolines at
27098 	 the moment.  FNADDR is a 32-bit address and may not be in
27099 	 DImode when ptr_mode == SImode; always use movl in that
27100 	 case.  */
27101 if (ptr_mode == SImode
27102 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27103 {
27104 fnaddr = copy_addr_to_reg (fnaddr);
27105
27106 mem = adjust_address (m_tramp, HImode, offset);
27107 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27108
27109 mem = adjust_address (m_tramp, SImode, offset + 2);
27110 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27111 offset += 6;
27112 }
27113 else
27114 {
27115 mem = adjust_address (m_tramp, HImode, offset);
27116 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27117
27118 mem = adjust_address (m_tramp, DImode, offset + 2);
27119 emit_move_insn (mem, fnaddr);
27120 offset += 10;
27121 }
27122
27123       /* Load the static chain into r10 using movabs.  Use the shorter movl
27124 	 instead of movabs when ptr_mode == SImode.  */
27125 if (ptr_mode == SImode)
27126 {
27127 opcode = 0xba41;
27128 size = 6;
27129 }
27130 else
27131 {
27132 opcode = 0xba49;
27133 size = 10;
27134 }
27135
27136 mem = adjust_address (m_tramp, HImode, offset);
27137 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27138
27139 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27140 emit_move_insn (mem, chain_value);
27141 offset += size;
27142
27143 /* Jump to r11; the last (unused) byte is a nop, only there to
27144 pad the write out to a single 32-bit store. */
27145 mem = adjust_address (m_tramp, SImode, offset);
27146 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27147 offset += 4;
27148 }
27149 else
27150 {
27151 rtx disp, chain;
27152
27153 /* Depending on the static chain location, either load a register
27154 with a constant, or push the constant to the stack. All of the
27155 instructions are the same size. */
27156 chain = ix86_static_chain (fndecl, true);
27157 if (REG_P (chain))
27158 {
27159 switch (REGNO (chain))
27160 {
27161 case AX_REG:
27162 opcode = 0xb8; break;
27163 case CX_REG:
27164 opcode = 0xb9; break;
27165 default:
27166 gcc_unreachable ();
27167 }
27168 }
27169 else
27170 opcode = 0x68;
27171
27172 mem = adjust_address (m_tramp, QImode, offset);
27173 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27174
27175 mem = adjust_address (m_tramp, SImode, offset + 1);
27176 emit_move_insn (mem, chain_value);
27177 offset += 5;
27178
27179 mem = adjust_address (m_tramp, QImode, offset);
27180 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27181
27182 mem = adjust_address (m_tramp, SImode, offset + 1);
27183
27184       /* Compute the offset from the end of the jmp to the target function.
27185 	 In the case where the trampoline stores the static chain on
27186 	 the stack, we need to skip the first insn, which pushes the
27187 	 (call-saved) static chain register; this push is 1 byte.  */
27188 offset += 5;
27189 disp = expand_binop (SImode, sub_optab, fnaddr,
27190 plus_constant (Pmode, XEXP (m_tramp, 0),
27191 offset - (MEM_P (chain) ? 1 : 0)),
27192 NULL_RTX, 1, OPTAB_DIRECT);
27193 emit_move_insn (mem, disp);
27194 }
27195
27196 gcc_assert (offset <= TRAMPOLINE_SIZE);
27197
27198 #ifdef HAVE_ENABLE_EXECUTE_STACK
27199 #ifdef CHECK_EXECUTE_STACK_ENABLED
27200 if (CHECK_EXECUTE_STACK_ENABLED)
27201 #endif
27202 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27203 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27204 #endif
27205 }
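/* Worked example of the 64-bit trampoline emitted above (a sketch for
   illustration; the generic movabs path, 24 bytes total):

       49 bb <8-byte fnaddr>     movabs $fnaddr, %r11
       49 ba <8-byte chain>      movabs $chain,  %r10
       49 ff e3                  jmp    *%r11
       90                        nop (pads the last write to 32 bits)

   When ptr_mode == SImode (or FNADDR fits a zero-extended 32-bit
   immediate), the 41 bb / 41 ba movl forms are used instead and the
   corresponding entries shrink from 10 to 6 bytes.  The 32-bit trampoline
   is simply "mov/push $chain" (opcode b8, b9 or 68) followed by
   "jmp rel32" (opcode e9).  */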
27206 \f
27207 /* The following file contains several enumerations and data structures
27208 built from the definitions in i386-builtin-types.def. */
27209
27210 #include "i386-builtin-types.inc"
27211
27212 /* Table for the ix86 builtin non-function types. */
27213 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27214
27215 /* Retrieve an element from the above table, building some of
27216 the types lazily. */
27217
27218 static tree
27219 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27220 {
27221 unsigned int index;
27222 tree type, itype;
27223
27224 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27225
27226 type = ix86_builtin_type_tab[(int) tcode];
27227 if (type != NULL)
27228 return type;
27229
27230 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27231 if (tcode <= IX86_BT_LAST_VECT)
27232 {
27233 enum machine_mode mode;
27234
27235 index = tcode - IX86_BT_LAST_PRIM - 1;
27236 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27237 mode = ix86_builtin_type_vect_mode[index];
27238
27239 type = build_vector_type_for_mode (itype, mode);
27240 }
27241 else
27242 {
27243 int quals;
27244
27245 index = tcode - IX86_BT_LAST_VECT - 1;
27246 if (tcode <= IX86_BT_LAST_PTR)
27247 quals = TYPE_UNQUALIFIED;
27248 else
27249 quals = TYPE_QUAL_CONST;
27250
27251 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27252 if (quals != TYPE_UNQUALIFIED)
27253 itype = build_qualified_type (itype, quals);
27254
27255 type = build_pointer_type (itype);
27256 }
27257
27258 ix86_builtin_type_tab[(int) tcode] = type;
27259 return type;
27260 }
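/* For illustration (a sketch of the lazy construction above): a vector type
   code whose recorded base type is float and whose recorded mode is V4SFmode
   is built as build_vector_type_for_mode (float_type_node, V4SFmode), i.e. a
   4 x float vector; pointer codes wrap their base type with
   build_pointer_type, adding a const qualifier for the codes past
   IX86_BT_LAST_PTR.  */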
27261
27262 /* Table for the ix86 builtin function types. */
27263 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27264
27265 /* Retrieve an element from the above table, building some of
27266 the types lazily. */
27267
27268 static tree
27269 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27270 {
27271 tree type;
27272
27273 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27274
27275 type = ix86_builtin_func_type_tab[(int) tcode];
27276 if (type != NULL)
27277 return type;
27278
27279 if (tcode <= IX86_BT_LAST_FUNC)
27280 {
27281 unsigned start = ix86_builtin_func_start[(int) tcode];
27282 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27283 tree rtype, atype, args = void_list_node;
27284 unsigned i;
27285
27286 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27287 for (i = after - 1; i > start; --i)
27288 {
27289 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27290 args = tree_cons (NULL, atype, args);
27291 }
27292
27293 type = build_function_type (rtype, args);
27294 }
27295 else
27296 {
27297 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27298 enum ix86_builtin_func_type icode;
27299
27300 icode = ix86_builtin_func_alias_base[index];
27301 type = ix86_get_builtin_func_type (icode);
27302 }
27303
27304 ix86_builtin_func_type_tab[(int) tcode] = type;
27305 return type;
27306 }
27307
27308
27309 /* Codes for all the ix86 builtins (SSE/MMX and later ISA extensions).  */
27310 enum ix86_builtins
27311 {
27312 IX86_BUILTIN_ADDPS,
27313 IX86_BUILTIN_ADDSS,
27314 IX86_BUILTIN_DIVPS,
27315 IX86_BUILTIN_DIVSS,
27316 IX86_BUILTIN_MULPS,
27317 IX86_BUILTIN_MULSS,
27318 IX86_BUILTIN_SUBPS,
27319 IX86_BUILTIN_SUBSS,
27320
27321 IX86_BUILTIN_CMPEQPS,
27322 IX86_BUILTIN_CMPLTPS,
27323 IX86_BUILTIN_CMPLEPS,
27324 IX86_BUILTIN_CMPGTPS,
27325 IX86_BUILTIN_CMPGEPS,
27326 IX86_BUILTIN_CMPNEQPS,
27327 IX86_BUILTIN_CMPNLTPS,
27328 IX86_BUILTIN_CMPNLEPS,
27329 IX86_BUILTIN_CMPNGTPS,
27330 IX86_BUILTIN_CMPNGEPS,
27331 IX86_BUILTIN_CMPORDPS,
27332 IX86_BUILTIN_CMPUNORDPS,
27333 IX86_BUILTIN_CMPEQSS,
27334 IX86_BUILTIN_CMPLTSS,
27335 IX86_BUILTIN_CMPLESS,
27336 IX86_BUILTIN_CMPNEQSS,
27337 IX86_BUILTIN_CMPNLTSS,
27338 IX86_BUILTIN_CMPNLESS,
27339 IX86_BUILTIN_CMPORDSS,
27340 IX86_BUILTIN_CMPUNORDSS,
27341
27342 IX86_BUILTIN_COMIEQSS,
27343 IX86_BUILTIN_COMILTSS,
27344 IX86_BUILTIN_COMILESS,
27345 IX86_BUILTIN_COMIGTSS,
27346 IX86_BUILTIN_COMIGESS,
27347 IX86_BUILTIN_COMINEQSS,
27348 IX86_BUILTIN_UCOMIEQSS,
27349 IX86_BUILTIN_UCOMILTSS,
27350 IX86_BUILTIN_UCOMILESS,
27351 IX86_BUILTIN_UCOMIGTSS,
27352 IX86_BUILTIN_UCOMIGESS,
27353 IX86_BUILTIN_UCOMINEQSS,
27354
27355 IX86_BUILTIN_CVTPI2PS,
27356 IX86_BUILTIN_CVTPS2PI,
27357 IX86_BUILTIN_CVTSI2SS,
27358 IX86_BUILTIN_CVTSI642SS,
27359 IX86_BUILTIN_CVTSS2SI,
27360 IX86_BUILTIN_CVTSS2SI64,
27361 IX86_BUILTIN_CVTTPS2PI,
27362 IX86_BUILTIN_CVTTSS2SI,
27363 IX86_BUILTIN_CVTTSS2SI64,
27364
27365 IX86_BUILTIN_MAXPS,
27366 IX86_BUILTIN_MAXSS,
27367 IX86_BUILTIN_MINPS,
27368 IX86_BUILTIN_MINSS,
27369
27370 IX86_BUILTIN_LOADUPS,
27371 IX86_BUILTIN_STOREUPS,
27372 IX86_BUILTIN_MOVSS,
27373
27374 IX86_BUILTIN_MOVHLPS,
27375 IX86_BUILTIN_MOVLHPS,
27376 IX86_BUILTIN_LOADHPS,
27377 IX86_BUILTIN_LOADLPS,
27378 IX86_BUILTIN_STOREHPS,
27379 IX86_BUILTIN_STORELPS,
27380
27381 IX86_BUILTIN_MASKMOVQ,
27382 IX86_BUILTIN_MOVMSKPS,
27383 IX86_BUILTIN_PMOVMSKB,
27384
27385 IX86_BUILTIN_MOVNTPS,
27386 IX86_BUILTIN_MOVNTQ,
27387
27388 IX86_BUILTIN_LOADDQU,
27389 IX86_BUILTIN_STOREDQU,
27390
27391 IX86_BUILTIN_PACKSSWB,
27392 IX86_BUILTIN_PACKSSDW,
27393 IX86_BUILTIN_PACKUSWB,
27394
27395 IX86_BUILTIN_PADDB,
27396 IX86_BUILTIN_PADDW,
27397 IX86_BUILTIN_PADDD,
27398 IX86_BUILTIN_PADDQ,
27399 IX86_BUILTIN_PADDSB,
27400 IX86_BUILTIN_PADDSW,
27401 IX86_BUILTIN_PADDUSB,
27402 IX86_BUILTIN_PADDUSW,
27403 IX86_BUILTIN_PSUBB,
27404 IX86_BUILTIN_PSUBW,
27405 IX86_BUILTIN_PSUBD,
27406 IX86_BUILTIN_PSUBQ,
27407 IX86_BUILTIN_PSUBSB,
27408 IX86_BUILTIN_PSUBSW,
27409 IX86_BUILTIN_PSUBUSB,
27410 IX86_BUILTIN_PSUBUSW,
27411
27412 IX86_BUILTIN_PAND,
27413 IX86_BUILTIN_PANDN,
27414 IX86_BUILTIN_POR,
27415 IX86_BUILTIN_PXOR,
27416
27417 IX86_BUILTIN_PAVGB,
27418 IX86_BUILTIN_PAVGW,
27419
27420 IX86_BUILTIN_PCMPEQB,
27421 IX86_BUILTIN_PCMPEQW,
27422 IX86_BUILTIN_PCMPEQD,
27423 IX86_BUILTIN_PCMPGTB,
27424 IX86_BUILTIN_PCMPGTW,
27425 IX86_BUILTIN_PCMPGTD,
27426
27427 IX86_BUILTIN_PMADDWD,
27428
27429 IX86_BUILTIN_PMAXSW,
27430 IX86_BUILTIN_PMAXUB,
27431 IX86_BUILTIN_PMINSW,
27432 IX86_BUILTIN_PMINUB,
27433
27434 IX86_BUILTIN_PMULHUW,
27435 IX86_BUILTIN_PMULHW,
27436 IX86_BUILTIN_PMULLW,
27437
27438 IX86_BUILTIN_PSADBW,
27439 IX86_BUILTIN_PSHUFW,
27440
27441 IX86_BUILTIN_PSLLW,
27442 IX86_BUILTIN_PSLLD,
27443 IX86_BUILTIN_PSLLQ,
27444 IX86_BUILTIN_PSRAW,
27445 IX86_BUILTIN_PSRAD,
27446 IX86_BUILTIN_PSRLW,
27447 IX86_BUILTIN_PSRLD,
27448 IX86_BUILTIN_PSRLQ,
27449 IX86_BUILTIN_PSLLWI,
27450 IX86_BUILTIN_PSLLDI,
27451 IX86_BUILTIN_PSLLQI,
27452 IX86_BUILTIN_PSRAWI,
27453 IX86_BUILTIN_PSRADI,
27454 IX86_BUILTIN_PSRLWI,
27455 IX86_BUILTIN_PSRLDI,
27456 IX86_BUILTIN_PSRLQI,
27457
27458 IX86_BUILTIN_PUNPCKHBW,
27459 IX86_BUILTIN_PUNPCKHWD,
27460 IX86_BUILTIN_PUNPCKHDQ,
27461 IX86_BUILTIN_PUNPCKLBW,
27462 IX86_BUILTIN_PUNPCKLWD,
27463 IX86_BUILTIN_PUNPCKLDQ,
27464
27465 IX86_BUILTIN_SHUFPS,
27466
27467 IX86_BUILTIN_RCPPS,
27468 IX86_BUILTIN_RCPSS,
27469 IX86_BUILTIN_RSQRTPS,
27470 IX86_BUILTIN_RSQRTPS_NR,
27471 IX86_BUILTIN_RSQRTSS,
27472 IX86_BUILTIN_RSQRTF,
27473 IX86_BUILTIN_SQRTPS,
27474 IX86_BUILTIN_SQRTPS_NR,
27475 IX86_BUILTIN_SQRTSS,
27476
27477 IX86_BUILTIN_UNPCKHPS,
27478 IX86_BUILTIN_UNPCKLPS,
27479
27480 IX86_BUILTIN_ANDPS,
27481 IX86_BUILTIN_ANDNPS,
27482 IX86_BUILTIN_ORPS,
27483 IX86_BUILTIN_XORPS,
27484
27485 IX86_BUILTIN_EMMS,
27486 IX86_BUILTIN_LDMXCSR,
27487 IX86_BUILTIN_STMXCSR,
27488 IX86_BUILTIN_SFENCE,
27489
27490 IX86_BUILTIN_FXSAVE,
27491 IX86_BUILTIN_FXRSTOR,
27492 IX86_BUILTIN_FXSAVE64,
27493 IX86_BUILTIN_FXRSTOR64,
27494
27495 IX86_BUILTIN_XSAVE,
27496 IX86_BUILTIN_XRSTOR,
27497 IX86_BUILTIN_XSAVE64,
27498 IX86_BUILTIN_XRSTOR64,
27499
27500 IX86_BUILTIN_XSAVEOPT,
27501 IX86_BUILTIN_XSAVEOPT64,
27502
27503 IX86_BUILTIN_XSAVEC,
27504 IX86_BUILTIN_XSAVEC64,
27505
27506 IX86_BUILTIN_XSAVES,
27507 IX86_BUILTIN_XRSTORS,
27508 IX86_BUILTIN_XSAVES64,
27509 IX86_BUILTIN_XRSTORS64,
27510
27511 /* 3DNow! Original */
27512 IX86_BUILTIN_FEMMS,
27513 IX86_BUILTIN_PAVGUSB,
27514 IX86_BUILTIN_PF2ID,
27515 IX86_BUILTIN_PFACC,
27516 IX86_BUILTIN_PFADD,
27517 IX86_BUILTIN_PFCMPEQ,
27518 IX86_BUILTIN_PFCMPGE,
27519 IX86_BUILTIN_PFCMPGT,
27520 IX86_BUILTIN_PFMAX,
27521 IX86_BUILTIN_PFMIN,
27522 IX86_BUILTIN_PFMUL,
27523 IX86_BUILTIN_PFRCP,
27524 IX86_BUILTIN_PFRCPIT1,
27525 IX86_BUILTIN_PFRCPIT2,
27526 IX86_BUILTIN_PFRSQIT1,
27527 IX86_BUILTIN_PFRSQRT,
27528 IX86_BUILTIN_PFSUB,
27529 IX86_BUILTIN_PFSUBR,
27530 IX86_BUILTIN_PI2FD,
27531 IX86_BUILTIN_PMULHRW,
27532
27533 /* 3DNow! Athlon Extensions */
27534 IX86_BUILTIN_PF2IW,
27535 IX86_BUILTIN_PFNACC,
27536 IX86_BUILTIN_PFPNACC,
27537 IX86_BUILTIN_PI2FW,
27538 IX86_BUILTIN_PSWAPDSI,
27539 IX86_BUILTIN_PSWAPDSF,
27540
27541 /* SSE2 */
27542 IX86_BUILTIN_ADDPD,
27543 IX86_BUILTIN_ADDSD,
27544 IX86_BUILTIN_DIVPD,
27545 IX86_BUILTIN_DIVSD,
27546 IX86_BUILTIN_MULPD,
27547 IX86_BUILTIN_MULSD,
27548 IX86_BUILTIN_SUBPD,
27549 IX86_BUILTIN_SUBSD,
27550
27551 IX86_BUILTIN_CMPEQPD,
27552 IX86_BUILTIN_CMPLTPD,
27553 IX86_BUILTIN_CMPLEPD,
27554 IX86_BUILTIN_CMPGTPD,
27555 IX86_BUILTIN_CMPGEPD,
27556 IX86_BUILTIN_CMPNEQPD,
27557 IX86_BUILTIN_CMPNLTPD,
27558 IX86_BUILTIN_CMPNLEPD,
27559 IX86_BUILTIN_CMPNGTPD,
27560 IX86_BUILTIN_CMPNGEPD,
27561 IX86_BUILTIN_CMPORDPD,
27562 IX86_BUILTIN_CMPUNORDPD,
27563 IX86_BUILTIN_CMPEQSD,
27564 IX86_BUILTIN_CMPLTSD,
27565 IX86_BUILTIN_CMPLESD,
27566 IX86_BUILTIN_CMPNEQSD,
27567 IX86_BUILTIN_CMPNLTSD,
27568 IX86_BUILTIN_CMPNLESD,
27569 IX86_BUILTIN_CMPORDSD,
27570 IX86_BUILTIN_CMPUNORDSD,
27571
27572 IX86_BUILTIN_COMIEQSD,
27573 IX86_BUILTIN_COMILTSD,
27574 IX86_BUILTIN_COMILESD,
27575 IX86_BUILTIN_COMIGTSD,
27576 IX86_BUILTIN_COMIGESD,
27577 IX86_BUILTIN_COMINEQSD,
27578 IX86_BUILTIN_UCOMIEQSD,
27579 IX86_BUILTIN_UCOMILTSD,
27580 IX86_BUILTIN_UCOMILESD,
27581 IX86_BUILTIN_UCOMIGTSD,
27582 IX86_BUILTIN_UCOMIGESD,
27583 IX86_BUILTIN_UCOMINEQSD,
27584
27585 IX86_BUILTIN_MAXPD,
27586 IX86_BUILTIN_MAXSD,
27587 IX86_BUILTIN_MINPD,
27588 IX86_BUILTIN_MINSD,
27589
27590 IX86_BUILTIN_ANDPD,
27591 IX86_BUILTIN_ANDNPD,
27592 IX86_BUILTIN_ORPD,
27593 IX86_BUILTIN_XORPD,
27594
27595 IX86_BUILTIN_SQRTPD,
27596 IX86_BUILTIN_SQRTSD,
27597
27598 IX86_BUILTIN_UNPCKHPD,
27599 IX86_BUILTIN_UNPCKLPD,
27600
27601 IX86_BUILTIN_SHUFPD,
27602
27603 IX86_BUILTIN_LOADUPD,
27604 IX86_BUILTIN_STOREUPD,
27605 IX86_BUILTIN_MOVSD,
27606
27607 IX86_BUILTIN_LOADHPD,
27608 IX86_BUILTIN_LOADLPD,
27609
27610 IX86_BUILTIN_CVTDQ2PD,
27611 IX86_BUILTIN_CVTDQ2PS,
27612
27613 IX86_BUILTIN_CVTPD2DQ,
27614 IX86_BUILTIN_CVTPD2PI,
27615 IX86_BUILTIN_CVTPD2PS,
27616 IX86_BUILTIN_CVTTPD2DQ,
27617 IX86_BUILTIN_CVTTPD2PI,
27618
27619 IX86_BUILTIN_CVTPI2PD,
27620 IX86_BUILTIN_CVTSI2SD,
27621 IX86_BUILTIN_CVTSI642SD,
27622
27623 IX86_BUILTIN_CVTSD2SI,
27624 IX86_BUILTIN_CVTSD2SI64,
27625 IX86_BUILTIN_CVTSD2SS,
27626 IX86_BUILTIN_CVTSS2SD,
27627 IX86_BUILTIN_CVTTSD2SI,
27628 IX86_BUILTIN_CVTTSD2SI64,
27629
27630 IX86_BUILTIN_CVTPS2DQ,
27631 IX86_BUILTIN_CVTPS2PD,
27632 IX86_BUILTIN_CVTTPS2DQ,
27633
27634 IX86_BUILTIN_MOVNTI,
27635 IX86_BUILTIN_MOVNTI64,
27636 IX86_BUILTIN_MOVNTPD,
27637 IX86_BUILTIN_MOVNTDQ,
27638
27639 IX86_BUILTIN_MOVQ128,
27640
27641 /* SSE2 MMX */
27642 IX86_BUILTIN_MASKMOVDQU,
27643 IX86_BUILTIN_MOVMSKPD,
27644 IX86_BUILTIN_PMOVMSKB128,
27645
27646 IX86_BUILTIN_PACKSSWB128,
27647 IX86_BUILTIN_PACKSSDW128,
27648 IX86_BUILTIN_PACKUSWB128,
27649
27650 IX86_BUILTIN_PADDB128,
27651 IX86_BUILTIN_PADDW128,
27652 IX86_BUILTIN_PADDD128,
27653 IX86_BUILTIN_PADDQ128,
27654 IX86_BUILTIN_PADDSB128,
27655 IX86_BUILTIN_PADDSW128,
27656 IX86_BUILTIN_PADDUSB128,
27657 IX86_BUILTIN_PADDUSW128,
27658 IX86_BUILTIN_PSUBB128,
27659 IX86_BUILTIN_PSUBW128,
27660 IX86_BUILTIN_PSUBD128,
27661 IX86_BUILTIN_PSUBQ128,
27662 IX86_BUILTIN_PSUBSB128,
27663 IX86_BUILTIN_PSUBSW128,
27664 IX86_BUILTIN_PSUBUSB128,
27665 IX86_BUILTIN_PSUBUSW128,
27666
27667 IX86_BUILTIN_PAND128,
27668 IX86_BUILTIN_PANDN128,
27669 IX86_BUILTIN_POR128,
27670 IX86_BUILTIN_PXOR128,
27671
27672 IX86_BUILTIN_PAVGB128,
27673 IX86_BUILTIN_PAVGW128,
27674
27675 IX86_BUILTIN_PCMPEQB128,
27676 IX86_BUILTIN_PCMPEQW128,
27677 IX86_BUILTIN_PCMPEQD128,
27678 IX86_BUILTIN_PCMPGTB128,
27679 IX86_BUILTIN_PCMPGTW128,
27680 IX86_BUILTIN_PCMPGTD128,
27681
27682 IX86_BUILTIN_PMADDWD128,
27683
27684 IX86_BUILTIN_PMAXSW128,
27685 IX86_BUILTIN_PMAXUB128,
27686 IX86_BUILTIN_PMINSW128,
27687 IX86_BUILTIN_PMINUB128,
27688
27689 IX86_BUILTIN_PMULUDQ,
27690 IX86_BUILTIN_PMULUDQ128,
27691 IX86_BUILTIN_PMULHUW128,
27692 IX86_BUILTIN_PMULHW128,
27693 IX86_BUILTIN_PMULLW128,
27694
27695 IX86_BUILTIN_PSADBW128,
27696 IX86_BUILTIN_PSHUFHW,
27697 IX86_BUILTIN_PSHUFLW,
27698 IX86_BUILTIN_PSHUFD,
27699
27700 IX86_BUILTIN_PSLLDQI128,
27701 IX86_BUILTIN_PSLLWI128,
27702 IX86_BUILTIN_PSLLDI128,
27703 IX86_BUILTIN_PSLLQI128,
27704 IX86_BUILTIN_PSRAWI128,
27705 IX86_BUILTIN_PSRADI128,
27706 IX86_BUILTIN_PSRLDQI128,
27707 IX86_BUILTIN_PSRLWI128,
27708 IX86_BUILTIN_PSRLDI128,
27709 IX86_BUILTIN_PSRLQI128,
27710
27711 IX86_BUILTIN_PSLLDQ128,
27712 IX86_BUILTIN_PSLLW128,
27713 IX86_BUILTIN_PSLLD128,
27714 IX86_BUILTIN_PSLLQ128,
27715 IX86_BUILTIN_PSRAW128,
27716 IX86_BUILTIN_PSRAD128,
27717 IX86_BUILTIN_PSRLW128,
27718 IX86_BUILTIN_PSRLD128,
27719 IX86_BUILTIN_PSRLQ128,
27720
27721 IX86_BUILTIN_PUNPCKHBW128,
27722 IX86_BUILTIN_PUNPCKHWD128,
27723 IX86_BUILTIN_PUNPCKHDQ128,
27724 IX86_BUILTIN_PUNPCKHQDQ128,
27725 IX86_BUILTIN_PUNPCKLBW128,
27726 IX86_BUILTIN_PUNPCKLWD128,
27727 IX86_BUILTIN_PUNPCKLDQ128,
27728 IX86_BUILTIN_PUNPCKLQDQ128,
27729
27730 IX86_BUILTIN_CLFLUSH,
27731 IX86_BUILTIN_MFENCE,
27732 IX86_BUILTIN_LFENCE,
27733 IX86_BUILTIN_PAUSE,
27734
27735 IX86_BUILTIN_FNSTENV,
27736 IX86_BUILTIN_FLDENV,
27737 IX86_BUILTIN_FNSTSW,
27738 IX86_BUILTIN_FNCLEX,
27739
27740 IX86_BUILTIN_BSRSI,
27741 IX86_BUILTIN_BSRDI,
27742 IX86_BUILTIN_RDPMC,
27743 IX86_BUILTIN_RDTSC,
27744 IX86_BUILTIN_RDTSCP,
27745 IX86_BUILTIN_ROLQI,
27746 IX86_BUILTIN_ROLHI,
27747 IX86_BUILTIN_RORQI,
27748 IX86_BUILTIN_RORHI,
27749
27750 /* SSE3. */
27751 IX86_BUILTIN_ADDSUBPS,
27752 IX86_BUILTIN_HADDPS,
27753 IX86_BUILTIN_HSUBPS,
27754 IX86_BUILTIN_MOVSHDUP,
27755 IX86_BUILTIN_MOVSLDUP,
27756 IX86_BUILTIN_ADDSUBPD,
27757 IX86_BUILTIN_HADDPD,
27758 IX86_BUILTIN_HSUBPD,
27759 IX86_BUILTIN_LDDQU,
27760
27761 IX86_BUILTIN_MONITOR,
27762 IX86_BUILTIN_MWAIT,
27763
27764 /* SSSE3. */
27765 IX86_BUILTIN_PHADDW,
27766 IX86_BUILTIN_PHADDD,
27767 IX86_BUILTIN_PHADDSW,
27768 IX86_BUILTIN_PHSUBW,
27769 IX86_BUILTIN_PHSUBD,
27770 IX86_BUILTIN_PHSUBSW,
27771 IX86_BUILTIN_PMADDUBSW,
27772 IX86_BUILTIN_PMULHRSW,
27773 IX86_BUILTIN_PSHUFB,
27774 IX86_BUILTIN_PSIGNB,
27775 IX86_BUILTIN_PSIGNW,
27776 IX86_BUILTIN_PSIGND,
27777 IX86_BUILTIN_PALIGNR,
27778 IX86_BUILTIN_PABSB,
27779 IX86_BUILTIN_PABSW,
27780 IX86_BUILTIN_PABSD,
27781
27782 IX86_BUILTIN_PHADDW128,
27783 IX86_BUILTIN_PHADDD128,
27784 IX86_BUILTIN_PHADDSW128,
27785 IX86_BUILTIN_PHSUBW128,
27786 IX86_BUILTIN_PHSUBD128,
27787 IX86_BUILTIN_PHSUBSW128,
27788 IX86_BUILTIN_PMADDUBSW128,
27789 IX86_BUILTIN_PMULHRSW128,
27790 IX86_BUILTIN_PSHUFB128,
27791 IX86_BUILTIN_PSIGNB128,
27792 IX86_BUILTIN_PSIGNW128,
27793 IX86_BUILTIN_PSIGND128,
27794 IX86_BUILTIN_PALIGNR128,
27795 IX86_BUILTIN_PABSB128,
27796 IX86_BUILTIN_PABSW128,
27797 IX86_BUILTIN_PABSD128,
27798
27799 /* AMDFAM10 - SSE4A New Instructions. */
27800 IX86_BUILTIN_MOVNTSD,
27801 IX86_BUILTIN_MOVNTSS,
27802 IX86_BUILTIN_EXTRQI,
27803 IX86_BUILTIN_EXTRQ,
27804 IX86_BUILTIN_INSERTQI,
27805 IX86_BUILTIN_INSERTQ,
27806
27807 /* SSE4.1. */
27808 IX86_BUILTIN_BLENDPD,
27809 IX86_BUILTIN_BLENDPS,
27810 IX86_BUILTIN_BLENDVPD,
27811 IX86_BUILTIN_BLENDVPS,
27812 IX86_BUILTIN_PBLENDVB128,
27813 IX86_BUILTIN_PBLENDW128,
27814
27815 IX86_BUILTIN_DPPD,
27816 IX86_BUILTIN_DPPS,
27817
27818 IX86_BUILTIN_INSERTPS128,
27819
27820 IX86_BUILTIN_MOVNTDQA,
27821 IX86_BUILTIN_MPSADBW128,
27822 IX86_BUILTIN_PACKUSDW128,
27823 IX86_BUILTIN_PCMPEQQ,
27824 IX86_BUILTIN_PHMINPOSUW128,
27825
27826 IX86_BUILTIN_PMAXSB128,
27827 IX86_BUILTIN_PMAXSD128,
27828 IX86_BUILTIN_PMAXUD128,
27829 IX86_BUILTIN_PMAXUW128,
27830
27831 IX86_BUILTIN_PMINSB128,
27832 IX86_BUILTIN_PMINSD128,
27833 IX86_BUILTIN_PMINUD128,
27834 IX86_BUILTIN_PMINUW128,
27835
27836 IX86_BUILTIN_PMOVSXBW128,
27837 IX86_BUILTIN_PMOVSXBD128,
27838 IX86_BUILTIN_PMOVSXBQ128,
27839 IX86_BUILTIN_PMOVSXWD128,
27840 IX86_BUILTIN_PMOVSXWQ128,
27841 IX86_BUILTIN_PMOVSXDQ128,
27842
27843 IX86_BUILTIN_PMOVZXBW128,
27844 IX86_BUILTIN_PMOVZXBD128,
27845 IX86_BUILTIN_PMOVZXBQ128,
27846 IX86_BUILTIN_PMOVZXWD128,
27847 IX86_BUILTIN_PMOVZXWQ128,
27848 IX86_BUILTIN_PMOVZXDQ128,
27849
27850 IX86_BUILTIN_PMULDQ128,
27851 IX86_BUILTIN_PMULLD128,
27852
27853 IX86_BUILTIN_ROUNDSD,
27854 IX86_BUILTIN_ROUNDSS,
27855
27856 IX86_BUILTIN_ROUNDPD,
27857 IX86_BUILTIN_ROUNDPS,
27858
27859 IX86_BUILTIN_FLOORPD,
27860 IX86_BUILTIN_CEILPD,
27861 IX86_BUILTIN_TRUNCPD,
27862 IX86_BUILTIN_RINTPD,
27863 IX86_BUILTIN_ROUNDPD_AZ,
27864
27865 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27866 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27867 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27868
27869 IX86_BUILTIN_FLOORPS,
27870 IX86_BUILTIN_CEILPS,
27871 IX86_BUILTIN_TRUNCPS,
27872 IX86_BUILTIN_RINTPS,
27873 IX86_BUILTIN_ROUNDPS_AZ,
27874
27875 IX86_BUILTIN_FLOORPS_SFIX,
27876 IX86_BUILTIN_CEILPS_SFIX,
27877 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27878
27879 IX86_BUILTIN_PTESTZ,
27880 IX86_BUILTIN_PTESTC,
27881 IX86_BUILTIN_PTESTNZC,
27882
27883 IX86_BUILTIN_VEC_INIT_V2SI,
27884 IX86_BUILTIN_VEC_INIT_V4HI,
27885 IX86_BUILTIN_VEC_INIT_V8QI,
27886 IX86_BUILTIN_VEC_EXT_V2DF,
27887 IX86_BUILTIN_VEC_EXT_V2DI,
27888 IX86_BUILTIN_VEC_EXT_V4SF,
27889 IX86_BUILTIN_VEC_EXT_V4SI,
27890 IX86_BUILTIN_VEC_EXT_V8HI,
27891 IX86_BUILTIN_VEC_EXT_V2SI,
27892 IX86_BUILTIN_VEC_EXT_V4HI,
27893 IX86_BUILTIN_VEC_EXT_V16QI,
27894 IX86_BUILTIN_VEC_SET_V2DI,
27895 IX86_BUILTIN_VEC_SET_V4SF,
27896 IX86_BUILTIN_VEC_SET_V4SI,
27897 IX86_BUILTIN_VEC_SET_V8HI,
27898 IX86_BUILTIN_VEC_SET_V4HI,
27899 IX86_BUILTIN_VEC_SET_V16QI,
27900
27901 IX86_BUILTIN_VEC_PACK_SFIX,
27902 IX86_BUILTIN_VEC_PACK_SFIX256,
27903
27904 /* SSE4.2. */
27905 IX86_BUILTIN_CRC32QI,
27906 IX86_BUILTIN_CRC32HI,
27907 IX86_BUILTIN_CRC32SI,
27908 IX86_BUILTIN_CRC32DI,
27909
27910 IX86_BUILTIN_PCMPESTRI128,
27911 IX86_BUILTIN_PCMPESTRM128,
27912 IX86_BUILTIN_PCMPESTRA128,
27913 IX86_BUILTIN_PCMPESTRC128,
27914 IX86_BUILTIN_PCMPESTRO128,
27915 IX86_BUILTIN_PCMPESTRS128,
27916 IX86_BUILTIN_PCMPESTRZ128,
27917 IX86_BUILTIN_PCMPISTRI128,
27918 IX86_BUILTIN_PCMPISTRM128,
27919 IX86_BUILTIN_PCMPISTRA128,
27920 IX86_BUILTIN_PCMPISTRC128,
27921 IX86_BUILTIN_PCMPISTRO128,
27922 IX86_BUILTIN_PCMPISTRS128,
27923 IX86_BUILTIN_PCMPISTRZ128,
27924
27925 IX86_BUILTIN_PCMPGTQ,
27926
27927 /* AES instructions */
27928 IX86_BUILTIN_AESENC128,
27929 IX86_BUILTIN_AESENCLAST128,
27930 IX86_BUILTIN_AESDEC128,
27931 IX86_BUILTIN_AESDECLAST128,
27932 IX86_BUILTIN_AESIMC128,
27933 IX86_BUILTIN_AESKEYGENASSIST128,
27934
27935 /* PCLMUL instruction */
27936 IX86_BUILTIN_PCLMULQDQ128,
27937
27938 /* AVX */
27939 IX86_BUILTIN_ADDPD256,
27940 IX86_BUILTIN_ADDPS256,
27941 IX86_BUILTIN_ADDSUBPD256,
27942 IX86_BUILTIN_ADDSUBPS256,
27943 IX86_BUILTIN_ANDPD256,
27944 IX86_BUILTIN_ANDPS256,
27945 IX86_BUILTIN_ANDNPD256,
27946 IX86_BUILTIN_ANDNPS256,
27947 IX86_BUILTIN_BLENDPD256,
27948 IX86_BUILTIN_BLENDPS256,
27949 IX86_BUILTIN_BLENDVPD256,
27950 IX86_BUILTIN_BLENDVPS256,
27951 IX86_BUILTIN_DIVPD256,
27952 IX86_BUILTIN_DIVPS256,
27953 IX86_BUILTIN_DPPS256,
27954 IX86_BUILTIN_HADDPD256,
27955 IX86_BUILTIN_HADDPS256,
27956 IX86_BUILTIN_HSUBPD256,
27957 IX86_BUILTIN_HSUBPS256,
27958 IX86_BUILTIN_MAXPD256,
27959 IX86_BUILTIN_MAXPS256,
27960 IX86_BUILTIN_MINPD256,
27961 IX86_BUILTIN_MINPS256,
27962 IX86_BUILTIN_MULPD256,
27963 IX86_BUILTIN_MULPS256,
27964 IX86_BUILTIN_ORPD256,
27965 IX86_BUILTIN_ORPS256,
27966 IX86_BUILTIN_SHUFPD256,
27967 IX86_BUILTIN_SHUFPS256,
27968 IX86_BUILTIN_SUBPD256,
27969 IX86_BUILTIN_SUBPS256,
27970 IX86_BUILTIN_XORPD256,
27971 IX86_BUILTIN_XORPS256,
27972 IX86_BUILTIN_CMPSD,
27973 IX86_BUILTIN_CMPSS,
27974 IX86_BUILTIN_CMPPD,
27975 IX86_BUILTIN_CMPPS,
27976 IX86_BUILTIN_CMPPD256,
27977 IX86_BUILTIN_CMPPS256,
27978 IX86_BUILTIN_CVTDQ2PD256,
27979 IX86_BUILTIN_CVTDQ2PS256,
27980 IX86_BUILTIN_CVTPD2PS256,
27981 IX86_BUILTIN_CVTPS2DQ256,
27982 IX86_BUILTIN_CVTPS2PD256,
27983 IX86_BUILTIN_CVTTPD2DQ256,
27984 IX86_BUILTIN_CVTPD2DQ256,
27985 IX86_BUILTIN_CVTTPS2DQ256,
27986 IX86_BUILTIN_EXTRACTF128PD256,
27987 IX86_BUILTIN_EXTRACTF128PS256,
27988 IX86_BUILTIN_EXTRACTF128SI256,
27989 IX86_BUILTIN_VZEROALL,
27990 IX86_BUILTIN_VZEROUPPER,
27991 IX86_BUILTIN_VPERMILVARPD,
27992 IX86_BUILTIN_VPERMILVARPS,
27993 IX86_BUILTIN_VPERMILVARPD256,
27994 IX86_BUILTIN_VPERMILVARPS256,
27995 IX86_BUILTIN_VPERMILPD,
27996 IX86_BUILTIN_VPERMILPS,
27997 IX86_BUILTIN_VPERMILPD256,
27998 IX86_BUILTIN_VPERMILPS256,
27999 IX86_BUILTIN_VPERMIL2PD,
28000 IX86_BUILTIN_VPERMIL2PS,
28001 IX86_BUILTIN_VPERMIL2PD256,
28002 IX86_BUILTIN_VPERMIL2PS256,
28003 IX86_BUILTIN_VPERM2F128PD256,
28004 IX86_BUILTIN_VPERM2F128PS256,
28005 IX86_BUILTIN_VPERM2F128SI256,
28006 IX86_BUILTIN_VBROADCASTSS,
28007 IX86_BUILTIN_VBROADCASTSD256,
28008 IX86_BUILTIN_VBROADCASTSS256,
28009 IX86_BUILTIN_VBROADCASTPD256,
28010 IX86_BUILTIN_VBROADCASTPS256,
28011 IX86_BUILTIN_VINSERTF128PD256,
28012 IX86_BUILTIN_VINSERTF128PS256,
28013 IX86_BUILTIN_VINSERTF128SI256,
28014 IX86_BUILTIN_LOADUPD256,
28015 IX86_BUILTIN_LOADUPS256,
28016 IX86_BUILTIN_STOREUPD256,
28017 IX86_BUILTIN_STOREUPS256,
28018 IX86_BUILTIN_LDDQU256,
28019 IX86_BUILTIN_MOVNTDQ256,
28020 IX86_BUILTIN_MOVNTPD256,
28021 IX86_BUILTIN_MOVNTPS256,
28022 IX86_BUILTIN_LOADDQU256,
28023 IX86_BUILTIN_STOREDQU256,
28024 IX86_BUILTIN_MASKLOADPD,
28025 IX86_BUILTIN_MASKLOADPS,
28026 IX86_BUILTIN_MASKSTOREPD,
28027 IX86_BUILTIN_MASKSTOREPS,
28028 IX86_BUILTIN_MASKLOADPD256,
28029 IX86_BUILTIN_MASKLOADPS256,
28030 IX86_BUILTIN_MASKSTOREPD256,
28031 IX86_BUILTIN_MASKSTOREPS256,
28032 IX86_BUILTIN_MOVSHDUP256,
28033 IX86_BUILTIN_MOVSLDUP256,
28034 IX86_BUILTIN_MOVDDUP256,
28035
28036 IX86_BUILTIN_SQRTPD256,
28037 IX86_BUILTIN_SQRTPS256,
28038 IX86_BUILTIN_SQRTPS_NR256,
28039 IX86_BUILTIN_RSQRTPS256,
28040 IX86_BUILTIN_RSQRTPS_NR256,
28041
28042 IX86_BUILTIN_RCPPS256,
28043
28044 IX86_BUILTIN_ROUNDPD256,
28045 IX86_BUILTIN_ROUNDPS256,
28046
28047 IX86_BUILTIN_FLOORPD256,
28048 IX86_BUILTIN_CEILPD256,
28049 IX86_BUILTIN_TRUNCPD256,
28050 IX86_BUILTIN_RINTPD256,
28051 IX86_BUILTIN_ROUNDPD_AZ256,
28052
28053 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28054 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28055 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28056
28057 IX86_BUILTIN_FLOORPS256,
28058 IX86_BUILTIN_CEILPS256,
28059 IX86_BUILTIN_TRUNCPS256,
28060 IX86_BUILTIN_RINTPS256,
28061 IX86_BUILTIN_ROUNDPS_AZ256,
28062
28063 IX86_BUILTIN_FLOORPS_SFIX256,
28064 IX86_BUILTIN_CEILPS_SFIX256,
28065 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28066
28067 IX86_BUILTIN_UNPCKHPD256,
28068 IX86_BUILTIN_UNPCKLPD256,
28069 IX86_BUILTIN_UNPCKHPS256,
28070 IX86_BUILTIN_UNPCKLPS256,
28071
28072 IX86_BUILTIN_SI256_SI,
28073 IX86_BUILTIN_PS256_PS,
28074 IX86_BUILTIN_PD256_PD,
28075 IX86_BUILTIN_SI_SI256,
28076 IX86_BUILTIN_PS_PS256,
28077 IX86_BUILTIN_PD_PD256,
28078
28079 IX86_BUILTIN_VTESTZPD,
28080 IX86_BUILTIN_VTESTCPD,
28081 IX86_BUILTIN_VTESTNZCPD,
28082 IX86_BUILTIN_VTESTZPS,
28083 IX86_BUILTIN_VTESTCPS,
28084 IX86_BUILTIN_VTESTNZCPS,
28085 IX86_BUILTIN_VTESTZPD256,
28086 IX86_BUILTIN_VTESTCPD256,
28087 IX86_BUILTIN_VTESTNZCPD256,
28088 IX86_BUILTIN_VTESTZPS256,
28089 IX86_BUILTIN_VTESTCPS256,
28090 IX86_BUILTIN_VTESTNZCPS256,
28091 IX86_BUILTIN_PTESTZ256,
28092 IX86_BUILTIN_PTESTC256,
28093 IX86_BUILTIN_PTESTNZC256,
28094
28095 IX86_BUILTIN_MOVMSKPD256,
28096 IX86_BUILTIN_MOVMSKPS256,
28097
28098 /* AVX2 */
28099 IX86_BUILTIN_MPSADBW256,
28100 IX86_BUILTIN_PABSB256,
28101 IX86_BUILTIN_PABSW256,
28102 IX86_BUILTIN_PABSD256,
28103 IX86_BUILTIN_PACKSSDW256,
28104 IX86_BUILTIN_PACKSSWB256,
28105 IX86_BUILTIN_PACKUSDW256,
28106 IX86_BUILTIN_PACKUSWB256,
28107 IX86_BUILTIN_PADDB256,
28108 IX86_BUILTIN_PADDW256,
28109 IX86_BUILTIN_PADDD256,
28110 IX86_BUILTIN_PADDQ256,
28111 IX86_BUILTIN_PADDSB256,
28112 IX86_BUILTIN_PADDSW256,
28113 IX86_BUILTIN_PADDUSB256,
28114 IX86_BUILTIN_PADDUSW256,
28115 IX86_BUILTIN_PALIGNR256,
28116 IX86_BUILTIN_AND256I,
28117 IX86_BUILTIN_ANDNOT256I,
28118 IX86_BUILTIN_PAVGB256,
28119 IX86_BUILTIN_PAVGW256,
28120 IX86_BUILTIN_PBLENDVB256,
28121 IX86_BUILTIN_PBLENDVW256,
28122 IX86_BUILTIN_PCMPEQB256,
28123 IX86_BUILTIN_PCMPEQW256,
28124 IX86_BUILTIN_PCMPEQD256,
28125 IX86_BUILTIN_PCMPEQQ256,
28126 IX86_BUILTIN_PCMPGTB256,
28127 IX86_BUILTIN_PCMPGTW256,
28128 IX86_BUILTIN_PCMPGTD256,
28129 IX86_BUILTIN_PCMPGTQ256,
28130 IX86_BUILTIN_PHADDW256,
28131 IX86_BUILTIN_PHADDD256,
28132 IX86_BUILTIN_PHADDSW256,
28133 IX86_BUILTIN_PHSUBW256,
28134 IX86_BUILTIN_PHSUBD256,
28135 IX86_BUILTIN_PHSUBSW256,
28136 IX86_BUILTIN_PMADDUBSW256,
28137 IX86_BUILTIN_PMADDWD256,
28138 IX86_BUILTIN_PMAXSB256,
28139 IX86_BUILTIN_PMAXSW256,
28140 IX86_BUILTIN_PMAXSD256,
28141 IX86_BUILTIN_PMAXUB256,
28142 IX86_BUILTIN_PMAXUW256,
28143 IX86_BUILTIN_PMAXUD256,
28144 IX86_BUILTIN_PMINSB256,
28145 IX86_BUILTIN_PMINSW256,
28146 IX86_BUILTIN_PMINSD256,
28147 IX86_BUILTIN_PMINUB256,
28148 IX86_BUILTIN_PMINUW256,
28149 IX86_BUILTIN_PMINUD256,
28150 IX86_BUILTIN_PMOVMSKB256,
28151 IX86_BUILTIN_PMOVSXBW256,
28152 IX86_BUILTIN_PMOVSXBD256,
28153 IX86_BUILTIN_PMOVSXBQ256,
28154 IX86_BUILTIN_PMOVSXWD256,
28155 IX86_BUILTIN_PMOVSXWQ256,
28156 IX86_BUILTIN_PMOVSXDQ256,
28157 IX86_BUILTIN_PMOVZXBW256,
28158 IX86_BUILTIN_PMOVZXBD256,
28159 IX86_BUILTIN_PMOVZXBQ256,
28160 IX86_BUILTIN_PMOVZXWD256,
28161 IX86_BUILTIN_PMOVZXWQ256,
28162 IX86_BUILTIN_PMOVZXDQ256,
28163 IX86_BUILTIN_PMULDQ256,
28164 IX86_BUILTIN_PMULHRSW256,
28165 IX86_BUILTIN_PMULHUW256,
28166 IX86_BUILTIN_PMULHW256,
28167 IX86_BUILTIN_PMULLW256,
28168 IX86_BUILTIN_PMULLD256,
28169 IX86_BUILTIN_PMULUDQ256,
28170 IX86_BUILTIN_POR256,
28171 IX86_BUILTIN_PSADBW256,
28172 IX86_BUILTIN_PSHUFB256,
28173 IX86_BUILTIN_PSHUFD256,
28174 IX86_BUILTIN_PSHUFHW256,
28175 IX86_BUILTIN_PSHUFLW256,
28176 IX86_BUILTIN_PSIGNB256,
28177 IX86_BUILTIN_PSIGNW256,
28178 IX86_BUILTIN_PSIGND256,
28179 IX86_BUILTIN_PSLLDQI256,
28180 IX86_BUILTIN_PSLLWI256,
28181 IX86_BUILTIN_PSLLW256,
28182 IX86_BUILTIN_PSLLDI256,
28183 IX86_BUILTIN_PSLLD256,
28184 IX86_BUILTIN_PSLLQI256,
28185 IX86_BUILTIN_PSLLQ256,
28186 IX86_BUILTIN_PSRAWI256,
28187 IX86_BUILTIN_PSRAW256,
28188 IX86_BUILTIN_PSRADI256,
28189 IX86_BUILTIN_PSRAD256,
28190 IX86_BUILTIN_PSRLDQI256,
28191 IX86_BUILTIN_PSRLWI256,
28192 IX86_BUILTIN_PSRLW256,
28193 IX86_BUILTIN_PSRLDI256,
28194 IX86_BUILTIN_PSRLD256,
28195 IX86_BUILTIN_PSRLQI256,
28196 IX86_BUILTIN_PSRLQ256,
28197 IX86_BUILTIN_PSUBB256,
28198 IX86_BUILTIN_PSUBW256,
28199 IX86_BUILTIN_PSUBD256,
28200 IX86_BUILTIN_PSUBQ256,
28201 IX86_BUILTIN_PSUBSB256,
28202 IX86_BUILTIN_PSUBSW256,
28203 IX86_BUILTIN_PSUBUSB256,
28204 IX86_BUILTIN_PSUBUSW256,
28205 IX86_BUILTIN_PUNPCKHBW256,
28206 IX86_BUILTIN_PUNPCKHWD256,
28207 IX86_BUILTIN_PUNPCKHDQ256,
28208 IX86_BUILTIN_PUNPCKHQDQ256,
28209 IX86_BUILTIN_PUNPCKLBW256,
28210 IX86_BUILTIN_PUNPCKLWD256,
28211 IX86_BUILTIN_PUNPCKLDQ256,
28212 IX86_BUILTIN_PUNPCKLQDQ256,
28213 IX86_BUILTIN_PXOR256,
28214 IX86_BUILTIN_MOVNTDQA256,
28215 IX86_BUILTIN_VBROADCASTSS_PS,
28216 IX86_BUILTIN_VBROADCASTSS_PS256,
28217 IX86_BUILTIN_VBROADCASTSD_PD256,
28218 IX86_BUILTIN_VBROADCASTSI256,
28219 IX86_BUILTIN_PBLENDD256,
28220 IX86_BUILTIN_PBLENDD128,
28221 IX86_BUILTIN_PBROADCASTB256,
28222 IX86_BUILTIN_PBROADCASTW256,
28223 IX86_BUILTIN_PBROADCASTD256,
28224 IX86_BUILTIN_PBROADCASTQ256,
28225 IX86_BUILTIN_PBROADCASTB128,
28226 IX86_BUILTIN_PBROADCASTW128,
28227 IX86_BUILTIN_PBROADCASTD128,
28228 IX86_BUILTIN_PBROADCASTQ128,
28229 IX86_BUILTIN_VPERMVARSI256,
28230 IX86_BUILTIN_VPERMDF256,
28231 IX86_BUILTIN_VPERMVARSF256,
28232 IX86_BUILTIN_VPERMDI256,
28233 IX86_BUILTIN_VPERMTI256,
28234 IX86_BUILTIN_VEXTRACT128I256,
28235 IX86_BUILTIN_VINSERT128I256,
28236 IX86_BUILTIN_MASKLOADD,
28237 IX86_BUILTIN_MASKLOADQ,
28238 IX86_BUILTIN_MASKLOADD256,
28239 IX86_BUILTIN_MASKLOADQ256,
28240 IX86_BUILTIN_MASKSTORED,
28241 IX86_BUILTIN_MASKSTOREQ,
28242 IX86_BUILTIN_MASKSTORED256,
28243 IX86_BUILTIN_MASKSTOREQ256,
28244 IX86_BUILTIN_PSLLVV4DI,
28245 IX86_BUILTIN_PSLLVV2DI,
28246 IX86_BUILTIN_PSLLVV8SI,
28247 IX86_BUILTIN_PSLLVV4SI,
28248 IX86_BUILTIN_PSRAVV8SI,
28249 IX86_BUILTIN_PSRAVV4SI,
28250 IX86_BUILTIN_PSRLVV4DI,
28251 IX86_BUILTIN_PSRLVV2DI,
28252 IX86_BUILTIN_PSRLVV8SI,
28253 IX86_BUILTIN_PSRLVV4SI,
28254
28255 IX86_BUILTIN_GATHERSIV2DF,
28256 IX86_BUILTIN_GATHERSIV4DF,
28257 IX86_BUILTIN_GATHERDIV2DF,
28258 IX86_BUILTIN_GATHERDIV4DF,
28259 IX86_BUILTIN_GATHERSIV4SF,
28260 IX86_BUILTIN_GATHERSIV8SF,
28261 IX86_BUILTIN_GATHERDIV4SF,
28262 IX86_BUILTIN_GATHERDIV8SF,
28263 IX86_BUILTIN_GATHERSIV2DI,
28264 IX86_BUILTIN_GATHERSIV4DI,
28265 IX86_BUILTIN_GATHERDIV2DI,
28266 IX86_BUILTIN_GATHERDIV4DI,
28267 IX86_BUILTIN_GATHERSIV4SI,
28268 IX86_BUILTIN_GATHERSIV8SI,
28269 IX86_BUILTIN_GATHERDIV4SI,
28270 IX86_BUILTIN_GATHERDIV8SI,
28271
28272 /* AVX512F */
28273 IX86_BUILTIN_SI512_SI256,
28274 IX86_BUILTIN_PD512_PD256,
28275 IX86_BUILTIN_PS512_PS256,
28276 IX86_BUILTIN_SI512_SI,
28277 IX86_BUILTIN_PD512_PD,
28278 IX86_BUILTIN_PS512_PS,
28279 IX86_BUILTIN_ADDPD512,
28280 IX86_BUILTIN_ADDPS512,
28281 IX86_BUILTIN_ADDSD_ROUND,
28282 IX86_BUILTIN_ADDSS_ROUND,
28283 IX86_BUILTIN_ALIGND512,
28284 IX86_BUILTIN_ALIGNQ512,
28285 IX86_BUILTIN_BLENDMD512,
28286 IX86_BUILTIN_BLENDMPD512,
28287 IX86_BUILTIN_BLENDMPS512,
28288 IX86_BUILTIN_BLENDMQ512,
28289 IX86_BUILTIN_BROADCASTF32X4_512,
28290 IX86_BUILTIN_BROADCASTF64X4_512,
28291 IX86_BUILTIN_BROADCASTI32X4_512,
28292 IX86_BUILTIN_BROADCASTI64X4_512,
28293 IX86_BUILTIN_BROADCASTSD512,
28294 IX86_BUILTIN_BROADCASTSS512,
28295 IX86_BUILTIN_CMPD512,
28296 IX86_BUILTIN_CMPPD512,
28297 IX86_BUILTIN_CMPPS512,
28298 IX86_BUILTIN_CMPQ512,
28299 IX86_BUILTIN_CMPSD_MASK,
28300 IX86_BUILTIN_CMPSS_MASK,
28301 IX86_BUILTIN_COMIDF,
28302 IX86_BUILTIN_COMISF,
28303 IX86_BUILTIN_COMPRESSPD512,
28304 IX86_BUILTIN_COMPRESSPDSTORE512,
28305 IX86_BUILTIN_COMPRESSPS512,
28306 IX86_BUILTIN_COMPRESSPSSTORE512,
28307 IX86_BUILTIN_CVTDQ2PD512,
28308 IX86_BUILTIN_CVTDQ2PS512,
28309 IX86_BUILTIN_CVTPD2DQ512,
28310 IX86_BUILTIN_CVTPD2PS512,
28311 IX86_BUILTIN_CVTPD2UDQ512,
28312 IX86_BUILTIN_CVTPH2PS512,
28313 IX86_BUILTIN_CVTPS2DQ512,
28314 IX86_BUILTIN_CVTPS2PD512,
28315 IX86_BUILTIN_CVTPS2PH512,
28316 IX86_BUILTIN_CVTPS2UDQ512,
28317 IX86_BUILTIN_CVTSD2SS_ROUND,
28318 IX86_BUILTIN_CVTSI2SD64,
28319 IX86_BUILTIN_CVTSI2SS32,
28320 IX86_BUILTIN_CVTSI2SS64,
28321 IX86_BUILTIN_CVTSS2SD_ROUND,
28322 IX86_BUILTIN_CVTTPD2DQ512,
28323 IX86_BUILTIN_CVTTPD2UDQ512,
28324 IX86_BUILTIN_CVTTPS2DQ512,
28325 IX86_BUILTIN_CVTTPS2UDQ512,
28326 IX86_BUILTIN_CVTUDQ2PD512,
28327 IX86_BUILTIN_CVTUDQ2PS512,
28328 IX86_BUILTIN_CVTUSI2SD32,
28329 IX86_BUILTIN_CVTUSI2SD64,
28330 IX86_BUILTIN_CVTUSI2SS32,
28331 IX86_BUILTIN_CVTUSI2SS64,
28332 IX86_BUILTIN_DIVPD512,
28333 IX86_BUILTIN_DIVPS512,
28334 IX86_BUILTIN_DIVSD_ROUND,
28335 IX86_BUILTIN_DIVSS_ROUND,
28336 IX86_BUILTIN_EXPANDPD512,
28337 IX86_BUILTIN_EXPANDPD512Z,
28338 IX86_BUILTIN_EXPANDPDLOAD512,
28339 IX86_BUILTIN_EXPANDPDLOAD512Z,
28340 IX86_BUILTIN_EXPANDPS512,
28341 IX86_BUILTIN_EXPANDPS512Z,
28342 IX86_BUILTIN_EXPANDPSLOAD512,
28343 IX86_BUILTIN_EXPANDPSLOAD512Z,
28344 IX86_BUILTIN_EXTRACTF32X4,
28345 IX86_BUILTIN_EXTRACTF64X4,
28346 IX86_BUILTIN_EXTRACTI32X4,
28347 IX86_BUILTIN_EXTRACTI64X4,
28348 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28349 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28350 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28351 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28352 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28353 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28354 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28355 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28356 IX86_BUILTIN_GETEXPPD512,
28357 IX86_BUILTIN_GETEXPPS512,
28358 IX86_BUILTIN_GETEXPSD128,
28359 IX86_BUILTIN_GETEXPSS128,
28360 IX86_BUILTIN_GETMANTPD512,
28361 IX86_BUILTIN_GETMANTPS512,
28362 IX86_BUILTIN_GETMANTSD128,
28363 IX86_BUILTIN_GETMANTSS128,
28364 IX86_BUILTIN_INSERTF32X4,
28365 IX86_BUILTIN_INSERTF64X4,
28366 IX86_BUILTIN_INSERTI32X4,
28367 IX86_BUILTIN_INSERTI64X4,
28368 IX86_BUILTIN_LOADAPD512,
28369 IX86_BUILTIN_LOADAPS512,
28370 IX86_BUILTIN_LOADDQUDI512,
28371 IX86_BUILTIN_LOADDQUSI512,
28372 IX86_BUILTIN_LOADUPD512,
28373 IX86_BUILTIN_LOADUPS512,
28374 IX86_BUILTIN_MAXPD512,
28375 IX86_BUILTIN_MAXPS512,
28376 IX86_BUILTIN_MAXSD_ROUND,
28377 IX86_BUILTIN_MAXSS_ROUND,
28378 IX86_BUILTIN_MINPD512,
28379 IX86_BUILTIN_MINPS512,
28380 IX86_BUILTIN_MINSD_ROUND,
28381 IX86_BUILTIN_MINSS_ROUND,
28382 IX86_BUILTIN_MOVAPD512,
28383 IX86_BUILTIN_MOVAPS512,
28384 IX86_BUILTIN_MOVDDUP512,
28385 IX86_BUILTIN_MOVDQA32LOAD512,
28386 IX86_BUILTIN_MOVDQA32STORE512,
28387 IX86_BUILTIN_MOVDQA32_512,
28388 IX86_BUILTIN_MOVDQA64LOAD512,
28389 IX86_BUILTIN_MOVDQA64STORE512,
28390 IX86_BUILTIN_MOVDQA64_512,
28391 IX86_BUILTIN_MOVNTDQ512,
28392 IX86_BUILTIN_MOVNTDQA512,
28393 IX86_BUILTIN_MOVNTPD512,
28394 IX86_BUILTIN_MOVNTPS512,
28395 IX86_BUILTIN_MOVSHDUP512,
28396 IX86_BUILTIN_MOVSLDUP512,
28397 IX86_BUILTIN_MULPD512,
28398 IX86_BUILTIN_MULPS512,
28399 IX86_BUILTIN_MULSD_ROUND,
28400 IX86_BUILTIN_MULSS_ROUND,
28401 IX86_BUILTIN_PABSD512,
28402 IX86_BUILTIN_PABSQ512,
28403 IX86_BUILTIN_PADDD512,
28404 IX86_BUILTIN_PADDQ512,
28405 IX86_BUILTIN_PANDD512,
28406 IX86_BUILTIN_PANDND512,
28407 IX86_BUILTIN_PANDNQ512,
28408 IX86_BUILTIN_PANDQ512,
28409 IX86_BUILTIN_PBROADCASTD512,
28410 IX86_BUILTIN_PBROADCASTD512_GPR,
28411 IX86_BUILTIN_PBROADCASTMB512,
28412 IX86_BUILTIN_PBROADCASTMW512,
28413 IX86_BUILTIN_PBROADCASTQ512,
28414 IX86_BUILTIN_PBROADCASTQ512_GPR,
28415 IX86_BUILTIN_PBROADCASTQ512_MEM,
28416 IX86_BUILTIN_PCMPEQD512_MASK,
28417 IX86_BUILTIN_PCMPEQQ512_MASK,
28418 IX86_BUILTIN_PCMPGTD512_MASK,
28419 IX86_BUILTIN_PCMPGTQ512_MASK,
28420 IX86_BUILTIN_PCOMPRESSD512,
28421 IX86_BUILTIN_PCOMPRESSDSTORE512,
28422 IX86_BUILTIN_PCOMPRESSQ512,
28423 IX86_BUILTIN_PCOMPRESSQSTORE512,
28424 IX86_BUILTIN_PEXPANDD512,
28425 IX86_BUILTIN_PEXPANDD512Z,
28426 IX86_BUILTIN_PEXPANDDLOAD512,
28427 IX86_BUILTIN_PEXPANDDLOAD512Z,
28428 IX86_BUILTIN_PEXPANDQ512,
28429 IX86_BUILTIN_PEXPANDQ512Z,
28430 IX86_BUILTIN_PEXPANDQLOAD512,
28431 IX86_BUILTIN_PEXPANDQLOAD512Z,
28432 IX86_BUILTIN_PMAXSD512,
28433 IX86_BUILTIN_PMAXSQ512,
28434 IX86_BUILTIN_PMAXUD512,
28435 IX86_BUILTIN_PMAXUQ512,
28436 IX86_BUILTIN_PMINSD512,
28437 IX86_BUILTIN_PMINSQ512,
28438 IX86_BUILTIN_PMINUD512,
28439 IX86_BUILTIN_PMINUQ512,
28440 IX86_BUILTIN_PMOVDB512,
28441 IX86_BUILTIN_PMOVDB512_MEM,
28442 IX86_BUILTIN_PMOVDW512,
28443 IX86_BUILTIN_PMOVDW512_MEM,
28444 IX86_BUILTIN_PMOVQB512,
28445 IX86_BUILTIN_PMOVQB512_MEM,
28446 IX86_BUILTIN_PMOVQD512,
28447 IX86_BUILTIN_PMOVQD512_MEM,
28448 IX86_BUILTIN_PMOVQW512,
28449 IX86_BUILTIN_PMOVQW512_MEM,
28450 IX86_BUILTIN_PMOVSDB512,
28451 IX86_BUILTIN_PMOVSDB512_MEM,
28452 IX86_BUILTIN_PMOVSDW512,
28453 IX86_BUILTIN_PMOVSDW512_MEM,
28454 IX86_BUILTIN_PMOVSQB512,
28455 IX86_BUILTIN_PMOVSQB512_MEM,
28456 IX86_BUILTIN_PMOVSQD512,
28457 IX86_BUILTIN_PMOVSQD512_MEM,
28458 IX86_BUILTIN_PMOVSQW512,
28459 IX86_BUILTIN_PMOVSQW512_MEM,
28460 IX86_BUILTIN_PMOVSXBD512,
28461 IX86_BUILTIN_PMOVSXBQ512,
28462 IX86_BUILTIN_PMOVSXDQ512,
28463 IX86_BUILTIN_PMOVSXWD512,
28464 IX86_BUILTIN_PMOVSXWQ512,
28465 IX86_BUILTIN_PMOVUSDB512,
28466 IX86_BUILTIN_PMOVUSDB512_MEM,
28467 IX86_BUILTIN_PMOVUSDW512,
28468 IX86_BUILTIN_PMOVUSDW512_MEM,
28469 IX86_BUILTIN_PMOVUSQB512,
28470 IX86_BUILTIN_PMOVUSQB512_MEM,
28471 IX86_BUILTIN_PMOVUSQD512,
28472 IX86_BUILTIN_PMOVUSQD512_MEM,
28473 IX86_BUILTIN_PMOVUSQW512,
28474 IX86_BUILTIN_PMOVUSQW512_MEM,
28475 IX86_BUILTIN_PMOVZXBD512,
28476 IX86_BUILTIN_PMOVZXBQ512,
28477 IX86_BUILTIN_PMOVZXDQ512,
28478 IX86_BUILTIN_PMOVZXWD512,
28479 IX86_BUILTIN_PMOVZXWQ512,
28480 IX86_BUILTIN_PMULDQ512,
28481 IX86_BUILTIN_PMULLD512,
28482 IX86_BUILTIN_PMULUDQ512,
28483 IX86_BUILTIN_PORD512,
28484 IX86_BUILTIN_PORQ512,
28485 IX86_BUILTIN_PROLD512,
28486 IX86_BUILTIN_PROLQ512,
28487 IX86_BUILTIN_PROLVD512,
28488 IX86_BUILTIN_PROLVQ512,
28489 IX86_BUILTIN_PRORD512,
28490 IX86_BUILTIN_PRORQ512,
28491 IX86_BUILTIN_PRORVD512,
28492 IX86_BUILTIN_PRORVQ512,
28493 IX86_BUILTIN_PSHUFD512,
28494 IX86_BUILTIN_PSLLD512,
28495 IX86_BUILTIN_PSLLDI512,
28496 IX86_BUILTIN_PSLLQ512,
28497 IX86_BUILTIN_PSLLQI512,
28498 IX86_BUILTIN_PSLLVV16SI,
28499 IX86_BUILTIN_PSLLVV8DI,
28500 IX86_BUILTIN_PSRAD512,
28501 IX86_BUILTIN_PSRADI512,
28502 IX86_BUILTIN_PSRAQ512,
28503 IX86_BUILTIN_PSRAQI512,
28504 IX86_BUILTIN_PSRAVV16SI,
28505 IX86_BUILTIN_PSRAVV8DI,
28506 IX86_BUILTIN_PSRLD512,
28507 IX86_BUILTIN_PSRLDI512,
28508 IX86_BUILTIN_PSRLQ512,
28509 IX86_BUILTIN_PSRLQI512,
28510 IX86_BUILTIN_PSRLVV16SI,
28511 IX86_BUILTIN_PSRLVV8DI,
28512 IX86_BUILTIN_PSUBD512,
28513 IX86_BUILTIN_PSUBQ512,
28514 IX86_BUILTIN_PTESTMD512,
28515 IX86_BUILTIN_PTESTMQ512,
28516 IX86_BUILTIN_PTESTNMD512,
28517 IX86_BUILTIN_PTESTNMQ512,
28518 IX86_BUILTIN_PUNPCKHDQ512,
28519 IX86_BUILTIN_PUNPCKHQDQ512,
28520 IX86_BUILTIN_PUNPCKLDQ512,
28521 IX86_BUILTIN_PUNPCKLQDQ512,
28522 IX86_BUILTIN_PXORD512,
28523 IX86_BUILTIN_PXORQ512,
28524 IX86_BUILTIN_RCP14PD512,
28525 IX86_BUILTIN_RCP14PS512,
28526 IX86_BUILTIN_RCP14SD,
28527 IX86_BUILTIN_RCP14SS,
28528 IX86_BUILTIN_RNDSCALEPD,
28529 IX86_BUILTIN_RNDSCALEPS,
28530 IX86_BUILTIN_RNDSCALESD,
28531 IX86_BUILTIN_RNDSCALESS,
28532 IX86_BUILTIN_RSQRT14PD512,
28533 IX86_BUILTIN_RSQRT14PS512,
28534 IX86_BUILTIN_RSQRT14SD,
28535 IX86_BUILTIN_RSQRT14SS,
28536 IX86_BUILTIN_SCALEFPD512,
28537 IX86_BUILTIN_SCALEFPS512,
28538 IX86_BUILTIN_SCALEFSD,
28539 IX86_BUILTIN_SCALEFSS,
28540 IX86_BUILTIN_SHUFPD512,
28541 IX86_BUILTIN_SHUFPS512,
28542 IX86_BUILTIN_SHUF_F32x4,
28543 IX86_BUILTIN_SHUF_F64x2,
28544 IX86_BUILTIN_SHUF_I32x4,
28545 IX86_BUILTIN_SHUF_I64x2,
28546 IX86_BUILTIN_SQRTPD512,
28547 IX86_BUILTIN_SQRTPD512_MASK,
28548 IX86_BUILTIN_SQRTPS512_MASK,
28549 IX86_BUILTIN_SQRTPS_NR512,
28550 IX86_BUILTIN_SQRTSD_ROUND,
28551 IX86_BUILTIN_SQRTSS_ROUND,
28552 IX86_BUILTIN_STOREAPD512,
28553 IX86_BUILTIN_STOREAPS512,
28554 IX86_BUILTIN_STOREDQUDI512,
28555 IX86_BUILTIN_STOREDQUSI512,
28556 IX86_BUILTIN_STOREUPD512,
28557 IX86_BUILTIN_STOREUPS512,
28558 IX86_BUILTIN_SUBPD512,
28559 IX86_BUILTIN_SUBPS512,
28560 IX86_BUILTIN_SUBSD_ROUND,
28561 IX86_BUILTIN_SUBSS_ROUND,
28562 IX86_BUILTIN_UCMPD512,
28563 IX86_BUILTIN_UCMPQ512,
28564 IX86_BUILTIN_UNPCKHPD512,
28565 IX86_BUILTIN_UNPCKHPS512,
28566 IX86_BUILTIN_UNPCKLPD512,
28567 IX86_BUILTIN_UNPCKLPS512,
28568 IX86_BUILTIN_VCVTSD2SI32,
28569 IX86_BUILTIN_VCVTSD2SI64,
28570 IX86_BUILTIN_VCVTSD2USI32,
28571 IX86_BUILTIN_VCVTSD2USI64,
28572 IX86_BUILTIN_VCVTSS2SI32,
28573 IX86_BUILTIN_VCVTSS2SI64,
28574 IX86_BUILTIN_VCVTSS2USI32,
28575 IX86_BUILTIN_VCVTSS2USI64,
28576 IX86_BUILTIN_VCVTTSD2SI32,
28577 IX86_BUILTIN_VCVTTSD2SI64,
28578 IX86_BUILTIN_VCVTTSD2USI32,
28579 IX86_BUILTIN_VCVTTSD2USI64,
28580 IX86_BUILTIN_VCVTTSS2SI32,
28581 IX86_BUILTIN_VCVTTSS2SI64,
28582 IX86_BUILTIN_VCVTTSS2USI32,
28583 IX86_BUILTIN_VCVTTSS2USI64,
28584 IX86_BUILTIN_VFMADDPD512_MASK,
28585 IX86_BUILTIN_VFMADDPD512_MASK3,
28586 IX86_BUILTIN_VFMADDPD512_MASKZ,
28587 IX86_BUILTIN_VFMADDPS512_MASK,
28588 IX86_BUILTIN_VFMADDPS512_MASK3,
28589 IX86_BUILTIN_VFMADDPS512_MASKZ,
28590 IX86_BUILTIN_VFMADDSD3_ROUND,
28591 IX86_BUILTIN_VFMADDSS3_ROUND,
28592 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28593 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28594 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28595 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28596 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28597 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28598 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28599 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28600 IX86_BUILTIN_VFMSUBPD512_MASK3,
28601 IX86_BUILTIN_VFMSUBPS512_MASK3,
28602 IX86_BUILTIN_VFMSUBSD3_MASK3,
28603 IX86_BUILTIN_VFMSUBSS3_MASK3,
28604 IX86_BUILTIN_VFNMADDPD512_MASK,
28605 IX86_BUILTIN_VFNMADDPS512_MASK,
28606 IX86_BUILTIN_VFNMSUBPD512_MASK,
28607 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28608 IX86_BUILTIN_VFNMSUBPS512_MASK,
28609 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28610 IX86_BUILTIN_VPCLZCNTD512,
28611 IX86_BUILTIN_VPCLZCNTQ512,
28612 IX86_BUILTIN_VPCONFLICTD512,
28613 IX86_BUILTIN_VPCONFLICTQ512,
28614 IX86_BUILTIN_VPERMDF512,
28615 IX86_BUILTIN_VPERMDI512,
28616 IX86_BUILTIN_VPERMI2VARD512,
28617 IX86_BUILTIN_VPERMI2VARPD512,
28618 IX86_BUILTIN_VPERMI2VARPS512,
28619 IX86_BUILTIN_VPERMI2VARQ512,
28620 IX86_BUILTIN_VPERMILPD512,
28621 IX86_BUILTIN_VPERMILPS512,
28622 IX86_BUILTIN_VPERMILVARPD512,
28623 IX86_BUILTIN_VPERMILVARPS512,
28624 IX86_BUILTIN_VPERMT2VARD512,
28625 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28626 IX86_BUILTIN_VPERMT2VARPD512,
28627 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28628 IX86_BUILTIN_VPERMT2VARPS512,
28629 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28630 IX86_BUILTIN_VPERMT2VARQ512,
28631 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28632 IX86_BUILTIN_VPERMVARDF512,
28633 IX86_BUILTIN_VPERMVARDI512,
28634 IX86_BUILTIN_VPERMVARSF512,
28635 IX86_BUILTIN_VPERMVARSI512,
28636 IX86_BUILTIN_VTERNLOGD512_MASK,
28637 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28638 IX86_BUILTIN_VTERNLOGQ512_MASK,
28639 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28640
28641 /* Mask arithmetic operations */
28642 IX86_BUILTIN_KAND16,
28643 IX86_BUILTIN_KANDN16,
28644 IX86_BUILTIN_KNOT16,
28645 IX86_BUILTIN_KOR16,
28646 IX86_BUILTIN_KORTESTC16,
28647 IX86_BUILTIN_KORTESTZ16,
28648 IX86_BUILTIN_KUNPCKBW,
28649 IX86_BUILTIN_KXNOR16,
28650 IX86_BUILTIN_KXOR16,
28651 IX86_BUILTIN_KMOV16,
28652
28653 /* Alternate 4 and 8 element gather/scatter for the vectorizer
28654 where all operands are 32-byte or 64-byte wide respectively. */
28655 IX86_BUILTIN_GATHERALTSIV4DF,
28656 IX86_BUILTIN_GATHERALTDIV8SF,
28657 IX86_BUILTIN_GATHERALTSIV4DI,
28658 IX86_BUILTIN_GATHERALTDIV8SI,
28659 IX86_BUILTIN_GATHER3ALTDIV16SF,
28660 IX86_BUILTIN_GATHER3ALTDIV16SI,
28661 IX86_BUILTIN_GATHER3ALTSIV8DF,
28662 IX86_BUILTIN_GATHER3ALTSIV8DI,
28663 IX86_BUILTIN_GATHER3DIV16SF,
28664 IX86_BUILTIN_GATHER3DIV16SI,
28665 IX86_BUILTIN_GATHER3DIV8DF,
28666 IX86_BUILTIN_GATHER3DIV8DI,
28667 IX86_BUILTIN_GATHER3SIV16SF,
28668 IX86_BUILTIN_GATHER3SIV16SI,
28669 IX86_BUILTIN_GATHER3SIV8DF,
28670 IX86_BUILTIN_GATHER3SIV8DI,
28671 IX86_BUILTIN_SCATTERDIV16SF,
28672 IX86_BUILTIN_SCATTERDIV16SI,
28673 IX86_BUILTIN_SCATTERDIV8DF,
28674 IX86_BUILTIN_SCATTERDIV8DI,
28675 IX86_BUILTIN_SCATTERSIV16SF,
28676 IX86_BUILTIN_SCATTERSIV16SI,
28677 IX86_BUILTIN_SCATTERSIV8DF,
28678 IX86_BUILTIN_SCATTERSIV8DI,
28679
28680 /* AVX512PF */
28681 IX86_BUILTIN_GATHERPFQPD,
28682 IX86_BUILTIN_GATHERPFDPS,
28683 IX86_BUILTIN_GATHERPFDPD,
28684 IX86_BUILTIN_GATHERPFQPS,
28685 IX86_BUILTIN_SCATTERPFDPD,
28686 IX86_BUILTIN_SCATTERPFDPS,
28687 IX86_BUILTIN_SCATTERPFQPD,
28688 IX86_BUILTIN_SCATTERPFQPS,
28689
28690 /* AVX-512ER */
28691 IX86_BUILTIN_EXP2PD_MASK,
28692 IX86_BUILTIN_EXP2PS_MASK,
28693 IX86_BUILTIN_EXP2PS,
28694 IX86_BUILTIN_RCP28PD,
28695 IX86_BUILTIN_RCP28PS,
28696 IX86_BUILTIN_RCP28SD,
28697 IX86_BUILTIN_RCP28SS,
28698 IX86_BUILTIN_RSQRT28PD,
28699 IX86_BUILTIN_RSQRT28PS,
28700 IX86_BUILTIN_RSQRT28SD,
28701 IX86_BUILTIN_RSQRT28SS,
28702
28703 /* SHA builtins. */
28704 IX86_BUILTIN_SHA1MSG1,
28705 IX86_BUILTIN_SHA1MSG2,
28706 IX86_BUILTIN_SHA1NEXTE,
28707 IX86_BUILTIN_SHA1RNDS4,
28708 IX86_BUILTIN_SHA256MSG1,
28709 IX86_BUILTIN_SHA256MSG2,
28710 IX86_BUILTIN_SHA256RNDS2,
28711
28712 /* CLFLUSHOPT instructions. */
28713 IX86_BUILTIN_CLFLUSHOPT,
28714
28715 /* TFmode support builtins. */
28716 IX86_BUILTIN_INFQ,
28717 IX86_BUILTIN_HUGE_VALQ,
28718 IX86_BUILTIN_FABSQ,
28719 IX86_BUILTIN_COPYSIGNQ,
28720
28721 /* Vectorizer support builtins. */
28722 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28723 IX86_BUILTIN_CPYSGNPS,
28724 IX86_BUILTIN_CPYSGNPD,
28725 IX86_BUILTIN_CPYSGNPS256,
28726 IX86_BUILTIN_CPYSGNPS512,
28727 IX86_BUILTIN_CPYSGNPD256,
28728 IX86_BUILTIN_CPYSGNPD512,
28729 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28730 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28731
28732
28733 /* FMA4 instructions. */
28734 IX86_BUILTIN_VFMADDSS,
28735 IX86_BUILTIN_VFMADDSD,
28736 IX86_BUILTIN_VFMADDPS,
28737 IX86_BUILTIN_VFMADDPD,
28738 IX86_BUILTIN_VFMADDPS256,
28739 IX86_BUILTIN_VFMADDPD256,
28740 IX86_BUILTIN_VFMADDSUBPS,
28741 IX86_BUILTIN_VFMADDSUBPD,
28742 IX86_BUILTIN_VFMADDSUBPS256,
28743 IX86_BUILTIN_VFMADDSUBPD256,
28744
28745 /* FMA3 instructions. */
28746 IX86_BUILTIN_VFMADDSS3,
28747 IX86_BUILTIN_VFMADDSD3,
28748
28749 /* XOP instructions. */
28750 IX86_BUILTIN_VPCMOV,
28751 IX86_BUILTIN_VPCMOV_V2DI,
28752 IX86_BUILTIN_VPCMOV_V4SI,
28753 IX86_BUILTIN_VPCMOV_V8HI,
28754 IX86_BUILTIN_VPCMOV_V16QI,
28755 IX86_BUILTIN_VPCMOV_V4SF,
28756 IX86_BUILTIN_VPCMOV_V2DF,
28757 IX86_BUILTIN_VPCMOV256,
28758 IX86_BUILTIN_VPCMOV_V4DI256,
28759 IX86_BUILTIN_VPCMOV_V8SI256,
28760 IX86_BUILTIN_VPCMOV_V16HI256,
28761 IX86_BUILTIN_VPCMOV_V32QI256,
28762 IX86_BUILTIN_VPCMOV_V8SF256,
28763 IX86_BUILTIN_VPCMOV_V4DF256,
28764
28765 IX86_BUILTIN_VPPERM,
28766
28767 IX86_BUILTIN_VPMACSSWW,
28768 IX86_BUILTIN_VPMACSWW,
28769 IX86_BUILTIN_VPMACSSWD,
28770 IX86_BUILTIN_VPMACSWD,
28771 IX86_BUILTIN_VPMACSSDD,
28772 IX86_BUILTIN_VPMACSDD,
28773 IX86_BUILTIN_VPMACSSDQL,
28774 IX86_BUILTIN_VPMACSSDQH,
28775 IX86_BUILTIN_VPMACSDQL,
28776 IX86_BUILTIN_VPMACSDQH,
28777 IX86_BUILTIN_VPMADCSSWD,
28778 IX86_BUILTIN_VPMADCSWD,
28779
28780 IX86_BUILTIN_VPHADDBW,
28781 IX86_BUILTIN_VPHADDBD,
28782 IX86_BUILTIN_VPHADDBQ,
28783 IX86_BUILTIN_VPHADDWD,
28784 IX86_BUILTIN_VPHADDWQ,
28785 IX86_BUILTIN_VPHADDDQ,
28786 IX86_BUILTIN_VPHADDUBW,
28787 IX86_BUILTIN_VPHADDUBD,
28788 IX86_BUILTIN_VPHADDUBQ,
28789 IX86_BUILTIN_VPHADDUWD,
28790 IX86_BUILTIN_VPHADDUWQ,
28791 IX86_BUILTIN_VPHADDUDQ,
28792 IX86_BUILTIN_VPHSUBBW,
28793 IX86_BUILTIN_VPHSUBWD,
28794 IX86_BUILTIN_VPHSUBDQ,
28795
28796 IX86_BUILTIN_VPROTB,
28797 IX86_BUILTIN_VPROTW,
28798 IX86_BUILTIN_VPROTD,
28799 IX86_BUILTIN_VPROTQ,
28800 IX86_BUILTIN_VPROTB_IMM,
28801 IX86_BUILTIN_VPROTW_IMM,
28802 IX86_BUILTIN_VPROTD_IMM,
28803 IX86_BUILTIN_VPROTQ_IMM,
28804
28805 IX86_BUILTIN_VPSHLB,
28806 IX86_BUILTIN_VPSHLW,
28807 IX86_BUILTIN_VPSHLD,
28808 IX86_BUILTIN_VPSHLQ,
28809 IX86_BUILTIN_VPSHAB,
28810 IX86_BUILTIN_VPSHAW,
28811 IX86_BUILTIN_VPSHAD,
28812 IX86_BUILTIN_VPSHAQ,
28813
28814 IX86_BUILTIN_VFRCZSS,
28815 IX86_BUILTIN_VFRCZSD,
28816 IX86_BUILTIN_VFRCZPS,
28817 IX86_BUILTIN_VFRCZPD,
28818 IX86_BUILTIN_VFRCZPS256,
28819 IX86_BUILTIN_VFRCZPD256,
28820
28821 IX86_BUILTIN_VPCOMEQUB,
28822 IX86_BUILTIN_VPCOMNEUB,
28823 IX86_BUILTIN_VPCOMLTUB,
28824 IX86_BUILTIN_VPCOMLEUB,
28825 IX86_BUILTIN_VPCOMGTUB,
28826 IX86_BUILTIN_VPCOMGEUB,
28827 IX86_BUILTIN_VPCOMFALSEUB,
28828 IX86_BUILTIN_VPCOMTRUEUB,
28829
28830 IX86_BUILTIN_VPCOMEQUW,
28831 IX86_BUILTIN_VPCOMNEUW,
28832 IX86_BUILTIN_VPCOMLTUW,
28833 IX86_BUILTIN_VPCOMLEUW,
28834 IX86_BUILTIN_VPCOMGTUW,
28835 IX86_BUILTIN_VPCOMGEUW,
28836 IX86_BUILTIN_VPCOMFALSEUW,
28837 IX86_BUILTIN_VPCOMTRUEUW,
28838
28839 IX86_BUILTIN_VPCOMEQUD,
28840 IX86_BUILTIN_VPCOMNEUD,
28841 IX86_BUILTIN_VPCOMLTUD,
28842 IX86_BUILTIN_VPCOMLEUD,
28843 IX86_BUILTIN_VPCOMGTUD,
28844 IX86_BUILTIN_VPCOMGEUD,
28845 IX86_BUILTIN_VPCOMFALSEUD,
28846 IX86_BUILTIN_VPCOMTRUEUD,
28847
28848 IX86_BUILTIN_VPCOMEQUQ,
28849 IX86_BUILTIN_VPCOMNEUQ,
28850 IX86_BUILTIN_VPCOMLTUQ,
28851 IX86_BUILTIN_VPCOMLEUQ,
28852 IX86_BUILTIN_VPCOMGTUQ,
28853 IX86_BUILTIN_VPCOMGEUQ,
28854 IX86_BUILTIN_VPCOMFALSEUQ,
28855 IX86_BUILTIN_VPCOMTRUEUQ,
28856
28857 IX86_BUILTIN_VPCOMEQB,
28858 IX86_BUILTIN_VPCOMNEB,
28859 IX86_BUILTIN_VPCOMLTB,
28860 IX86_BUILTIN_VPCOMLEB,
28861 IX86_BUILTIN_VPCOMGTB,
28862 IX86_BUILTIN_VPCOMGEB,
28863 IX86_BUILTIN_VPCOMFALSEB,
28864 IX86_BUILTIN_VPCOMTRUEB,
28865
28866 IX86_BUILTIN_VPCOMEQW,
28867 IX86_BUILTIN_VPCOMNEW,
28868 IX86_BUILTIN_VPCOMLTW,
28869 IX86_BUILTIN_VPCOMLEW,
28870 IX86_BUILTIN_VPCOMGTW,
28871 IX86_BUILTIN_VPCOMGEW,
28872 IX86_BUILTIN_VPCOMFALSEW,
28873 IX86_BUILTIN_VPCOMTRUEW,
28874
28875 IX86_BUILTIN_VPCOMEQD,
28876 IX86_BUILTIN_VPCOMNED,
28877 IX86_BUILTIN_VPCOMLTD,
28878 IX86_BUILTIN_VPCOMLED,
28879 IX86_BUILTIN_VPCOMGTD,
28880 IX86_BUILTIN_VPCOMGED,
28881 IX86_BUILTIN_VPCOMFALSED,
28882 IX86_BUILTIN_VPCOMTRUED,
28883
28884 IX86_BUILTIN_VPCOMEQQ,
28885 IX86_BUILTIN_VPCOMNEQ,
28886 IX86_BUILTIN_VPCOMLTQ,
28887 IX86_BUILTIN_VPCOMLEQ,
28888 IX86_BUILTIN_VPCOMGTQ,
28889 IX86_BUILTIN_VPCOMGEQ,
28890 IX86_BUILTIN_VPCOMFALSEQ,
28891 IX86_BUILTIN_VPCOMTRUEQ,
28892
28893 /* LWP instructions. */
28894 IX86_BUILTIN_LLWPCB,
28895 IX86_BUILTIN_SLWPCB,
28896 IX86_BUILTIN_LWPVAL32,
28897 IX86_BUILTIN_LWPVAL64,
28898 IX86_BUILTIN_LWPINS32,
28899 IX86_BUILTIN_LWPINS64,
28900
28901 IX86_BUILTIN_CLZS,
28902
28903 /* RTM */
28904 IX86_BUILTIN_XBEGIN,
28905 IX86_BUILTIN_XEND,
28906 IX86_BUILTIN_XABORT,
28907 IX86_BUILTIN_XTEST,
28908
28909 /* BMI instructions. */
28910 IX86_BUILTIN_BEXTR32,
28911 IX86_BUILTIN_BEXTR64,
28912 IX86_BUILTIN_CTZS,
28913
28914 /* TBM instructions. */
28915 IX86_BUILTIN_BEXTRI32,
28916 IX86_BUILTIN_BEXTRI64,
28917
28918 /* BMI2 instructions. */
28919 IX86_BUILTIN_BZHI32,
28920 IX86_BUILTIN_BZHI64,
28921 IX86_BUILTIN_PDEP32,
28922 IX86_BUILTIN_PDEP64,
28923 IX86_BUILTIN_PEXT32,
28924 IX86_BUILTIN_PEXT64,
28925
28926 /* ADX instructions. */
28927 IX86_BUILTIN_ADDCARRYX32,
28928 IX86_BUILTIN_ADDCARRYX64,
28929
28930 /* SBB instructions. */
28931 IX86_BUILTIN_SBB32,
28932 IX86_BUILTIN_SBB64,
28933
28934 /* FSGSBASE instructions. */
28935 IX86_BUILTIN_RDFSBASE32,
28936 IX86_BUILTIN_RDFSBASE64,
28937 IX86_BUILTIN_RDGSBASE32,
28938 IX86_BUILTIN_RDGSBASE64,
28939 IX86_BUILTIN_WRFSBASE32,
28940 IX86_BUILTIN_WRFSBASE64,
28941 IX86_BUILTIN_WRGSBASE32,
28942 IX86_BUILTIN_WRGSBASE64,
28943
28944 /* RDRND instructions. */
28945 IX86_BUILTIN_RDRAND16_STEP,
28946 IX86_BUILTIN_RDRAND32_STEP,
28947 IX86_BUILTIN_RDRAND64_STEP,
28948
28949 /* RDSEED instructions. */
28950 IX86_BUILTIN_RDSEED16_STEP,
28951 IX86_BUILTIN_RDSEED32_STEP,
28952 IX86_BUILTIN_RDSEED64_STEP,
28953
28954 /* F16C instructions. */
28955 IX86_BUILTIN_CVTPH2PS,
28956 IX86_BUILTIN_CVTPH2PS256,
28957 IX86_BUILTIN_CVTPS2PH,
28958 IX86_BUILTIN_CVTPS2PH256,
28959
28960 /* CFString built-in for darwin */
28961 IX86_BUILTIN_CFSTRING,
28962
28963 /* Builtins to get CPU type and supported features. */
28964 IX86_BUILTIN_CPU_INIT,
28965 IX86_BUILTIN_CPU_IS,
28966 IX86_BUILTIN_CPU_SUPPORTS,
28967
28968 /* Read/write FLAGS register built-ins. */
28969 IX86_BUILTIN_READ_FLAGS,
28970 IX86_BUILTIN_WRITE_FLAGS,
28971
28972 IX86_BUILTIN_MAX
28973 };
28974
28975 /* Table for the ix86 builtin decls. */
28976 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28977
28978 /* Table of all of the builtin functions that are possible with different ISAs
28979 but are waiting to be built until a function is declared to use that
28980 ISA. */
28981 struct builtin_isa {
28982 const char *name; /* function name */
28983 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28984 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28985 bool const_p; /* true if the declaration is constant */
28986 bool set_and_not_built_p;
28987 };
28988
28989 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28990
28991
28992 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28993 of which isa_flags to use in the ix86_builtins_isa array. Stores the
28994 function decl in the ix86_builtins array. Returns the function decl or
28995 NULL_TREE, if the builtin was not added.
28996
28997 If the front end has a special hook for builtin functions, delay adding
28998 builtin functions that aren't in the current ISA until the ISA is changed
28999 with function specific optimization. Doing so can save about 300K for the
29000 default compiler. When the builtin is expanded, check at that time whether
29001 it is valid.
29002
29003 If the front end doesn't have a special hook, record all builtins, even
29004 those that aren't in the current ISA, in case the user uses function
29005 specific options for a different ISA, so that we don't get scope errors
29006 if a builtin is added in the middle of a function scope. */
29007
29008 static inline tree
29009 def_builtin (HOST_WIDE_INT mask, const char *name,
29010 enum ix86_builtin_func_type tcode,
29011 enum ix86_builtins code)
29012 {
29013 tree decl = NULL_TREE;
29014
29015 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
29016 {
29017 ix86_builtins_isa[(int) code].isa = mask;
29018
29019 mask &= ~OPTION_MASK_ISA_64BIT;
29020 if (mask == 0
29021 || (mask & ix86_isa_flags) != 0
29022 || (lang_hooks.builtin_function
29023 == lang_hooks.builtin_function_ext_scope))
29024
29025 {
29026 tree type = ix86_get_builtin_func_type (tcode);
29027 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29028 NULL, NULL_TREE);
29029 ix86_builtins[(int) code] = decl;
29030 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29031 }
29032 else
29033 {
29034 ix86_builtins[(int) code] = NULL_TREE;
29035 ix86_builtins_isa[(int) code].tcode = tcode;
29036 ix86_builtins_isa[(int) code].name = name;
29037 ix86_builtins_isa[(int) code].const_p = false;
29038 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29039 }
29040 }
29041
29042 return decl;
29043 }
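
/* Illustrative sketch, not part of the original source: how a builtin flows
   through def_builtin.  The builtin name and enum value below are
   hypothetical.

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                  VOID_FTYPE_VOID, IX86_BUILTIN_EXAMPLE);

   When compiling with -msse2 (or when the front end lacks a special builtin
   hook), the decl is created immediately.  With -mno-sse2 the name and type
   are parked in ix86_builtins_isa[] and NULL_TREE is returned; if the ISA is
   later enabled, e.g. through the target attribute machinery,
   ix86_add_new_builtins turns the parked entry into a real decl.  */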
29044
29045 /* Like def_builtin, but also marks the function decl "const". */
29046
29047 static inline tree
29048 def_builtin_const (HOST_WIDE_INT mask, const char *name,
29049 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29050 {
29051 tree decl = def_builtin (mask, name, tcode, code);
29052 if (decl)
29053 TREE_READONLY (decl) = 1;
29054 else
29055 ix86_builtins_isa[(int) code].const_p = true;
29056
29057 return decl;
29058 }
29059
29060 /* Add any new builtin functions for a given ISA that may not have been
29061 declared. This saves a bit of space compared to adding all of the
29062 declarations to the tree, even if we didn't use them. */
29063
29064 static void
29065 ix86_add_new_builtins (HOST_WIDE_INT isa)
29066 {
29067 int i;
29068
29069 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
29070 {
29071 if ((ix86_builtins_isa[i].isa & isa) != 0
29072 && ix86_builtins_isa[i].set_and_not_built_p)
29073 {
29074 tree decl, type;
29075
29076 /* Don't define the builtin again. */
29077 ix86_builtins_isa[i].set_and_not_built_p = false;
29078
29079 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
29080 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
29081 type, i, BUILT_IN_MD, NULL,
29082 NULL_TREE);
29083
29084 ix86_builtins[i] = decl;
29085 if (ix86_builtins_isa[i].const_p)
29086 TREE_READONLY (decl) = 1;
29087 }
29088 }
29089 }
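
/* Usage sketch (illustration, not from this file): the expected caller is the
   target-attribute/pragma machinery, which re-derives ix86_isa_flags for the
   new options and then materializes any parked builtins:

     ix86_add_new_builtins (ix86_isa_flags);

   Entries whose set_and_not_built_p flag is already false are skipped, so
   calling this more than once for the same ISA is harmless.  */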
29090
29091 /* Bits for builtin_description.flag. */
29092
29093 /* Set when we don't support the comparison natively, and should
29094 swap_comparison in order to support it. */
29095 #define BUILTIN_DESC_SWAP_OPERANDS 1
29096
29097 struct builtin_description
29098 {
29099 const HOST_WIDE_INT mask;
29100 const enum insn_code icode;
29101 const char *const name;
29102 const enum ix86_builtins code;
29103 const enum rtx_code comparison;
29104 const int flag;
29105 };
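
/* How a table entry is read (illustration, not part of the original source):
   the bdesc_comi entry below for __builtin_ia32_comilt, for instance, says
   "available when OPTION_MASK_ISA_SSE is set, expand via CODE_FOR_sse_comi,
   compare with rtx code UNLT".  An expander honouring
   BUILTIN_DESC_SWAP_OPERANDS would do roughly

     if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
       {
         rtx tmp = op0;
         op0 = op1;
         op1 = tmp;
       }

   before emitting the comparison.  */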
29106
29107 static const struct builtin_description bdesc_comi[] =
29108 {
29109 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
29110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
29111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
29112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
29113 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
29114 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
29115 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
29116 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
29117 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
29118 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
29119 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
29120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
29121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
29122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
29123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
29124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
29125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
29126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
29127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
29128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
29129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
29130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
29131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
29132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
29133 };
29134
29135 static const struct builtin_description bdesc_pcmpestr[] =
29136 {
29137 /* SSE4.2 */
29138 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
29139 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
29140 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
29141 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
29142 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
29143 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
29144 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
29145 };
29146
29147 static const struct builtin_description bdesc_pcmpistr[] =
29148 {
29149 /* SSE4.2 */
29150 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
29151 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
29152 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
29153 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
29154 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
29155 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
29156 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
29157 };
29158
29159 /* Special builtins with a variable number of arguments. */
29160 static const struct builtin_description bdesc_special_args[] =
29161 {
29162 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
29163 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
29164 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
29165
29166 /* 80387 (for use internally for atomic compound assignment). */
29167 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
29168 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
29169 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
29170 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
29171
29172 /* MMX */
29173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29174
29175 /* 3DNow! */
29176 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29177
29178 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29179 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29180 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29181 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29182 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29183 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29184 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29185 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29186 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29187
29188 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29189 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29190 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29191 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29192 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29193 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29194 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29195 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29196
29197 /* SSE */
29198 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29199 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29200 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29201
29202 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29203 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29204 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29205 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29206
29207 /* SSE or 3DNow!A */
29208 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29209 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29210
29211 /* SSE2 */
29212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29219 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29222
29223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29225
29226 /* SSE3 */
29227 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29228
29229 /* SSE4.1 */
29230 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29231
29232 /* SSE4A */
29233 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29234 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29235
29236 /* AVX */
29237 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29238 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29239
29240 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29241 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29242 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29243 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29244 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29245
29246 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29247 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29248 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29249 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29250 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29251 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29252 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29253
29254 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29255 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29256 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29257
29258 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29259 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29260 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29261 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29262 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29263 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29264 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29265 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29266
29267 /* AVX2 */
29268 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29269 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29270 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29271 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29272 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29273 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29274 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29275 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29276 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29277
29278 /* AVX512F */
29279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29326
29327 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29328 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29329 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29330 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29331 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29332 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29333
29334 /* FSGSBASE */
29335 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29336 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29337 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29338 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29339 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29340 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29341 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29342 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29343
29344 /* RTM */
29345 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29346 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29347 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29348 };
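
/* Note on the FTYPE codes used in these tables (explanatory, not part of the
   original source): each name encodes "return type _FTYPE_ argument types",
   with P meaning pointer and PC meaning pointer-to-const.  So
   V4SF_FTYPE_PCFLOAT is roughly "v4sf f (const float *)" and
   VOID_FTYPE_PVOID_INT64 is roughly "void f (void *, long long)"; the actual
   tree types are built by ix86_get_builtin_func_type.  */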
29349
29350 /* Builtins with a variable number of arguments. */
29351 static const struct builtin_description bdesc_args[] =
29352 {
29353 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29354 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29355 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29356 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29357 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29358 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29359 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29360
29361 /* MMX */
29362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29368
29369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29377
29378 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29379 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29380
29381 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29382 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29384 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29385
29386 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29387 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29389 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29390 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29391 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29392
29393 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29394 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29395 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29396 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29397 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29398 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29399
29400 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29401 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29402 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29403
29404 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29405
29406 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29407 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29408 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29409 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29410 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29411 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29412
29413 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29414 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29415 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29416 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29417 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29418 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29419
29420 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29421 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29422 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29423 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29424
29425 /* 3DNow! */
29426 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29427 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29428 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29429 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29430
29431 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29432 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29433 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29434 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29435 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29436 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29437 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29438 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29439 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29440 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29441 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29442 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29443 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29444 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29445 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29446
29447 /* 3DNow!A */
29448 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29449 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29450 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29451 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29452 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29453 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29454
29455 /* SSE */
29456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29458 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29460 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29464 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29467 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29468
29469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29470
29471 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29472 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29473 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29479
29480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29483 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29484 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29485 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29487 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29489 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29490 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29491 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29492 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29493 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29494 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29495 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29496 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29497 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29498 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29499 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
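/* For the comparison entries above, the rtx code column gives the condition
   used when expanding the maskcmp/vmmaskcmp patterns.  Prototypes carrying a
   _SWAP suffix are expanded with the two vector operands exchanged, which is
   how cmpgt/cmpge are synthesized from LT/LE; the "not" forms use the
   inverted unordered codes (UNGE for cmpnlt, UNGT for cmpnle).  */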
29500
29501 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29502 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29503 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29504 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29505
29506 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29507 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29508 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29509 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29510
29511 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29512
29513 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29514 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29515 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29516 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29517 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29518
29519 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29520 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29521 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29522
29523 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29524
29525 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29526 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29527 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29528
29529 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29530 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29531
29532 /* SSE MMX or 3Dnow!A */
29533 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29534 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29535 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29536
29537 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29538 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29539 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29540 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29541
29542 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29543 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29544
29545 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29546
29547 /* SSE2 */
29548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29549
29550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29554 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29555
29556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29561
29562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29563
29564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29566 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29567 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29568
29569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29571 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29572
29573 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29574 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29575 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29576 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29581
29582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29602
29603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29604 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29607
29608 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29610 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29611 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29612
29613 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29614
29615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29616 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29617 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29618
29619 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29620
29621 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29622 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29623 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29624 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29625 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29626 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29627 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29628 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29629
29630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29638
29639 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29640 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29641
29642 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29644 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29645 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29646
29647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29649
29650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29656
29657 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29658 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29659 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29661
29662 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29663 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29664 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29665 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29666 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29667 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29668 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29669 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29670
29671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29674
29675 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29677
29678 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29679 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29680
29681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29682
29683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29684 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29687
29688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29689 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29690 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29691 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29692 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29693 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29694 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29695
29696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29697 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29698 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29699 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29700 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29701 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29702 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29703
29704 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29705 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29706 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29707 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
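/* In the shift entries above, a _COUNT suffix marks the last operand as a
   shift count that may be either an immediate or a register; the expander
   converts it to the mode the pattern expects.  The _INT_CONVERT forms
   (pslldqi128/psrldqi128) additionally convert their V2DI operands to the
   V1TI mode used by the whole-register byte-shift patterns.  */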
29708
29709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29712
29713 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29714
29715 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29716
29717 /* SSE2 MMX */
29718 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29719 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29720
29721 /* SSE3 */
29722 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29723 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29724
29725 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29726 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29727 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29728 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29729 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29730 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29731
29732 /* SSSE3 */
29733 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29734 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29735 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29736 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29737 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29738 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29739
29740 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29741 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29742 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29743 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29744 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29745 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29746 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29747 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29748 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29749 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29750 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29751 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29752 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29753 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29754 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29755 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29756 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29757 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29758 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29759 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29760 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29761 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29762 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29763 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29764
29765 /* SSSE3 palignr.  */
29766 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29767 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29768
29769 /* SSE4.1 */
29770 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29771 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29772 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29773 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29774 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29775 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29776 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29777 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29778 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29779 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29780
29781 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29782 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29783 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29784 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29785 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29786 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29787 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29788 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29789 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29790 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29791 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29792 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29793 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29794
29795 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29796 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29797 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29798 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29799 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29800 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29801 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29802 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29803 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29804 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29805 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29806 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29807
29808 /* SSE4.1 round and ptest (OPTION_MASK_ISA_ROUND) */
29809 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29810 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29811 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29812 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29813
29814 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29815 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29816 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29817 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
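/* For the floor/ceil/trunc/rint entries the "comparison" column does not hold
   an rtx code at all; it carries the ROUND_* rounding-mode constant, which
   ix86_expand_sse_round emits as the immediate operand of the sse4_1_round*
   patterns.  */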
29818
29819 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29820 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29821
29822 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29823 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29824
29825 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29826 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29827 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29828 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29829
29830 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29831 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29832
29833 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29834 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29835
29836 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29837 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29838 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
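/* The three ptest builtins share one pattern; the EQ/LTU/GTU codes select
   which condition of the PTEST flags result the builtin returns: ZF set for
   ptestz, CF set for ptestc, and neither flag set for ptestnzc.  */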
29839
29840 /* SSE4.2 */
29841 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29842 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29843 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29844 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29845 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29846
29847 /* SSE4A */
29848 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29849 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29850 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29851 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29852
29853 /* AES */
29854 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29855 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29856
29857 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29858 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29859 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29860 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29861
29862 /* PCLMUL */
29863 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29864
29865 /* AVX */
29866 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29867 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29870 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29871 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29874 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29880 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29881 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29882 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29883 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29884 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29885 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29886 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29887 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29888 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29889 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29890 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29891 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29892
29893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29897
29898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29914 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29915 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29919 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29920 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29921 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29922 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29924 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29926 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29928 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29929 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29930 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29931 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29932
29933 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29934 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29935 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29936
29937 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29938 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29939 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29940 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29941 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29942
29943 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29944
29945 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29946 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29947
29948 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29949 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29950 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29951 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29952
29953 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29954 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29955
29956 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29957 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29958
29959 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29960 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29961 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29962 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29963
29964 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29965 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29966
29967 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29968 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29969
29970 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29971 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29973 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29974
29975 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29976 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29977 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29978 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29979 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29980 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29981
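/* The vtest/ptest entries share one pattern per operand type; the EQ, LTU
   or GTU code in the rtx_code slot distinguishes the testz, testc and
   testnzc forms, i.e. which flag of the test result the builtin returns.  */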
29982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29983 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29984 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29985 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29986 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29987 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29988 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29989 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29990 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29991 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29994 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29997
29998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
30000
30001 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30002 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30003
30004 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
30005
30006 /* AVX2 */
30007 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
30008 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
30009 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
30010 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
30011 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
30012 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
30013 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
30014 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
30015 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30016 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30017 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30018 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30019 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30020 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30021 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30022 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
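/* Entries whose prototype ends in _INT_CONVERT (palignr256, pslldqi256,
   psrldqi256) expose V4DI operands but expand through a V2TI insn
   pattern; the operands are reinterpreted in the pattern's mode.  */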
30023 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
30024 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30025 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30026 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30027 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30028 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
30029 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
30030 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30031 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30032 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30033 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30034 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30035 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30036 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30037 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30038 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30039 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30040 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30041 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30042 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30043 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30044 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
30045 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
30046 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30047 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30048 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30049 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30050 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30051 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30052 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30053 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30054 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30055 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30056 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30057 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30058 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
30059 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
30060 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
30061 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
30062 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
30063 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
30064 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
30065 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
30066 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
30067 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
30068 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
30069 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
30070 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
30071 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
30072 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30073 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30074 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30075 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30076 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30077 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
30078 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30079 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
30080 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30081 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
30082 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
30083 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
30084 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30085 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30086 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30087 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
30088 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30089 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30090 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30091 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30092 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
30093 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
30094 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30095 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30096 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30097 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30098 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
30099 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30100 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30101 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30102 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30103 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
30104 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
30105 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30106 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30107 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30108 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30109 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30110 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30111 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30112 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30113 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30114 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30115 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30116 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30117 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30118 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30119 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30120 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30121 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30122 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30123 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
30124 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
30125 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30126 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30127 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
30128 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
30129 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
30130 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
30131 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30132 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
30133 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
30134 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
30135 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
30136 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30137 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
30138 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
30139 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
30140 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
30141 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
30142 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
30143 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30144 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30145 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30146 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30147 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30148 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30149 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30150 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30151 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30152 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30153
30154 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30155
30156 /* BMI */
30157 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30158 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30159 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30160
30161 /* TBM */
30162 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30163 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30164
30165 /* F16C */
30166 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
30167 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
30168 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
30169 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
30170
30171 /* BMI2 */
30172 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30173 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30174 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30175 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30176 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30177 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30178
30179 /* AVX512F */
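/* In the AVX512F entries a trailing _HI or _QI in the prototype is the
   write-mask operand: a 16-bit mask for 16-element vectors and an 8-bit
   mask for 8-element ones.  */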
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30235 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30236 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
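/* pbroadcastq512 from a general register is 64-bit only (note the or'ed
   OPTION_MASK_ISA_64BIT); the _mem variant below drops that requirement
   and broadcasts the DImode scalar from memory instead.  */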
30238 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30239 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30347 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30348 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30349 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30350 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30377
30378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30379 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30381 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30382 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30383 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30385 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30386
30387 /* Mask arithmetic operations */
30388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30389 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30390 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30391 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30392 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30393 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30394 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30395 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30396 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30397 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30398
30399 /* SHA */
30400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30407 };
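/* Each row in the table above binds one user-visible builtin to an insn
   pattern: { ISA mask, insn code, name, builtin enum, rtx comparison or
   rounding code (else UNKNOWN), signature flag }.  A minimal user-level
   sketch of how such a row is reached (the _mm512_kand wrapper comes from
   the <immintrin.h> intrinsics headers and is assumed here, not defined by
   this table):

     #include <immintrin.h>

     __mmask16
     combine_masks (__mmask16 a, __mmask16 b)
     {
       return _mm512_kand (a, b);
     }

   _mm512_kand expands to __builtin_ia32_kandhi, which the
   IX86_BUILTIN_KAND16 row above routes to the andhi3 pattern with the
   HI_FTYPE_HI_HI signature.  */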
30408
30409 /* Builtins with rounding support. */
30410 static const struct builtin_description bdesc_round_args[] =
30411 {
30412 /* AVX512F */
30413 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30414 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30415 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30416 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30417 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30418 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30419 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30420 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30421 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30422 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30423 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30424 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30425 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30426 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30427 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30428 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30429 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30430 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30431 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30432 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30433 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30434 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30435 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30436 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30437 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30438 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30439 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30440 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30441 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30442 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30443 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30444 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30445 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30446 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30447 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30448 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30449 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30450 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30451 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30452 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30453 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30454 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30455 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30456 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30457 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30458 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30459 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30460 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30461 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30462 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30463 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30464 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30465 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30466 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30467 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30468 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30469 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30470 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30471 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30472 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30473 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30474 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30475 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30476 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30477 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30478 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30479 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30480 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30481 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30482 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30483 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30484 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30485 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30486 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30487 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30488 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30489 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30490 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30491 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30492 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30493 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30494 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30495 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30496 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30497 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30498 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30499 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30500 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30501 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30502 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30503 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30504 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30505 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30506 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30507 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30508 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30509 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30510 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30511 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30512 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30513 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30514 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30515 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30516 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30517 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30518 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30519 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30520 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30521 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30522 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30523 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30524 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30525 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30526 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30527 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30528 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30529 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30530 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30531 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30532
30533 /* AVX512ER */
30534 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30535 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30536 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30537 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30538 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30539 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30540 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30541 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30542 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30543 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30544 };
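/* The trailing _INT in every signature above is the explicit rounding-mode
   immediate that these _round patterns take in addition to their normal
   operands.  A minimal user-level sketch (the _mm512_add_round_pd wrapper
   and the _MM_FROUND_* macros are assumed from <immintrin.h>, not defined
   by this table):

     #include <immintrin.h>

     __m512d
     add_nearest_no_exc (__m512d a, __m512d b)
     {
       return _mm512_add_round_pd (a, b,
                                   _MM_FROUND_TO_NEAREST_INT
                                   | _MM_FROUND_NO_EXC);
     }

   The wrapper ends up in __builtin_ia32_addpd512_mask, whose
   V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT row above carries the source and mask
   operands plus that final rounding immediate.  */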
30545
30546 /* FMA4 and XOP. */
30547 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30548 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30549 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30550 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30551 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30552 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30553 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30554 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30555 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30556 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30557 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30558 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30559 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30560 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30561 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30562 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30563 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30564 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30565 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30566 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30567 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30568 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30569 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30570 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30571 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30572 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30573 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30574 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30575 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30576 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30577 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30578 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30579 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30580 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30581 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30582 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30583 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30584 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30585 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30586 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30587 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30588 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30589 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30590 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30591 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30592 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30593 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30594 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30595 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30596 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30597 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30598 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
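/* These MULTI_ARG_* names are plain aliases for the generic V*_FTYPE_*
   signature codes, so the FMA4/XOP rows below stay short.  After
   preprocessing, the first bdesc_multi_arg row reads (a sketch, reformatted
   for clarity):

     { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
       "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
       UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },

   i.e. MULTI_ARG_3_SF is simply V4SF_FTYPE_V4SF_V4SF_V4SF.  */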
30599
30600 static const struct builtin_description bdesc_multi_arg[] =
30601 {
30602 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30603 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30604 UNKNOWN, (int)MULTI_ARG_3_SF },
30605 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30606 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30607 UNKNOWN, (int)MULTI_ARG_3_DF },
30608
30609 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30610 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30611 UNKNOWN, (int)MULTI_ARG_3_SF },
30612 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30613 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30614 UNKNOWN, (int)MULTI_ARG_3_DF },
30615
30616 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30617 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30618 UNKNOWN, (int)MULTI_ARG_3_SF },
30619 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30620 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30621 UNKNOWN, (int)MULTI_ARG_3_DF },
30622 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30623 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30624 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30625 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30626 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30627 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30628
30629 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30630 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30631 UNKNOWN, (int)MULTI_ARG_3_SF },
30632 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30633 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30634 UNKNOWN, (int)MULTI_ARG_3_DF },
30635 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30636 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30637 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30638 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30639 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30640 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30641
30642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30649
30650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30657
30658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30659
30660 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30662 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30663 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30664 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30665 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30666 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30667 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30668 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30669 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30670 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30671 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30672
30673 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30674 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30675 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30676 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30677 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30678 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30679 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30680 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30681 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30682 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30683 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30684 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30685 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30686 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30687 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30688 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30689
30690 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30691 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30692 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30693 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30694 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30695 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30696
30697 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30698 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30699 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30700 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30701 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30702 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30703 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30704 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30705 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30706 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30707 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30708 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30709 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30710 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30711 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30712
30713 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30714 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30715 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30716 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30717 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30718 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30719 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30720
30721 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30722 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30723 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30724 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30725 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30726 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30727 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30728
30729 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30730 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30731 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30732 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30733 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30734 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30735 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30736
30737 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30738 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30739 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30740 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30741 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30742 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30743 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30744
30745 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30746 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30747 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30748 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30749 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30750 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30751 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30752
30753 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30754 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30755 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30756 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30757 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30758 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30759 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30760
30761 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30762 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30763 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30764 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30765 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30766 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30767 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30768
30769 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30770 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30771 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30772 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30773 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30774 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30775 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30776
30777 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30778 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30779 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30780 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30781 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30782 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30783 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30784 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30785
30786 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30787 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30788 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30789 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30790 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30791 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30792 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30793 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30794
30795 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30796 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30797 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30798 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30799
30800 };
30801 \f
30802 /* TM vector builtins. */
30803
30804 /* Reuse the existing x86-specific `struct builtin_description' because
30805 we're lazy. Add casts to make them fit. */
30806 static const struct builtin_description bdesc_tm[] =
30807 {
30808 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30809 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30810 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30811 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30812 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30813 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30814 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30815
30816 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30817 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30818 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30819 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30820 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30821 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30822 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30823
30824 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30825 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30826 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30827 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30828 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30829 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30830 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30831
30832 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30833 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30834 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30835 };
30836
30837 /* TM callbacks. */
30838
30839 /* Return the builtin decl needed to load a vector of TYPE. */
30840
30841 static tree
30842 ix86_builtin_tm_load (tree type)
30843 {
30844 if (TREE_CODE (type) == VECTOR_TYPE)
30845 {
30846 switch (tree_to_uhwi (TYPE_SIZE (type)))
30847 {
30848 case 64:
30849 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30850 case 128:
30851 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30852 case 256:
30853 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30854 }
30855 }
30856 return NULL_TREE;
30857 }
30858
30859 /* Return the builtin decl needed to store a vector of TYPE. */
30860
30861 static tree
30862 ix86_builtin_tm_store (tree type)
30863 {
30864 if (TREE_CODE (type) == VECTOR_TYPE)
30865 {
30866 switch (tree_to_uhwi (TYPE_SIZE (type)))
30867 {
30868 case 64:
30869 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30870 case 128:
30871 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30872 case 256:
30873 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30874 }
30875 }
30876 return NULL_TREE;
30877 }
30878 \f
30879 /* Initialize the transactional memory vector load/store builtins. */
30880
30881 static void
30882 ix86_init_tm_builtins (void)
30883 {
30884 enum ix86_builtin_func_type ftype;
30885 const struct builtin_description *d;
30886 size_t i;
30887 tree decl;
30888 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30889 tree attrs_log, attrs_type_log;
30890
30891 if (!flag_tm)
30892 return;
30893
30894 /* If there are no builtins defined, we must be compiling in a
30895 language without trans-mem support. */
30896 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30897 return;
30898
30899 /* Use whatever attributes a normal TM load has. */
30900 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30901 attrs_load = DECL_ATTRIBUTES (decl);
30902 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30903 /* Use whatever attributes a normal TM store has. */
30904 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30905 attrs_store = DECL_ATTRIBUTES (decl);
30906 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30907 /* Use whatever attributes a normal TM log has. */
30908 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30909 attrs_log = DECL_ATTRIBUTES (decl);
30910 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30911
30912 for (i = 0, d = bdesc_tm;
30913 i < ARRAY_SIZE (bdesc_tm);
30914 i++, d++)
30915 {
30916 if ((d->mask & ix86_isa_flags) != 0
30917 || (lang_hooks.builtin_function
30918 == lang_hooks.builtin_function_ext_scope))
30919 {
30920 tree type, attrs, attrs_type;
30921 enum built_in_function code = (enum built_in_function) d->code;
30922
30923 ftype = (enum ix86_builtin_func_type) d->flag;
30924 type = ix86_get_builtin_func_type (ftype);
30925
30926 if (BUILTIN_TM_LOAD_P (code))
30927 {
30928 attrs = attrs_load;
30929 attrs_type = attrs_type_load;
30930 }
30931 else if (BUILTIN_TM_STORE_P (code))
30932 {
30933 attrs = attrs_store;
30934 attrs_type = attrs_type_store;
30935 }
30936 else
30937 {
30938 attrs = attrs_log;
30939 attrs_type = attrs_type_log;
30940 }
30941 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30942 /* The builtin without the prefix for
30943 calling it directly. */
30944 d->name + strlen ("__builtin_"),
30945 attrs);
30946 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30947 set the TYPE_ATTRIBUTES. */
30948 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30949
30950 set_builtin_decl (code, decl, false);
30951 }
30952 }
30953 }
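
/* Illustrative sketch, not part of GCC: for the bdesc_tm entry
   "__builtin__ITM_WM128" above, the registration loop in
   ix86_init_tm_builtins above is roughly equivalent to

     decl = add_builtin_function ("__builtin__ITM_WM128", type,
                                  BUILT_IN_TM_STORE_M128, BUILT_IN_NORMAL,
                                  "_ITM_WM128", attrs);

   i.e. the builtin is also reachable under the unprefixed libitm name
   obtained from d->name + strlen ("__builtin_").  */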
30954
30955 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
30956 not in the current target ISA, to allow the user to compile particular
30957 modules with target-specific options that differ from the command-line
30958 options. */
30959 static void
30960 ix86_init_mmx_sse_builtins (void)
30961 {
30962 const struct builtin_description * d;
30963 enum ix86_builtin_func_type ftype;
30964 size_t i;
30965
30966 /* Add all special builtins with variable number of operands. */
30967 for (i = 0, d = bdesc_special_args;
30968 i < ARRAY_SIZE (bdesc_special_args);
30969 i++, d++)
30970 {
30971 if (d->name == 0)
30972 continue;
30973
30974 ftype = (enum ix86_builtin_func_type) d->flag;
30975 def_builtin (d->mask, d->name, ftype, d->code);
30976 }
30977
30978 /* Add all builtins with variable number of operands. */
30979 for (i = 0, d = bdesc_args;
30980 i < ARRAY_SIZE (bdesc_args);
30981 i++, d++)
30982 {
30983 if (d->name == 0)
30984 continue;
30985
30986 ftype = (enum ix86_builtin_func_type) d->flag;
30987 def_builtin_const (d->mask, d->name, ftype, d->code);
30988 }
30989
30990 /* Add all builtins with rounding. */
30991 for (i = 0, d = bdesc_round_args;
30992 i < ARRAY_SIZE (bdesc_round_args);
30993 i++, d++)
30994 {
30995 if (d->name == 0)
30996 continue;
30997
30998 ftype = (enum ix86_builtin_func_type) d->flag;
30999 def_builtin_const (d->mask, d->name, ftype, d->code);
31000 }
31001
31002 /* pcmpestr[im] insns. */
31003 for (i = 0, d = bdesc_pcmpestr;
31004 i < ARRAY_SIZE (bdesc_pcmpestr);
31005 i++, d++)
31006 {
31007 if (d->code == IX86_BUILTIN_PCMPESTRM128)
31008 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
31009 else
31010 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
31011 def_builtin_const (d->mask, d->name, ftype, d->code);
31012 }
31013
31014 /* pcmpistr[im] insns. */
31015 for (i = 0, d = bdesc_pcmpistr;
31016 i < ARRAY_SIZE (bdesc_pcmpistr);
31017 i++, d++)
31018 {
31019 if (d->code == IX86_BUILTIN_PCMPISTRM128)
31020 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
31021 else
31022 ftype = INT_FTYPE_V16QI_V16QI_INT;
31023 def_builtin_const (d->mask, d->name, ftype, d->code);
31024 }
31025
31026 /* comi/ucomi insns. */
31027 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
31028 {
31029 if (d->mask == OPTION_MASK_ISA_SSE2)
31030 ftype = INT_FTYPE_V2DF_V2DF;
31031 else
31032 ftype = INT_FTYPE_V4SF_V4SF;
31033 def_builtin_const (d->mask, d->name, ftype, d->code);
31034 }
31035
31036 /* SSE */
31037 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
31038 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
31039 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
31040 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
31041
31042 /* SSE or 3DNow!A */
31043 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31044 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
31045 IX86_BUILTIN_MASKMOVQ);
31046
31047 /* SSE2 */
31048 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
31049 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
31050
31051 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
31052 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
31053 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
31054 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
31055
31056 /* SSE3. */
31057 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
31058 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
31059 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
31060 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
31061
31062 /* AES */
31063 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
31064 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
31065 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
31066 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
31067 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
31068 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
31069 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
31070 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
31071 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
31072 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
31073 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
31074 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
31075
31076 /* PCLMUL */
31077 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
31078 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
31079
31080 /* RDRND */
31081 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
31082 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
31083 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
31084 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
31085 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
31086 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
31087 IX86_BUILTIN_RDRAND64_STEP);
31088
31089 /* AVX2 */
31090 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
31091 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
31092 IX86_BUILTIN_GATHERSIV2DF);
31093
31094 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
31095 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
31096 IX86_BUILTIN_GATHERSIV4DF);
31097
31098 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
31099 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
31100 IX86_BUILTIN_GATHERDIV2DF);
31101
31102 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
31103 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
31104 IX86_BUILTIN_GATHERDIV4DF);
31105
31106 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
31107 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
31108 IX86_BUILTIN_GATHERSIV4SF);
31109
31110 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
31111 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
31112 IX86_BUILTIN_GATHERSIV8SF);
31113
31114 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
31115 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
31116 IX86_BUILTIN_GATHERDIV4SF);
31117
31118 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
31119 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
31120 IX86_BUILTIN_GATHERDIV8SF);
31121
31122 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
31123 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
31124 IX86_BUILTIN_GATHERSIV2DI);
31125
31126 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
31127 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
31128 IX86_BUILTIN_GATHERSIV4DI);
31129
31130 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
31131 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
31132 IX86_BUILTIN_GATHERDIV2DI);
31133
31134 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
31135 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
31136 IX86_BUILTIN_GATHERDIV4DI);
31137
31138 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
31139 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
31140 IX86_BUILTIN_GATHERSIV4SI);
31141
31142 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31143 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31144 IX86_BUILTIN_GATHERSIV8SI);
31145
31146 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31147 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31148 IX86_BUILTIN_GATHERDIV4SI);
31149
31150 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31151 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31152 IX86_BUILTIN_GATHERDIV8SI);
31153
31154 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
31155 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31156 IX86_BUILTIN_GATHERALTSIV4DF);
31157
31158 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
31159 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31160 IX86_BUILTIN_GATHERALTDIV8SF);
31161
31162 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
31163 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31164 IX86_BUILTIN_GATHERALTSIV4DI);
31165
31166 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
31167 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31168 IX86_BUILTIN_GATHERALTDIV8SI);
31169
31170 /* AVX512F */
31171 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31172 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
31173 IX86_BUILTIN_GATHER3SIV16SF);
31174
31175 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31176 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31177 IX86_BUILTIN_GATHER3SIV8DF);
31178
31179 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31180 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31181 IX86_BUILTIN_GATHER3DIV16SF);
31182
31183 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31184 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31185 IX86_BUILTIN_GATHER3DIV8DF);
31186
31187 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31188 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31189 IX86_BUILTIN_GATHER3SIV16SI);
31190
31191 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31192 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31193 IX86_BUILTIN_GATHER3SIV8DI);
31194
31195 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31196 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31197 IX86_BUILTIN_GATHER3DIV16SI);
31198
31199 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31200 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31201 IX86_BUILTIN_GATHER3DIV8DI);
31202
31203 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31204 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31205 IX86_BUILTIN_GATHER3ALTSIV8DF);
31206
31207 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31208 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31209 IX86_BUILTIN_GATHER3ALTDIV16SF);
31210
31211 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31212 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31213 IX86_BUILTIN_GATHER3ALTSIV8DI);
31214
31215 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31216 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31217 IX86_BUILTIN_GATHER3ALTDIV16SI);
31218
31219 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31220 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31221 IX86_BUILTIN_SCATTERSIV16SF);
31222
31223 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31224 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31225 IX86_BUILTIN_SCATTERSIV8DF);
31226
31227 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31228 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31229 IX86_BUILTIN_SCATTERDIV16SF);
31230
31231 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31232 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31233 IX86_BUILTIN_SCATTERDIV8DF);
31234
31235 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31236 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31237 IX86_BUILTIN_SCATTERSIV16SI);
31238
31239 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31240 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31241 IX86_BUILTIN_SCATTERSIV8DI);
31242
31243 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31244 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31245 IX86_BUILTIN_SCATTERDIV16SI);
31246
31247 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31248 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31249 IX86_BUILTIN_SCATTERDIV8DI);
31250
31251 /* AVX512PF */
31252 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31253 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31254 IX86_BUILTIN_GATHERPFDPD);
31255 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31256 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31257 IX86_BUILTIN_GATHERPFDPS);
31258 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31259 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31260 IX86_BUILTIN_GATHERPFQPD);
31261 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31262 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31263 IX86_BUILTIN_GATHERPFQPS);
31264 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31265 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31266 IX86_BUILTIN_SCATTERPFDPD);
31267 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31268 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31269 IX86_BUILTIN_SCATTERPFDPS);
31270 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31271 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31272 IX86_BUILTIN_SCATTERPFQPD);
31273 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31274 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31275 IX86_BUILTIN_SCATTERPFQPS);
31276
31277 /* SHA */
31278 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31279 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31280 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31281 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31282 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31283 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31284 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31285 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31286 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31287 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31288 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31289 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31290 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31291 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31292
31293 /* RTM. */
31294 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31295 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31296
31297 /* MMX access to the vec_init patterns. */
31298 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31299 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31300
31301 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31302 V4HI_FTYPE_HI_HI_HI_HI,
31303 IX86_BUILTIN_VEC_INIT_V4HI);
31304
31305 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31306 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31307 IX86_BUILTIN_VEC_INIT_V8QI);
31308
31309 /* Access to the vec_extract patterns. */
31310 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31311 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31312 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31313 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31314 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31315 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31316 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31317 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31318 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31319 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31320
31321 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31322 "__builtin_ia32_vec_ext_v4hi",
31323 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31324
31325 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31326 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31327
31328 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31329 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31330
31331 /* Access to the vec_set patterns. */
31332 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31333 "__builtin_ia32_vec_set_v2di",
31334 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31335
31336 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31337 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31338
31339 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31340 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31341
31342 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31343 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31344
31345 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31346 "__builtin_ia32_vec_set_v4hi",
31347 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31348
31349 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31350 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31351
31352 /* RDSEED */
31353 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31354 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31355 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31356 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31357 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31358 "__builtin_ia32_rdseed_di_step",
31359 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31360
31361 /* ADCX */
31362 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31363 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31364 def_builtin (OPTION_MASK_ISA_64BIT,
31365 "__builtin_ia32_addcarryx_u64",
31366 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31367 IX86_BUILTIN_ADDCARRYX64);
31368
31369 /* SBB */
31370 def_builtin (0, "__builtin_ia32_sbb_u32",
31371 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
31372 def_builtin (OPTION_MASK_ISA_64BIT,
31373 "__builtin_ia32_sbb_u64",
31374 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31375 IX86_BUILTIN_SBB64);
31376
31377 /* Read/write FLAGS. */
31378 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31379 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31380 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31381 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31382 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31383 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31384 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31385 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31386
31387 /* CLFLUSHOPT. */
31388 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31389 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31390
31391 /* Add FMA4 multi-arg argument instructions */
31392 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31393 {
31394 if (d->name == 0)
31395 continue;
31396
31397 ftype = (enum ix86_builtin_func_type) d->flag;
31398 def_builtin_const (d->mask, d->name, ftype, d->code);
31399 }
31400 }
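
/* Illustrative user-level sketch (hypothetical code, not part of this
   file): the RDRND builtin registered above with INT_FTYPE_PUNSIGNED can
   be used from a translation unit compiled with -mrdrnd as

     unsigned int
     get_hw_random (void)
     {
       unsigned int val = 0;
       while (!__builtin_ia32_rdrand32_step (&val))
         ;
       return val;
     }

   The builtin returns nonzero once RDRAND produced a value.  Only the
   builtin name and its signature come from the table above; the
   surrounding function is made up for exposition.  */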
31401
31402 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31403 to return a pointer to VERSION_DECL if the outcome of the expression
31404 formed by PREDICATE_CHAIN is true. This function will be called during
31405 version dispatch to decide which function version to execute. It returns
31406 the basic block at the end, to which more conditions can be added. */
31407
31408 static basic_block
31409 add_condition_to_bb (tree function_decl, tree version_decl,
31410 tree predicate_chain, basic_block new_bb)
31411 {
31412 gimple return_stmt;
31413 tree convert_expr, result_var;
31414 gimple convert_stmt;
31415 gimple call_cond_stmt;
31416 gimple if_else_stmt;
31417
31418 basic_block bb1, bb2, bb3;
31419 edge e12, e23;
31420
31421 tree cond_var, and_expr_var = NULL_TREE;
31422 gimple_seq gseq;
31423
31424 tree predicate_decl, predicate_arg;
31425
31426 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31427
31428 gcc_assert (new_bb != NULL);
31429 gseq = bb_seq (new_bb);
31430
31431
31432 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31433 build_fold_addr_expr (version_decl));
31434 result_var = create_tmp_var (ptr_type_node, NULL);
31435 convert_stmt = gimple_build_assign (result_var, convert_expr);
31436 return_stmt = gimple_build_return (result_var);
31437
31438 if (predicate_chain == NULL_TREE)
31439 {
31440 gimple_seq_add_stmt (&gseq, convert_stmt);
31441 gimple_seq_add_stmt (&gseq, return_stmt);
31442 set_bb_seq (new_bb, gseq);
31443 gimple_set_bb (convert_stmt, new_bb);
31444 gimple_set_bb (return_stmt, new_bb);
31445 pop_cfun ();
31446 return new_bb;
31447 }
31448
31449 while (predicate_chain != NULL)
31450 {
31451 cond_var = create_tmp_var (integer_type_node, NULL);
31452 predicate_decl = TREE_PURPOSE (predicate_chain);
31453 predicate_arg = TREE_VALUE (predicate_chain);
31454 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31455 gimple_call_set_lhs (call_cond_stmt, cond_var);
31456
31457 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31458 gimple_set_bb (call_cond_stmt, new_bb);
31459 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31460
31461 predicate_chain = TREE_CHAIN (predicate_chain);
31462
31463 if (and_expr_var == NULL)
31464 and_expr_var = cond_var;
31465 else
31466 {
31467 gimple assign_stmt;
31468 /* Use MIN_EXPR to check whether any integer is zero:
31469 and_expr_var = min_expr <cond_var, and_expr_var>. */
31470 assign_stmt = gimple_build_assign (and_expr_var,
31471 build2 (MIN_EXPR, integer_type_node,
31472 cond_var, and_expr_var));
31473
31474 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31475 gimple_set_bb (assign_stmt, new_bb);
31476 gimple_seq_add_stmt (&gseq, assign_stmt);
31477 }
31478 }
31479
31480 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31481 integer_zero_node,
31482 NULL_TREE, NULL_TREE);
31483 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31484 gimple_set_bb (if_else_stmt, new_bb);
31485 gimple_seq_add_stmt (&gseq, if_else_stmt);
31486
31487 gimple_seq_add_stmt (&gseq, convert_stmt);
31488 gimple_seq_add_stmt (&gseq, return_stmt);
31489 set_bb_seq (new_bb, gseq);
31490
31491 bb1 = new_bb;
31492 e12 = split_block (bb1, if_else_stmt);
31493 bb2 = e12->dest;
31494 e12->flags &= ~EDGE_FALLTHRU;
31495 e12->flags |= EDGE_TRUE_VALUE;
31496
31497 e23 = split_block (bb2, return_stmt);
31498
31499 gimple_set_bb (convert_stmt, bb2);
31500 gimple_set_bb (return_stmt, bb2);
31501
31502 bb3 = e23->dest;
31503 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31504
31505 remove_edge (e23);
31506 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31507
31508 pop_cfun ();
31509
31510 return bb3;
31511 }
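
/* Illustrative sketch (for exposition only): with a two-element
   PREDICATE_CHAIN, the statements appended to NEW_BB correspond roughly
   to the pseudo-C

     c1 = __builtin_cpu_is ("core2");
     c2 = __builtin_cpu_supports ("sse4.1");
     c1 = MIN_EXPR <c2, c1>;
     if (c1 > 0)
       return (void *) version_decl;

   where MIN_EXPR yields zero iff any predicate failed, and the false
   edge of the condition leads to the basic block returned to the caller.
   The predicate names are hypothetical; the actual decls come from
   get_builtin_code_for_version below.  */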
31512
31513 /* This parses the attribute arguments to target in DECL and determines
31514 the right builtin to use to match the platform specification.
31515 It returns the priority value for this version decl. If PREDICATE_LIST
31516 is not NULL, it stores the list of cpu features that need to be checked
31517 before dispatching this function. */
31518
31519 static unsigned int
31520 get_builtin_code_for_version (tree decl, tree *predicate_list)
31521 {
31522 tree attrs;
31523 struct cl_target_option cur_target;
31524 tree target_node;
31525 struct cl_target_option *new_target;
31526 const char *arg_str = NULL;
31527 const char *attrs_str = NULL;
31528 char *tok_str = NULL;
31529 char *token;
31530
31531 /* Priority of i386 features, greater value is higher priority. This is
31532 used to decide the order in which function dispatch must happen. For
31533 instance, a version specialized for SSE4.2 should be checked for dispatch
31534 before a version for SSE3, as SSE4.2 implies SSE3. */
31535 enum feature_priority
31536 {
31537 P_ZERO = 0,
31538 P_MMX,
31539 P_SSE,
31540 P_SSE2,
31541 P_SSE3,
31542 P_SSSE3,
31543 P_PROC_SSSE3,
31544 P_SSE4_A,
31545 P_PROC_SSE4_A,
31546 P_SSE4_1,
31547 P_SSE4_2,
31548 P_PROC_SSE4_2,
31549 P_POPCNT,
31550 P_AVX,
31551 P_PROC_AVX,
31552 P_FMA4,
31553 P_XOP,
31554 P_PROC_XOP,
31555 P_FMA,
31556 P_PROC_FMA,
31557 P_AVX2,
31558 P_PROC_AVX2
31559 };
31560
31561 enum feature_priority priority = P_ZERO;
31562
31563 /* These are the target attribute strings for which a dispatcher is
31564 available, from fold_builtin_cpu. */
31565
31566 static struct _feature_list
31567 {
31568 const char *const name;
31569 const enum feature_priority priority;
31570 }
31571 const feature_list[] =
31572 {
31573 {"mmx", P_MMX},
31574 {"sse", P_SSE},
31575 {"sse2", P_SSE2},
31576 {"sse3", P_SSE3},
31577 {"sse4a", P_SSE4_A},
31578 {"ssse3", P_SSSE3},
31579 {"sse4.1", P_SSE4_1},
31580 {"sse4.2", P_SSE4_2},
31581 {"popcnt", P_POPCNT},
31582 {"avx", P_AVX},
31583 {"fma4", P_FMA4},
31584 {"xop", P_XOP},
31585 {"fma", P_FMA},
31586 {"avx2", P_AVX2}
31587 };
31588
31589
31590 static unsigned int NUM_FEATURES
31591 = sizeof (feature_list) / sizeof (struct _feature_list);
31592
31593 unsigned int i;
31594
31595 tree predicate_chain = NULL_TREE;
31596 tree predicate_decl, predicate_arg;
31597
31598 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31599 gcc_assert (attrs != NULL);
31600
31601 attrs = TREE_VALUE (TREE_VALUE (attrs));
31602
31603 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31604 attrs_str = TREE_STRING_POINTER (attrs);
31605
31606 /* Return priority zero for default function. */
31607 if (strcmp (attrs_str, "default") == 0)
31608 return 0;
31609
31610 /* Handle arch= if specified. For priority, set it to be 1 more than
31611 the best instruction set the processor can handle. For instance, if
31612 there is a version for atom and a version for ssse3 (the highest ISA
31613 priority for atom), the atom version must be checked for dispatch
31614 before the ssse3 version. */
31615 if (strstr (attrs_str, "arch=") != NULL)
31616 {
31617 cl_target_option_save (&cur_target, &global_options);
31618 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31619 &global_options_set);
31620
31621 gcc_assert (target_node);
31622 new_target = TREE_TARGET_OPTION (target_node);
31623 gcc_assert (new_target);
31624
31625 if (new_target->arch_specified && new_target->arch > 0)
31626 {
31627 switch (new_target->arch)
31628 {
31629 case PROCESSOR_CORE2:
31630 arg_str = "core2";
31631 priority = P_PROC_SSSE3;
31632 break;
31633 case PROCESSOR_NEHALEM:
31634 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31635 arg_str = "westmere";
31636 else
31637 /* We translate "arch=corei7" and "arch=nehalem" to
31638 "corei7" so that it will be mapped to M_INTEL_COREI7
31639 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31640 arg_str = "corei7";
31641 priority = P_PROC_SSE4_2;
31642 break;
31643 case PROCESSOR_SANDYBRIDGE:
31644 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31645 arg_str = "ivybridge";
31646 else
31647 arg_str = "sandybridge";
31648 priority = P_PROC_AVX;
31649 break;
31650 case PROCESSOR_HASWELL:
31651 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31652 arg_str = "broadwell";
31653 else
31654 arg_str = "haswell";
31655 priority = P_PROC_AVX2;
31656 break;
31657 case PROCESSOR_BONNELL:
31658 arg_str = "bonnell";
31659 priority = P_PROC_SSSE3;
31660 break;
31661 case PROCESSOR_SILVERMONT:
31662 arg_str = "silvermont";
31663 priority = P_PROC_SSE4_2;
31664 break;
31665 case PROCESSOR_AMDFAM10:
31666 arg_str = "amdfam10h";
31667 priority = P_PROC_SSE4_A;
31668 break;
31669 case PROCESSOR_BTVER1:
31670 arg_str = "btver1";
31671 priority = P_PROC_SSE4_A;
31672 break;
31673 case PROCESSOR_BTVER2:
31674 arg_str = "btver2";
31675 priority = P_PROC_AVX;
31676 break;
31677 case PROCESSOR_BDVER1:
31678 arg_str = "bdver1";
31679 priority = P_PROC_XOP;
31680 break;
31681 case PROCESSOR_BDVER2:
31682 arg_str = "bdver2";
31683 priority = P_PROC_FMA;
31684 break;
31685 case PROCESSOR_BDVER3:
31686 arg_str = "bdver3";
31687 priority = P_PROC_FMA;
31688 break;
31689 case PROCESSOR_BDVER4:
31690 arg_str = "bdver4";
31691 priority = P_PROC_AVX2;
31692 break;
31693 }
31694 }
31695
31696 cl_target_option_restore (&global_options, &cur_target);
31697
31698 if (predicate_list && arg_str == NULL)
31699 {
31700 error_at (DECL_SOURCE_LOCATION (decl),
31701 "No dispatcher found for the versioning attributes");
31702 return 0;
31703 }
31704
31705 if (predicate_list)
31706 {
31707 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31708 /* For a C string literal the length includes the trailing NULL. */
31709 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31710 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31711 predicate_chain);
31712 }
31713 }
31714
31715 /* Process feature name. */
31716 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31717 strcpy (tok_str, attrs_str);
31718 token = strtok (tok_str, ",");
31719 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31720
31721 while (token != NULL)
31722 {
31723 /* Do not process "arch=" */
31724 if (strncmp (token, "arch=", 5) == 0)
31725 {
31726 token = strtok (NULL, ",");
31727 continue;
31728 }
31729 for (i = 0; i < NUM_FEATURES; ++i)
31730 {
31731 if (strcmp (token, feature_list[i].name) == 0)
31732 {
31733 if (predicate_list)
31734 {
31735 predicate_arg = build_string_literal (
31736 strlen (feature_list[i].name) + 1,
31737 feature_list[i].name);
31738 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31739 predicate_chain);
31740 }
31741 /* Find the maximum priority feature. */
31742 if (feature_list[i].priority > priority)
31743 priority = feature_list[i].priority;
31744
31745 break;
31746 }
31747 }
31748 if (predicate_list && i == NUM_FEATURES)
31749 {
31750 error_at (DECL_SOURCE_LOCATION (decl),
31751 "No dispatcher found for %s", token);
31752 return 0;
31753 }
31754 token = strtok (NULL, ",");
31755 }
31756 free (tok_str);
31757
31758 if (predicate_list && predicate_chain == NULL_TREE)
31759 {
31760 error_at (DECL_SOURCE_LOCATION (decl),
31761 "No dispatcher found for the versioning attributes : %s",
31762 attrs_str);
31763 return 0;
31764 }
31765 else if (predicate_list)
31766 {
31767 predicate_chain = nreverse (predicate_chain);
31768 *predicate_list = predicate_chain;
31769 }
31770
31771 return priority;
31772 }
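
/* Example (hypothetical declaration, for exposition): for a version
   declared as

     __attribute__ ((target ("arch=core2,sse4.1")))
     int foo (void);

   this function returns P_SSE4_1 (the higher of P_PROC_SSSE3 for core2
   and P_SSE4_1 for the sse4.1 feature) and, when PREDICATE_LIST is
   non-NULL, builds a chain equivalent to checking
   __builtin_cpu_is ("core2") && __builtin_cpu_supports ("sse4.1").  */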
31773
31774 /* This compares the priority of target features in function DECL1
31775 and DECL2. It returns positive value if DECL1 is higher priority,
31776 negative value if DECL2 is higher priority and 0 if they are the
31777 same. */
31778
31779 static int
31780 ix86_compare_version_priority (tree decl1, tree decl2)
31781 {
31782 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31783 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31784
31785 return (int)priority1 - (int)priority2;
31786 }
31787
31788 /* V1 and V2 point to function versions with different priorities
31789 based on the target ISA. This function compares their priorities. */
31790
31791 static int
31792 feature_compare (const void *v1, const void *v2)
31793 {
31794 typedef struct _function_version_info
31795 {
31796 tree version_decl;
31797 tree predicate_chain;
31798 unsigned int dispatch_priority;
31799 } function_version_info;
31800
31801 const function_version_info c1 = *(const function_version_info *)v1;
31802 const function_version_info c2 = *(const function_version_info *)v2;
31803 return (c2.dispatch_priority - c1.dispatch_priority);
31804 }
31805
31806 /* This function generates the dispatch function for
31807 multi-versioned functions. DISPATCH_DECL is the function which will
31808 contain the dispatch logic. FNDECLS are the function choices for
31809 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31810 in DISPATCH_DECL in which the dispatch code is generated. */
31811
31812 static int
31813 dispatch_function_versions (tree dispatch_decl,
31814 void *fndecls_p,
31815 basic_block *empty_bb)
31816 {
31817 tree default_decl;
31818 gimple ifunc_cpu_init_stmt;
31819 gimple_seq gseq;
31820 int ix;
31821 tree ele;
31822 vec<tree> *fndecls;
31823 unsigned int num_versions = 0;
31824 unsigned int actual_versions = 0;
31825 unsigned int i;
31826
31827 struct _function_version_info
31828 {
31829 tree version_decl;
31830 tree predicate_chain;
31831 unsigned int dispatch_priority;
31832 }*function_version_info;
31833
31834 gcc_assert (dispatch_decl != NULL
31835 && fndecls_p != NULL
31836 && empty_bb != NULL);
31837
31838 /* fndecls_p is actually a vector. */
31839 fndecls = static_cast<vec<tree> *> (fndecls_p);
31840
31841 /* There must be at least one version other than the default. */
31842 num_versions = fndecls->length ();
31843 gcc_assert (num_versions >= 2);
31844
31845 function_version_info = (struct _function_version_info *)
31846 XNEWVEC (struct _function_version_info, (num_versions - 1));
31847
31848 /* The first version in the vector is the default decl. */
31849 default_decl = (*fndecls)[0];
31850
31851 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31852
31853 gseq = bb_seq (*empty_bb);
31854 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31855 constructors, so explicitly call __builtin_cpu_init here. */
31856 ifunc_cpu_init_stmt = gimple_build_call_vec (
31857 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31858 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31859 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31860 set_bb_seq (*empty_bb, gseq);
31861
31862 pop_cfun ();
31863
31864
31865 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31866 {
31867 tree version_decl = ele;
31868 tree predicate_chain = NULL_TREE;
31869 unsigned int priority;
31870 /* Get attribute string, parse it and find the right predicate decl.
31871 The predicate function could be a lengthy combination of many
31872 features, like arch-type and various isa-variants. */
31873 priority = get_builtin_code_for_version (version_decl,
31874 &predicate_chain);
31875
31876 if (predicate_chain == NULL_TREE)
31877 continue;
31878
31879 function_version_info [actual_versions].version_decl = version_decl;
31880 function_version_info [actual_versions].predicate_chain
31881 = predicate_chain;
31882 function_version_info [actual_versions].dispatch_priority = priority;
31883 actual_versions++;
31884 }
31885
31886 /* Sort the versions according to descending order of dispatch priority. The
31887 priority is based on the ISA. This is not a perfect solution. There
31888 could still be ambiguity. If more than one function version is suitable
31889 to execute, which one should be dispatched? In future, allow the user
31890 to specify a dispatch priority next to the version. */
31891 qsort (function_version_info, actual_versions,
31892 sizeof (struct _function_version_info), feature_compare);
31893
31894 for (i = 0; i < actual_versions; ++i)
31895 *empty_bb = add_condition_to_bb (dispatch_decl,
31896 function_version_info[i].version_decl,
31897 function_version_info[i].predicate_chain,
31898 *empty_bb);
31899
31900 /* Dispatch the default version at the end. */
31901 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31902 NULL, *empty_bb);
31903
31904 free (function_version_info);
31905 return 0;
31906 }
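
/* For exposition only (hypothetical C rendering of the gimple built by
   this function together with add_condition_to_bb): for versions of foo
   targeting avx2 and sse4.2 plus a default, the resolver body amounts to

     static void *
     foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return (void *) foo_avx2;
       if (__builtin_cpu_supports ("sse4.2"))
         return (void *) foo_sse42;
       return (void *) foo_default;
     }

   The symbol names are made up; the real version decls are mangled by
   ix86_mangle_function_version_assembler_name below.  */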
31907
31908 /* Comparator function to be used in qsort routine to sort attribute
31909 specification strings to "target". */
31910
31911 static int
31912 attr_strcmp (const void *v1, const void *v2)
31913 {
31914 const char *c1 = *(char *const*)v1;
31915 const char *c2 = *(char *const*)v2;
31916 return strcmp (c1, c2);
31917 }
31918
31919 /* ARGLIST is the argument to target attribute. This function tokenizes
31920 the comma separated arguments, sorts them and returns a string which
31921 is a unique identifier for the comma separated arguments. It also
31922 replaces non-identifier characters "=,-" with "_". */
31923
31924 static char *
31925 sorted_attr_string (tree arglist)
31926 {
31927 tree arg;
31928 size_t str_len_sum = 0;
31929 char **args = NULL;
31930 char *attr_str, *ret_str;
31931 char *attr = NULL;
31932 unsigned int argnum = 1;
31933 unsigned int i;
31934
31935 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31936 {
31937 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31938 size_t len = strlen (str);
31939 str_len_sum += len + 1;
31940 if (arg != arglist)
31941 argnum++;
31942 for (i = 0; i < strlen (str); i++)
31943 if (str[i] == ',')
31944 argnum++;
31945 }
31946
31947 attr_str = XNEWVEC (char, str_len_sum);
31948 str_len_sum = 0;
31949 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31950 {
31951 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31952 size_t len = strlen (str);
31953 memcpy (attr_str + str_len_sum, str, len);
31954 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31955 str_len_sum += len + 1;
31956 }
31957
31958 /* Replace "=,-" with "_". */
31959 for (i = 0; i < strlen (attr_str); i++)
31960 if (attr_str[i] == '=' || attr_str[i]== '-')
31961 attr_str[i] = '_';
31962
31963 if (argnum == 1)
31964 return attr_str;
31965
31966 args = XNEWVEC (char *, argnum);
31967
31968 i = 0;
31969 attr = strtok (attr_str, ",");
31970 while (attr != NULL)
31971 {
31972 args[i] = attr;
31973 i++;
31974 attr = strtok (NULL, ",");
31975 }
31976
31977 qsort (args, argnum, sizeof (char *), attr_strcmp);
31978
31979 ret_str = XNEWVEC (char, str_len_sum);
31980 str_len_sum = 0;
31981 for (i = 0; i < argnum; i++)
31982 {
31983 size_t len = strlen (args[i]);
31984 memcpy (ret_str + str_len_sum, args[i], len);
31985 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31986 str_len_sum += len + 1;
31987 }
31988
31989 XDELETEVEC (args);
31990 XDELETEVEC (attr_str);
31991 return ret_str;
31992 }
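
/* Example (hypothetical attribute, for exposition): for

     __attribute__ ((target ("sse4.1,arch=core2")))

   the tokens become "sse4.1" and "arch_core2" after '=' and '-' are
   rewritten to '_', and the sorted, '_'-joined result is
   "arch_core2_sse4.1".  */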
31993
31994 /* This function changes the assembler name for functions that are
31995 versions. If DECL is a function version and has a "target"
31996 attribute, it appends the attribute string to its assembler name. */
31997
31998 static tree
31999 ix86_mangle_function_version_assembler_name (tree decl, tree id)
32000 {
32001 tree version_attr;
32002 const char *orig_name, *version_string;
32003 char *attr_str, *assembler_name;
32004
32005 if (DECL_DECLARED_INLINE_P (decl)
32006 && lookup_attribute ("gnu_inline",
32007 DECL_ATTRIBUTES (decl)))
32008 error_at (DECL_SOURCE_LOCATION (decl),
32009 "Function versions cannot be marked as gnu_inline,"
32010 " bodies have to be generated");
32011
32012 if (DECL_VIRTUAL_P (decl)
32013 || DECL_VINDEX (decl))
32014 sorry ("Virtual function multiversioning not supported");
32015
32016 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32017
32018 /* target attribute string cannot be NULL. */
32019 gcc_assert (version_attr != NULL_TREE);
32020
32021 orig_name = IDENTIFIER_POINTER (id);
32022 version_string
32023 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
32024
32025 if (strcmp (version_string, "default") == 0)
32026 return id;
32027
32028 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
32029 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
32030
32031 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
32032
32033 /* Allow assembler name to be modified if already set. */
32034 if (DECL_ASSEMBLER_NAME_SET_P (decl))
32035 SET_DECL_RTL (decl, NULL);
32036
32037 tree ret = get_identifier (assembler_name);
32038 XDELETEVEC (attr_str);
32039 XDELETEVEC (assembler_name);
32040 return ret;
32041 }
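
/* Example (hypothetical decl, for exposition): for

     __attribute__ ((target ("avx2")))
     int foo (int);

   an assembler name of "foo" becomes "foo.avx2", while the version
   tagged target ("default") keeps its original assembler name.  */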
32042
32043 /* This function returns true if FN1 and FN2 are versions of the same function,
32044 that is, the target strings of the function decls are different. This assumes
32045 that FN1 and FN2 have the same signature. */
32046
32047 static bool
32048 ix86_function_versions (tree fn1, tree fn2)
32049 {
32050 tree attr1, attr2;
32051 char *target1, *target2;
32052 bool result;
32053
32054 if (TREE_CODE (fn1) != FUNCTION_DECL
32055 || TREE_CODE (fn2) != FUNCTION_DECL)
32056 return false;
32057
32058 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
32059 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
32060
32061 /* At least one function decl should have the target attribute specified. */
32062 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
32063 return false;
32064
32065 /* Diagnose missing target attribute if one of the decls is already
32066 multi-versioned. */
32067 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
32068 {
32069 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
32070 {
32071 if (attr2 != NULL_TREE)
32072 {
32073 tree tem = fn1;
32074 fn1 = fn2;
32075 fn2 = tem;
32076 attr1 = attr2;
32077 }
32078 error_at (DECL_SOURCE_LOCATION (fn2),
32079 "missing %<target%> attribute for multi-versioned %D",
32080 fn2);
32081 inform (DECL_SOURCE_LOCATION (fn1),
32082 "previous declaration of %D", fn1);
32083 /* Prevent diagnosing of the same error multiple times. */
32084 DECL_ATTRIBUTES (fn2)
32085 = tree_cons (get_identifier ("target"),
32086 copy_node (TREE_VALUE (attr1)),
32087 DECL_ATTRIBUTES (fn2));
32088 }
32089 return false;
32090 }
32091
32092 target1 = sorted_attr_string (TREE_VALUE (attr1));
32093 target2 = sorted_attr_string (TREE_VALUE (attr2));
32094
32095 /* The sorted target strings must be different for fn1 and fn2
32096 to be versions. */
32097 if (strcmp (target1, target2) == 0)
32098 result = false;
32099 else
32100 result = true;
32101
32102 XDELETEVEC (target1);
32103 XDELETEVEC (target2);
32104
32105 return result;
32106 }
32107
32108 static tree
32109 ix86_mangle_decl_assembler_name (tree decl, tree id)
32110 {
32111 /* For function version, add the target suffix to the assembler name. */
32112 if (TREE_CODE (decl) == FUNCTION_DECL
32113 && DECL_FUNCTION_VERSIONED (decl))
32114 id = ix86_mangle_function_version_assembler_name (decl, id);
32115 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
32116 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
32117 #endif
32118
32119 return id;
32120 }
32121
32122 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
32123 is true, append the full path name of the source file. */
32124
32125 static char *
32126 make_name (tree decl, const char *suffix, bool make_unique)
32127 {
32128 char *global_var_name;
32129 int name_len;
32130 const char *name;
32131 const char *unique_name = NULL;
32132
32133 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
32134
32135 /* Get a unique name that can be used globally without any chances
32136 of collision at link time. */
32137 if (make_unique)
32138 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
32139
32140 name_len = strlen (name) + strlen (suffix) + 2;
32141
32142 if (make_unique)
32143 name_len += strlen (unique_name) + 1;
32144 global_var_name = XNEWVEC (char, name_len);
32145
32146 /* Use '.' to concatenate names as it is demangler friendly. */
32147 if (make_unique)
32148 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
32149 suffix);
32150 else
32151 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
32152
32153 return global_var_name;
32154 }
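
/* Illustrative calls (hypothetical, for exposition):

     char *n1 = make_name (decl, "ifunc", false);
     char *n2 = make_name (decl, "resolver", true);

   For a decl whose assembler name is "foo", n1 is "foo.ifunc" and n2 is
   of the form "foo.<file-scope-id>.resolver", made unique per
   translation unit via get_file_function_name.  */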
32155
32156 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32157
32158 /* Make a dispatcher declaration for the multi-versioned function DECL.
32159 Calls to DECL function will be replaced with calls to the dispatcher
32160 by the front-end. Return the decl created. */
32161
32162 static tree
32163 make_dispatcher_decl (const tree decl)
32164 {
32165 tree func_decl;
32166 char *func_name;
32167 tree fn_type, func_type;
32168 bool is_uniq = false;
32169
32170 if (TREE_PUBLIC (decl) == 0)
32171 is_uniq = true;
32172
32173 func_name = make_name (decl, "ifunc", is_uniq);
32174
32175 fn_type = TREE_TYPE (decl);
32176 func_type = build_function_type (TREE_TYPE (fn_type),
32177 TYPE_ARG_TYPES (fn_type));
32178
32179 func_decl = build_fn_decl (func_name, func_type);
32180 XDELETEVEC (func_name);
32181 TREE_USED (func_decl) = 1;
32182 DECL_CONTEXT (func_decl) = NULL_TREE;
32183 DECL_INITIAL (func_decl) = error_mark_node;
32184 DECL_ARTIFICIAL (func_decl) = 1;
32185 /* Mark this func as external, the resolver will flip it again if
32186 it gets generated. */
32187 DECL_EXTERNAL (func_decl) = 1;
32188 /* This decl will be an IFUNC, and IFUNCs have to be externally visible. */
32189 TREE_PUBLIC (func_decl) = 1;
32190
32191 return func_decl;
32192 }
32193
32194 #endif
32195
32196 /* Return true if DECL is multi-versioned and is the default function,
32197 that is, it is not tagged with a target-specific optimization. */
32198
32199 static bool
32200 is_function_default_version (const tree decl)
32201 {
32202 if (TREE_CODE (decl) != FUNCTION_DECL
32203 || !DECL_FUNCTION_VERSIONED (decl))
32204 return false;
32205 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32206 gcc_assert (attr);
32207 attr = TREE_VALUE (TREE_VALUE (attr));
32208 return (TREE_CODE (attr) == STRING_CST
32209 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32210 }
32211
32212 /* Make a dispatcher declaration for the multi-versioned function DECL.
32213 Calls to DECL function will be replaced with calls to the dispatcher
32214 by the front-end. Returns the decl of the dispatcher function. */
32215
32216 static tree
32217 ix86_get_function_versions_dispatcher (void *decl)
32218 {
32219 tree fn = (tree) decl;
32220 struct cgraph_node *node = NULL;
32221 struct cgraph_node *default_node = NULL;
32222 struct cgraph_function_version_info *node_v = NULL;
32223 struct cgraph_function_version_info *first_v = NULL;
32224
32225 tree dispatch_decl = NULL;
32226
32227 struct cgraph_function_version_info *default_version_info = NULL;
32228
32229 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32230
32231 node = cgraph_node::get (fn);
32232 gcc_assert (node != NULL);
32233
32234 node_v = node->function_version ();
32235 gcc_assert (node_v != NULL);
32236
32237 if (node_v->dispatcher_resolver != NULL)
32238 return node_v->dispatcher_resolver;
32239
32240 /* Find the default version and make it the first node. */
32241 first_v = node_v;
32242 /* Go to the beginning of the chain. */
32243 while (first_v->prev != NULL)
32244 first_v = first_v->prev;
32245 default_version_info = first_v;
32246 while (default_version_info != NULL)
32247 {
32248 if (is_function_default_version
32249 (default_version_info->this_node->decl))
32250 break;
32251 default_version_info = default_version_info->next;
32252 }
32253
32254 /* If there is no default node, just return NULL. */
32255 if (default_version_info == NULL)
32256 return NULL;
32257
32258 /* Make default info the first node. */
32259 if (first_v != default_version_info)
32260 {
32261 default_version_info->prev->next = default_version_info->next;
32262 if (default_version_info->next)
32263 default_version_info->next->prev = default_version_info->prev;
32264 first_v->prev = default_version_info;
32265 default_version_info->next = first_v;
32266 default_version_info->prev = NULL;
32267 }
32268
32269 default_node = default_version_info->this_node;
32270
32271 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32272 if (targetm.has_ifunc_p ())
32273 {
32274 struct cgraph_function_version_info *it_v = NULL;
32275 struct cgraph_node *dispatcher_node = NULL;
32276 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32277
32278 /* Right now, the dispatching is done via ifunc. */
32279 dispatch_decl = make_dispatcher_decl (default_node->decl);
32280
32281 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32282 gcc_assert (dispatcher_node != NULL);
32283 dispatcher_node->dispatcher_function = 1;
32284 dispatcher_version_info
32285 = dispatcher_node->insert_new_function_version ();
32286 dispatcher_version_info->next = default_version_info;
32287 dispatcher_node->definition = 1;
32288
32289 /* Set the dispatcher for all the versions. */
32290 it_v = default_version_info;
32291 while (it_v != NULL)
32292 {
32293 it_v->dispatcher_resolver = dispatch_decl;
32294 it_v = it_v->next;
32295 }
32296 }
32297 else
32298 #endif
32299 {
32300 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32301 "multiversioning needs ifunc which is not supported "
32302 "on this target");
32303 }
32304
32305 return dispatch_decl;
32306 }
32307
32308 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32309 it to CHAIN. */
32310
32311 static tree
32312 make_attribute (const char *name, const char *arg_name, tree chain)
32313 {
32314 tree attr_name;
32315 tree attr_arg_name;
32316 tree attr_args;
32317 tree attr;
32318
32319 attr_name = get_identifier (name);
32320 attr_arg_name = build_string (strlen (arg_name), arg_name);
32321 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32322 attr = tree_cons (attr_name, attr_args, chain);
32323 return attr;
32324 }
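/* For instance (illustrative; "foo.resolver" is a made-up name mirroring
   the ifunc use further below):

     make_attribute ("ifunc", "foo.resolver", NULL_TREE)

   builds a tree list equivalent to __attribute__ ((ifunc ("foo.resolver"))).  */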
32325
32326 /* Make the resolver function decl to dispatch the versions of
32327 a multi-versioned function, DEFAULT_DECL. Create an
32328 empty basic block in the resolver and store the pointer in
32329 EMPTY_BB. Return the decl of the resolver function. */
32330
32331 static tree
32332 make_resolver_func (const tree default_decl,
32333 const tree dispatch_decl,
32334 basic_block *empty_bb)
32335 {
32336 char *resolver_name;
32337 tree decl, type, decl_name, t;
32338 bool is_uniq = false;
32339
32340 /* IFUNCs have to be globally visible. So, if the default_decl is
32341 not, then the name of the IFUNC should be made unique. */
32342 if (TREE_PUBLIC (default_decl) == 0)
32343 is_uniq = true;
32344
32345 /* Append the filename to the resolver function if the versions are
32346 not externally visible. This is because the resolver function has
32347 to be externally visible for the loader to find it. So, appending
32348 the filename will prevent conflicts with a resolver function from
32349 another module which is based on the same version name. */
32350 resolver_name = make_name (default_decl, "resolver", is_uniq);
32351
32352 /* The resolver function should return a (void *). */
32353 type = build_function_type_list (ptr_type_node, NULL_TREE);
32354
32355 decl = build_fn_decl (resolver_name, type);
32356 decl_name = get_identifier (resolver_name);
32357 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32358
32359 DECL_NAME (decl) = decl_name;
32360 TREE_USED (decl) = 1;
32361 DECL_ARTIFICIAL (decl) = 1;
32362 DECL_IGNORED_P (decl) = 0;
32363 /* IFUNC resolvers have to be externally visible. */
32364 TREE_PUBLIC (decl) = 1;
32365 DECL_UNINLINABLE (decl) = 1;
32366
32367 /* Resolver is not external, body is generated. */
32368 DECL_EXTERNAL (decl) = 0;
32369 DECL_EXTERNAL (dispatch_decl) = 0;
32370
32371 DECL_CONTEXT (decl) = NULL_TREE;
32372 DECL_INITIAL (decl) = make_node (BLOCK);
32373 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32374
32375 if (DECL_COMDAT_GROUP (default_decl)
32376 || TREE_PUBLIC (default_decl))
32377 {
32378 /* In this case, each translation unit with a call to this
32379 versioned function will put out a resolver. Ensure it
32380 is comdat to keep just one copy. */
32381 DECL_COMDAT (decl) = 1;
32382 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32383 }
32384 /* Build result decl and add to function_decl. */
32385 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32386 DECL_ARTIFICIAL (t) = 1;
32387 DECL_IGNORED_P (t) = 1;
32388 DECL_RESULT (decl) = t;
32389
32390 gimplify_function_tree (decl);
32391 push_cfun (DECL_STRUCT_FUNCTION (decl));
32392 *empty_bb = init_lowered_empty_function (decl, false);
32393
32394 cgraph_node::add_new_function (decl, true);
32395 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32396
32397 pop_cfun ();
32398
32399 gcc_assert (dispatch_decl != NULL);
32400 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32401 DECL_ATTRIBUTES (dispatch_decl)
32402 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32403
32404 /* Create the alias for dispatch to resolver here. */
32405 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32406 cgraph_node::create_same_body_alias (dispatch_decl, decl);
32407 XDELETEVEC (resolver_name);
32408 return decl;
32409 }
32410
32411 /* Generate the dispatching code body to dispatch multi-versioned function
32412 DECL. The target hook is called to process the "target" attributes and
32413 provide the code to dispatch the right function at run-time. NODE points
32414 to the dispatcher decl whose body will be created. */
32415
32416 static tree
32417 ix86_generate_version_dispatcher_body (void *node_p)
32418 {
32419 tree resolver_decl;
32420 basic_block empty_bb;
32421 tree default_ver_decl;
32422 struct cgraph_node *versn;
32423 struct cgraph_node *node;
32424
32425 struct cgraph_function_version_info *node_version_info = NULL;
32426 struct cgraph_function_version_info *versn_info = NULL;
32427
32428 node = (cgraph_node *)node_p;
32429
32430 node_version_info = node->function_version ();
32431 gcc_assert (node->dispatcher_function
32432 && node_version_info != NULL);
32433
32434 if (node_version_info->dispatcher_resolver)
32435 return node_version_info->dispatcher_resolver;
32436
32437 /* The first version in the chain corresponds to the default version. */
32438 default_ver_decl = node_version_info->next->this_node->decl;
32439
32440 /* node is going to be an alias, so remove the finalized bit. */
32441 node->definition = false;
32442
32443 resolver_decl = make_resolver_func (default_ver_decl,
32444 node->decl, &empty_bb);
32445
32446 node_version_info->dispatcher_resolver = resolver_decl;
32447
32448 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32449
32450 auto_vec<tree, 2> fn_ver_vec;
32451
32452 for (versn_info = node_version_info->next; versn_info;
32453 versn_info = versn_info->next)
32454 {
32455 versn = versn_info->this_node;
32456 /* Check for virtual functions here again, as by this time it should
32457 have been determined if this function needs a vtable index or
32458 not. This happens for methods in derived classes that override
32459 virtual methods in base classes but are not explicitly marked as
32460 virtual. */
32461 if (DECL_VINDEX (versn->decl))
32462 sorry ("Virtual function multiversioning not supported");
32463
32464 fn_ver_vec.safe_push (versn->decl);
32465 }
32466
32467 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32468 cgraph_edge::rebuild_edges ();
32469 pop_cfun ();
32470 return resolver_decl;
32471 }
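/* Sketch of the overall shape produced above for a versioned function foo
   (simplified, names approximate): foo's dispatcher becomes an IFUNC whose
   resolver (named roughly "foo.resolver") is filled in by
   dispatch_function_versions with run-time checks of __cpu_model and
   returns the address of the best available version.  */
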
32472 /* This builds the processor_model struct type defined in
32473 libgcc/config/i386/cpuinfo.c */
32474
32475 static tree
32476 build_processor_model_struct (void)
32477 {
32478 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32479 "__cpu_features"};
32480 tree field = NULL_TREE, field_chain = NULL_TREE;
32481 int i;
32482 tree type = make_node (RECORD_TYPE);
32483
32484 /* The first 3 fields are unsigned int. */
32485 for (i = 0; i < 3; ++i)
32486 {
32487 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32488 get_identifier (field_name[i]), unsigned_type_node);
32489 if (field_chain != NULL_TREE)
32490 DECL_CHAIN (field) = field_chain;
32491 field_chain = field;
32492 }
32493
32494 /* The last field is an array of unsigned integers of size one. */
32495 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32496 get_identifier (field_name[3]),
32497 build_array_type (unsigned_type_node,
32498 build_index_type (size_one_node)));
32499 if (field_chain != NULL_TREE)
32500 DECL_CHAIN (field) = field_chain;
32501 field_chain = field;
32502
32503 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32504 return type;
32505 }
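/* Roughly, the type built above matches the declaration in
   libgcc/config/i386/cpuinfo.c (sketch only):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */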
32506
32507 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32508
32509 static tree
32510 make_var_decl (tree type, const char *name)
32511 {
32512 tree new_decl;
32513
32514 new_decl = build_decl (UNKNOWN_LOCATION,
32515 VAR_DECL,
32516 get_identifier(name),
32517 type);
32518
32519 DECL_EXTERNAL (new_decl) = 1;
32520 TREE_STATIC (new_decl) = 1;
32521 TREE_PUBLIC (new_decl) = 1;
32522 DECL_INITIAL (new_decl) = 0;
32523 DECL_ARTIFICIAL (new_decl) = 0;
32524 DECL_PRESERVE_P (new_decl) = 1;
32525
32526 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32527 assemble_variable (new_decl, 0, 0, 0);
32528
32529 return new_decl;
32530 }
32531
32532 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32533 into an integer test against the data defined in libgcc/config/i386/cpuinfo.c. */
32534
32535 static tree
32536 fold_builtin_cpu (tree fndecl, tree *args)
32537 {
32538 unsigned int i;
32539 enum ix86_builtins fn_code = (enum ix86_builtins)
32540 DECL_FUNCTION_CODE (fndecl);
32541 tree param_string_cst = NULL;
32542
32543 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32544 enum processor_features
32545 {
32546 F_CMOV = 0,
32547 F_MMX,
32548 F_POPCNT,
32549 F_SSE,
32550 F_SSE2,
32551 F_SSE3,
32552 F_SSSE3,
32553 F_SSE4_1,
32554 F_SSE4_2,
32555 F_AVX,
32556 F_AVX2,
32557 F_SSE4_A,
32558 F_FMA4,
32559 F_XOP,
32560 F_FMA,
32561 F_MAX
32562 };
32563
32564 /* These are the values for vendor types and cpu types and subtypes
32565 in cpuinfo.c. The corresponding start value should be subtracted
32566 from cpu types and subtypes. */
32567 enum processor_model
32568 {
32569 M_INTEL = 1,
32570 M_AMD,
32571 M_CPU_TYPE_START,
32572 M_INTEL_BONNELL,
32573 M_INTEL_CORE2,
32574 M_INTEL_COREI7,
32575 M_AMDFAM10H,
32576 M_AMDFAM15H,
32577 M_INTEL_SILVERMONT,
32578 M_AMD_BTVER1,
32579 M_AMD_BTVER2,
32580 M_CPU_SUBTYPE_START,
32581 M_INTEL_COREI7_NEHALEM,
32582 M_INTEL_COREI7_WESTMERE,
32583 M_INTEL_COREI7_SANDYBRIDGE,
32584 M_AMDFAM10H_BARCELONA,
32585 M_AMDFAM10H_SHANGHAI,
32586 M_AMDFAM10H_ISTANBUL,
32587 M_AMDFAM15H_BDVER1,
32588 M_AMDFAM15H_BDVER2,
32589 M_AMDFAM15H_BDVER3,
32590 M_AMDFAM15H_BDVER4,
32591 M_INTEL_COREI7_IVYBRIDGE,
32592 M_INTEL_COREI7_HASWELL
32593 };
32594
32595 static struct _arch_names_table
32596 {
32597 const char *const name;
32598 const enum processor_model model;
32599 }
32600 const arch_names_table[] =
32601 {
32602 {"amd", M_AMD},
32603 {"intel", M_INTEL},
32604 {"atom", M_INTEL_BONNELL},
32605 {"slm", M_INTEL_SILVERMONT},
32606 {"core2", M_INTEL_CORE2},
32607 {"corei7", M_INTEL_COREI7},
32608 {"nehalem", M_INTEL_COREI7_NEHALEM},
32609 {"westmere", M_INTEL_COREI7_WESTMERE},
32610 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32611 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32612 {"haswell", M_INTEL_COREI7_HASWELL},
32613 {"bonnell", M_INTEL_BONNELL},
32614 {"silvermont", M_INTEL_SILVERMONT},
32615 {"amdfam10h", M_AMDFAM10H},
32616 {"barcelona", M_AMDFAM10H_BARCELONA},
32617 {"shanghai", M_AMDFAM10H_SHANGHAI},
32618 {"istanbul", M_AMDFAM10H_ISTANBUL},
32619 {"btver1", M_AMD_BTVER1},
32620 {"amdfam15h", M_AMDFAM15H},
32621 {"bdver1", M_AMDFAM15H_BDVER1},
32622 {"bdver2", M_AMDFAM15H_BDVER2},
32623 {"bdver3", M_AMDFAM15H_BDVER3},
32624 {"bdver4", M_AMDFAM15H_BDVER4},
32625 {"btver2", M_AMD_BTVER2},
32626 };
32627
32628 static struct _isa_names_table
32629 {
32630 const char *const name;
32631 const enum processor_features feature;
32632 }
32633 const isa_names_table[] =
32634 {
32635 {"cmov", F_CMOV},
32636 {"mmx", F_MMX},
32637 {"popcnt", F_POPCNT},
32638 {"sse", F_SSE},
32639 {"sse2", F_SSE2},
32640 {"sse3", F_SSE3},
32641 {"ssse3", F_SSSE3},
32642 {"sse4a", F_SSE4_A},
32643 {"sse4.1", F_SSE4_1},
32644 {"sse4.2", F_SSE4_2},
32645 {"avx", F_AVX},
32646 {"fma4", F_FMA4},
32647 {"xop", F_XOP},
32648 {"fma", F_FMA},
32649 {"avx2", F_AVX2}
32650 };
32651
32652 tree __processor_model_type = build_processor_model_struct ();
32653 tree __cpu_model_var = make_var_decl (__processor_model_type,
32654 "__cpu_model");
32655
32656
32657 varpool_node::add (__cpu_model_var);
32658
32659 gcc_assert ((args != NULL) && (*args != NULL));
32660
32661 param_string_cst = *args;
32662 while (param_string_cst
32663 && TREE_CODE (param_string_cst) != STRING_CST)
32664 {
32665 /* *args must be an expr that can contain other EXPRs leading to a
32666 STRING_CST. */
32667 if (!EXPR_P (param_string_cst))
32668 {
32669 error ("Parameter to builtin must be a string constant or literal");
32670 return integer_zero_node;
32671 }
32672 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32673 }
32674
32675 gcc_assert (param_string_cst);
32676
32677 if (fn_code == IX86_BUILTIN_CPU_IS)
32678 {
32679 tree ref;
32680 tree field;
32681 tree final;
32682
32683 unsigned int field_val = 0;
32684 unsigned int NUM_ARCH_NAMES
32685 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32686
32687 for (i = 0; i < NUM_ARCH_NAMES; i++)
32688 if (strcmp (arch_names_table[i].name,
32689 TREE_STRING_POINTER (param_string_cst)) == 0)
32690 break;
32691
32692 if (i == NUM_ARCH_NAMES)
32693 {
32694 error ("Parameter to builtin not valid: %s",
32695 TREE_STRING_POINTER (param_string_cst));
32696 return integer_zero_node;
32697 }
32698
32699 field = TYPE_FIELDS (__processor_model_type);
32700 field_val = arch_names_table[i].model;
32701
32702 /* CPU types are stored in the next field. */
32703 if (field_val > M_CPU_TYPE_START
32704 && field_val < M_CPU_SUBTYPE_START)
32705 {
32706 field = DECL_CHAIN (field);
32707 field_val -= M_CPU_TYPE_START;
32708 }
32709
32710 /* CPU subtypes are stored in the next field. */
32711 if (field_val > M_CPU_SUBTYPE_START)
32712 {
32713 field = DECL_CHAIN (DECL_CHAIN (field));
32714 field_val -= M_CPU_SUBTYPE_START;
32715 }
32716
32717 /* Get the appropriate field in __cpu_model. */
32718 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32719 field, NULL_TREE);
32720
32721 /* Check the value. */
32722 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32723 build_int_cstu (unsigned_type_node, field_val));
32724 return build1 (CONVERT_EXPR, integer_type_node, final);
32725 }
32726 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32727 {
32728 tree ref;
32729 tree array_elt;
32730 tree field;
32731 tree final;
32732
32733 unsigned int field_val = 0;
32734 unsigned int NUM_ISA_NAMES
32735 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32736
32737 for (i = 0; i < NUM_ISA_NAMES; i++)
32738 if (strcmp (isa_names_table[i].name,
32739 TREE_STRING_POINTER (param_string_cst)) == 0)
32740 break;
32741
32742 if (i == NUM_ISA_NAMES)
32743 {
32744 error ("Parameter to builtin not valid: %s",
32745 TREE_STRING_POINTER (param_string_cst));
32746 return integer_zero_node;
32747 }
32748
32749 field = TYPE_FIELDS (__processor_model_type);
32750 /* Get the last field, which is __cpu_features. */
32751 while (DECL_CHAIN (field))
32752 field = DECL_CHAIN (field);
32753
32754 /* Get the appropriate field: __cpu_model.__cpu_features */
32755 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32756 field, NULL_TREE);
32757
32758 /* Access the 0th element of __cpu_features array. */
32759 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32760 integer_zero_node, NULL_TREE, NULL_TREE);
32761
32762 field_val = (1 << isa_names_table[i].feature);
32763 /* Return __cpu_model.__cpu_features[0] & field_val */
32764 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32765 build_int_cstu (unsigned_type_node, field_val));
32766 return build1 (CONVERT_EXPR, integer_type_node, final);
32767 }
32768 gcc_unreachable ();
32769 }
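/* Illustrative foldings performed above (sketch, not literal dumps):

     __builtin_cpu_is ("amd")
       -> (int) (__cpu_model.__cpu_vendor == M_AMD)
     __builtin_cpu_supports ("avx2")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))  */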
32770
32771 static tree
32772 ix86_fold_builtin (tree fndecl, int n_args,
32773 tree *args, bool ignore ATTRIBUTE_UNUSED)
32774 {
32775 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32776 {
32777 enum ix86_builtins fn_code = (enum ix86_builtins)
32778 DECL_FUNCTION_CODE (fndecl);
32779 if (fn_code == IX86_BUILTIN_CPU_IS
32780 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32781 {
32782 gcc_assert (n_args == 1);
32783 return fold_builtin_cpu (fndecl, args);
32784 }
32785 }
32786
32787 #ifdef SUBTARGET_FOLD_BUILTIN
32788 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32789 #endif
32790
32791 return NULL_TREE;
32792 }
32793
32794 /* Make builtins to detect cpu type and features supported. NAME is
32795 the builtin name, CODE is the builtin code, and FTYPE is the function
32796 type of the builtin. */
32797
32798 static void
32799 make_cpu_type_builtin (const char* name, int code,
32800 enum ix86_builtin_func_type ftype, bool is_const)
32801 {
32802 tree decl;
32803 tree type;
32804
32805 type = ix86_get_builtin_func_type (ftype);
32806 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32807 NULL, NULL_TREE);
32808 gcc_assert (decl != NULL_TREE);
32809 ix86_builtins[(int) code] = decl;
32810 TREE_READONLY (decl) = is_const;
32811 }
32812
32813 /* Make builtins to get CPU type and features supported. The created
32814 builtins are:
32815
32816 __builtin_cpu_init (), to detect cpu type and features,
32817 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32818 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32819 */
32820
32821 static void
32822 ix86_init_platform_type_builtins (void)
32823 {
32824 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32825 INT_FTYPE_VOID, false);
32826 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32827 INT_FTYPE_PCCHAR, true);
32828 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32829 INT_FTYPE_PCCHAR, true);
32830 }
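/* Illustrative use of these builtins in user code (not part of this file;
   the function name is made up):

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7") && __builtin_cpu_supports ("avx2"))
         return 1;
       return 0;
     }  */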
32831
32832 /* Internal method for ix86_init_builtins. */
32833
32834 static void
32835 ix86_init_builtins_va_builtins_abi (void)
32836 {
32837 tree ms_va_ref, sysv_va_ref;
32838 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32839 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32840 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32841 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32842
32843 if (!TARGET_64BIT)
32844 return;
32845 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32846 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32847 ms_va_ref = build_reference_type (ms_va_list_type_node);
32848 sysv_va_ref =
32849 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32850
32851 fnvoid_va_end_ms =
32852 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32853 fnvoid_va_start_ms =
32854 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32855 fnvoid_va_end_sysv =
32856 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32857 fnvoid_va_start_sysv =
32858 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32859 NULL_TREE);
32860 fnvoid_va_copy_ms =
32861 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32862 NULL_TREE);
32863 fnvoid_va_copy_sysv =
32864 build_function_type_list (void_type_node, sysv_va_ref,
32865 sysv_va_ref, NULL_TREE);
32866
32867 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32868 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32869 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32870 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32871 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32872 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32873 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32874 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32875 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32876 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32877 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32878 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32879 }
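/* Illustrative use of the MS-ABI varargs builtins registered above
   (not part of this file; assumes a 64-bit target and a made-up callee):

     __attribute__ ((ms_abi)) void
     ms_abi_callee (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       ...fetch arguments with va_arg (ap, TYPE)...
       __builtin_ms_va_end (ap);
     }  */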
32880
32881 static void
32882 ix86_init_builtin_types (void)
32883 {
32884 tree float128_type_node, float80_type_node;
32885
32886 /* The __float80 type. */
32887 float80_type_node = long_double_type_node;
32888 if (TYPE_MODE (float80_type_node) != XFmode)
32889 {
32890 /* The __float80 type. */
32891 float80_type_node = make_node (REAL_TYPE);
32892
32893 TYPE_PRECISION (float80_type_node) = 80;
32894 layout_type (float80_type_node);
32895 }
32896 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32897
32898 /* The __float128 type. */
32899 float128_type_node = make_node (REAL_TYPE);
32900 TYPE_PRECISION (float128_type_node) = 128;
32901 layout_type (float128_type_node);
32902 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32903
32904 /* This macro is built by i386-builtin-types.awk. */
32905 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32906 }
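/* Illustrative use of the types registered above (not part of this file);
   the 'w' and 'q' constant suffixes select __float80 and __float128:

     __float80  w = 1.0w;
     __float128 q = 1.0q;  */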
32907
32908 static void
32909 ix86_init_builtins (void)
32910 {
32911 tree t;
32912
32913 ix86_init_builtin_types ();
32914
32915 /* Builtins to get CPU type and features. */
32916 ix86_init_platform_type_builtins ();
32917
32918 /* TFmode support builtins. */
32919 def_builtin_const (0, "__builtin_infq",
32920 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32921 def_builtin_const (0, "__builtin_huge_valq",
32922 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32923
32924 /* We will expand them to a normal call if SSE isn't available, since
32925 they are used by libgcc. */
32926 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32927 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32928 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32929 TREE_READONLY (t) = 1;
32930 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32931
32932 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32933 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32934 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32935 TREE_READONLY (t) = 1;
32936 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32937
32938 ix86_init_tm_builtins ();
32939 ix86_init_mmx_sse_builtins ();
32940
32941 if (TARGET_LP64)
32942 ix86_init_builtins_va_builtins_abi ();
32943
32944 #ifdef SUBTARGET_INIT_BUILTINS
32945 SUBTARGET_INIT_BUILTINS;
32946 #endif
32947 }
32948
32949 /* Return the ix86 builtin for CODE. */
32950
32951 static tree
32952 ix86_builtin_decl (unsigned code, bool)
32953 {
32954 if (code >= IX86_BUILTIN_MAX)
32955 return error_mark_node;
32956
32957 return ix86_builtins[code];
32958 }
32959
32960 /* Errors in the source file can cause expand_expr to return const0_rtx
32961 where we expect a vector. To avoid crashing, use one of the vector
32962 clear instructions. */
32963 static rtx
32964 safe_vector_operand (rtx x, enum machine_mode mode)
32965 {
32966 if (x == const0_rtx)
32967 x = CONST0_RTX (mode);
32968 return x;
32969 }
32970
32971 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32972
32973 static rtx
32974 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32975 {
32976 rtx pat;
32977 tree arg0 = CALL_EXPR_ARG (exp, 0);
32978 tree arg1 = CALL_EXPR_ARG (exp, 1);
32979 rtx op0 = expand_normal (arg0);
32980 rtx op1 = expand_normal (arg1);
32981 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32982 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32983 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32984
32985 if (VECTOR_MODE_P (mode0))
32986 op0 = safe_vector_operand (op0, mode0);
32987 if (VECTOR_MODE_P (mode1))
32988 op1 = safe_vector_operand (op1, mode1);
32989
32990 if (optimize || !target
32991 || GET_MODE (target) != tmode
32992 || !insn_data[icode].operand[0].predicate (target, tmode))
32993 target = gen_reg_rtx (tmode);
32994
32995 if (GET_MODE (op1) == SImode && mode1 == TImode)
32996 {
32997 rtx x = gen_reg_rtx (V4SImode);
32998 emit_insn (gen_sse2_loadd (x, op1));
32999 op1 = gen_lowpart (TImode, x);
33000 }
33001
33002 if (!insn_data[icode].operand[1].predicate (op0, mode0))
33003 op0 = copy_to_mode_reg (mode0, op0);
33004 if (!insn_data[icode].operand[2].predicate (op1, mode1))
33005 op1 = copy_to_mode_reg (mode1, op1);
33006
33007 pat = GEN_FCN (icode) (target, op0, op1);
33008 if (! pat)
33009 return 0;
33010
33011 emit_insn (pat);
33012
33013 return target;
33014 }
33015
33016 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
33017
33018 static rtx
33019 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
33020 enum ix86_builtin_func_type m_type,
33021 enum rtx_code sub_code)
33022 {
33023 rtx pat;
33024 int i;
33025 int nargs;
33026 bool comparison_p = false;
33027 bool tf_p = false;
33028 bool last_arg_constant = false;
33029 int num_memory = 0;
33030 struct {
33031 rtx op;
33032 enum machine_mode mode;
33033 } args[4];
33034
33035 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33036
33037 switch (m_type)
33038 {
33039 case MULTI_ARG_4_DF2_DI_I:
33040 case MULTI_ARG_4_DF2_DI_I1:
33041 case MULTI_ARG_4_SF2_SI_I:
33042 case MULTI_ARG_4_SF2_SI_I1:
33043 nargs = 4;
33044 last_arg_constant = true;
33045 break;
33046
33047 case MULTI_ARG_3_SF:
33048 case MULTI_ARG_3_DF:
33049 case MULTI_ARG_3_SF2:
33050 case MULTI_ARG_3_DF2:
33051 case MULTI_ARG_3_DI:
33052 case MULTI_ARG_3_SI:
33053 case MULTI_ARG_3_SI_DI:
33054 case MULTI_ARG_3_HI:
33055 case MULTI_ARG_3_HI_SI:
33056 case MULTI_ARG_3_QI:
33057 case MULTI_ARG_3_DI2:
33058 case MULTI_ARG_3_SI2:
33059 case MULTI_ARG_3_HI2:
33060 case MULTI_ARG_3_QI2:
33061 nargs = 3;
33062 break;
33063
33064 case MULTI_ARG_2_SF:
33065 case MULTI_ARG_2_DF:
33066 case MULTI_ARG_2_DI:
33067 case MULTI_ARG_2_SI:
33068 case MULTI_ARG_2_HI:
33069 case MULTI_ARG_2_QI:
33070 nargs = 2;
33071 break;
33072
33073 case MULTI_ARG_2_DI_IMM:
33074 case MULTI_ARG_2_SI_IMM:
33075 case MULTI_ARG_2_HI_IMM:
33076 case MULTI_ARG_2_QI_IMM:
33077 nargs = 2;
33078 last_arg_constant = true;
33079 break;
33080
33081 case MULTI_ARG_1_SF:
33082 case MULTI_ARG_1_DF:
33083 case MULTI_ARG_1_SF2:
33084 case MULTI_ARG_1_DF2:
33085 case MULTI_ARG_1_DI:
33086 case MULTI_ARG_1_SI:
33087 case MULTI_ARG_1_HI:
33088 case MULTI_ARG_1_QI:
33089 case MULTI_ARG_1_SI_DI:
33090 case MULTI_ARG_1_HI_DI:
33091 case MULTI_ARG_1_HI_SI:
33092 case MULTI_ARG_1_QI_DI:
33093 case MULTI_ARG_1_QI_SI:
33094 case MULTI_ARG_1_QI_HI:
33095 nargs = 1;
33096 break;
33097
33098 case MULTI_ARG_2_DI_CMP:
33099 case MULTI_ARG_2_SI_CMP:
33100 case MULTI_ARG_2_HI_CMP:
33101 case MULTI_ARG_2_QI_CMP:
33102 nargs = 2;
33103 comparison_p = true;
33104 break;
33105
33106 case MULTI_ARG_2_SF_TF:
33107 case MULTI_ARG_2_DF_TF:
33108 case MULTI_ARG_2_DI_TF:
33109 case MULTI_ARG_2_SI_TF:
33110 case MULTI_ARG_2_HI_TF:
33111 case MULTI_ARG_2_QI_TF:
33112 nargs = 2;
33113 tf_p = true;
33114 break;
33115
33116 default:
33117 gcc_unreachable ();
33118 }
33119
33120 if (optimize || !target
33121 || GET_MODE (target) != tmode
33122 || !insn_data[icode].operand[0].predicate (target, tmode))
33123 target = gen_reg_rtx (tmode);
33124
33125 gcc_assert (nargs <= 4);
33126
33127 for (i = 0; i < nargs; i++)
33128 {
33129 tree arg = CALL_EXPR_ARG (exp, i);
33130 rtx op = expand_normal (arg);
33131 int adjust = (comparison_p) ? 1 : 0;
33132 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
33133
33134 if (last_arg_constant && i == nargs - 1)
33135 {
33136 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
33137 {
33138 enum insn_code new_icode = icode;
33139 switch (icode)
33140 {
33141 case CODE_FOR_xop_vpermil2v2df3:
33142 case CODE_FOR_xop_vpermil2v4sf3:
33143 case CODE_FOR_xop_vpermil2v4df3:
33144 case CODE_FOR_xop_vpermil2v8sf3:
33145 error ("the last argument must be a 2-bit immediate");
33146 return gen_reg_rtx (tmode);
33147 case CODE_FOR_xop_rotlv2di3:
33148 new_icode = CODE_FOR_rotlv2di3;
33149 goto xop_rotl;
33150 case CODE_FOR_xop_rotlv4si3:
33151 new_icode = CODE_FOR_rotlv4si3;
33152 goto xop_rotl;
33153 case CODE_FOR_xop_rotlv8hi3:
33154 new_icode = CODE_FOR_rotlv8hi3;
33155 goto xop_rotl;
33156 case CODE_FOR_xop_rotlv16qi3:
33157 new_icode = CODE_FOR_rotlv16qi3;
33158 xop_rotl:
33159 if (CONST_INT_P (op))
33160 {
33161 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
33162 op = GEN_INT (INTVAL (op) & mask);
33163 gcc_checking_assert
33164 (insn_data[icode].operand[i + 1].predicate (op, mode));
33165 }
33166 else
33167 {
33168 gcc_checking_assert
33169 (nargs == 2
33170 && insn_data[new_icode].operand[0].mode == tmode
33171 && insn_data[new_icode].operand[1].mode == tmode
33172 && insn_data[new_icode].operand[2].mode == mode
33173 && insn_data[new_icode].operand[0].predicate
33174 == insn_data[icode].operand[0].predicate
33175 && insn_data[new_icode].operand[1].predicate
33176 == insn_data[icode].operand[1].predicate);
33177 icode = new_icode;
33178 goto non_constant;
33179 }
33180 break;
33181 default:
33182 gcc_unreachable ();
33183 }
33184 }
33185 }
33186 else
33187 {
33188 non_constant:
33189 if (VECTOR_MODE_P (mode))
33190 op = safe_vector_operand (op, mode);
33191
33192 /* If we aren't optimizing, only allow one memory operand to be
33193 generated. */
33194 if (memory_operand (op, mode))
33195 num_memory++;
33196
33197 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33198
33199 if (optimize
33200 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33201 || num_memory > 1)
33202 op = force_reg (mode, op);
33203 }
33204
33205 args[i].op = op;
33206 args[i].mode = mode;
33207 }
33208
33209 switch (nargs)
33210 {
33211 case 1:
33212 pat = GEN_FCN (icode) (target, args[0].op);
33213 break;
33214
33215 case 2:
33216 if (tf_p)
33217 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33218 GEN_INT ((int)sub_code));
33219 else if (! comparison_p)
33220 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33221 else
33222 {
33223 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33224 args[0].op,
33225 args[1].op);
33226
33227 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33228 }
33229 break;
33230
33231 case 3:
33232 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33233 break;
33234
33235 case 4:
33236 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33237 break;
33238
33239 default:
33240 gcc_unreachable ();
33241 }
33242
33243 if (! pat)
33244 return 0;
33245
33246 emit_insn (pat);
33247 return target;
33248 }
33249
33250 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33251 insns with vec_merge. */
33252
33253 static rtx
33254 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33255 rtx target)
33256 {
33257 rtx pat;
33258 tree arg0 = CALL_EXPR_ARG (exp, 0);
33259 rtx op1, op0 = expand_normal (arg0);
33260 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33261 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33262
33263 if (optimize || !target
33264 || GET_MODE (target) != tmode
33265 || !insn_data[icode].operand[0].predicate (target, tmode))
33266 target = gen_reg_rtx (tmode);
33267
33268 if (VECTOR_MODE_P (mode0))
33269 op0 = safe_vector_operand (op0, mode0);
33270
33271 if ((optimize && !register_operand (op0, mode0))
33272 || !insn_data[icode].operand[1].predicate (op0, mode0))
33273 op0 = copy_to_mode_reg (mode0, op0);
33274
33275 op1 = op0;
33276 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33277 op1 = copy_to_mode_reg (mode0, op1);
33278
33279 pat = GEN_FCN (icode) (target, op0, op1);
33280 if (! pat)
33281 return 0;
33282 emit_insn (pat);
33283 return target;
33284 }
33285
33286 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33287
33288 static rtx
33289 ix86_expand_sse_compare (const struct builtin_description *d,
33290 tree exp, rtx target, bool swap)
33291 {
33292 rtx pat;
33293 tree arg0 = CALL_EXPR_ARG (exp, 0);
33294 tree arg1 = CALL_EXPR_ARG (exp, 1);
33295 rtx op0 = expand_normal (arg0);
33296 rtx op1 = expand_normal (arg1);
33297 rtx op2;
33298 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33299 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33300 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33301 enum rtx_code comparison = d->comparison;
33302
33303 if (VECTOR_MODE_P (mode0))
33304 op0 = safe_vector_operand (op0, mode0);
33305 if (VECTOR_MODE_P (mode1))
33306 op1 = safe_vector_operand (op1, mode1);
33307
33308 /* Swap operands if we have a comparison that isn't available in
33309 hardware. */
33310 if (swap)
33311 {
33312 rtx tmp = gen_reg_rtx (mode1);
33313 emit_move_insn (tmp, op1);
33314 op1 = op0;
33315 op0 = tmp;
33316 }
33317
33318 if (optimize || !target
33319 || GET_MODE (target) != tmode
33320 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33321 target = gen_reg_rtx (tmode);
33322
33323 if ((optimize && !register_operand (op0, mode0))
33324 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33325 op0 = copy_to_mode_reg (mode0, op0);
33326 if ((optimize && !register_operand (op1, mode1))
33327 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33328 op1 = copy_to_mode_reg (mode1, op1);
33329
33330 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33331 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33332 if (! pat)
33333 return 0;
33334 emit_insn (pat);
33335 return target;
33336 }
33337
33338 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33339
33340 static rtx
33341 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33342 rtx target)
33343 {
33344 rtx pat;
33345 tree arg0 = CALL_EXPR_ARG (exp, 0);
33346 tree arg1 = CALL_EXPR_ARG (exp, 1);
33347 rtx op0 = expand_normal (arg0);
33348 rtx op1 = expand_normal (arg1);
33349 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33350 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33351 enum rtx_code comparison = d->comparison;
33352
33353 if (VECTOR_MODE_P (mode0))
33354 op0 = safe_vector_operand (op0, mode0);
33355 if (VECTOR_MODE_P (mode1))
33356 op1 = safe_vector_operand (op1, mode1);
33357
33358 /* Swap operands if we have a comparison that isn't available in
33359 hardware. */
33360 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33361 {
33362 rtx tmp = op1;
33363 op1 = op0;
33364 op0 = tmp;
33365 }
33366
33367 target = gen_reg_rtx (SImode);
33368 emit_move_insn (target, const0_rtx);
33369 target = gen_rtx_SUBREG (QImode, target, 0);
33370
33371 if ((optimize && !register_operand (op0, mode0))
33372 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33373 op0 = copy_to_mode_reg (mode0, op0);
33374 if ((optimize && !register_operand (op1, mode1))
33375 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33376 op1 = copy_to_mode_reg (mode1, op1);
33377
33378 pat = GEN_FCN (d->icode) (op0, op1);
33379 if (! pat)
33380 return 0;
33381 emit_insn (pat);
33382 emit_insn (gen_rtx_SET (VOIDmode,
33383 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33384 gen_rtx_fmt_ee (comparison, QImode,
33385 SET_DEST (pat),
33386 const0_rtx)));
33387
33388 return SUBREG_REG (target);
33389 }
33390
33391 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33392
33393 static rtx
33394 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33395 rtx target)
33396 {
33397 rtx pat;
33398 tree arg0 = CALL_EXPR_ARG (exp, 0);
33399 rtx op1, op0 = expand_normal (arg0);
33400 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33401 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33402
33403 if (optimize || target == 0
33404 || GET_MODE (target) != tmode
33405 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33406 target = gen_reg_rtx (tmode);
33407
33408 if (VECTOR_MODE_P (mode0))
33409 op0 = safe_vector_operand (op0, mode0);
33410
33411 if ((optimize && !register_operand (op0, mode0))
33412 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33413 op0 = copy_to_mode_reg (mode0, op0);
33414
33415 op1 = GEN_INT (d->comparison);
33416
33417 pat = GEN_FCN (d->icode) (target, op0, op1);
33418 if (! pat)
33419 return 0;
33420 emit_insn (pat);
33421 return target;
33422 }
33423
33424 static rtx
33425 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33426 tree exp, rtx target)
33427 {
33428 rtx pat;
33429 tree arg0 = CALL_EXPR_ARG (exp, 0);
33430 tree arg1 = CALL_EXPR_ARG (exp, 1);
33431 rtx op0 = expand_normal (arg0);
33432 rtx op1 = expand_normal (arg1);
33433 rtx op2;
33434 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33435 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33436 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33437
33438 if (optimize || target == 0
33439 || GET_MODE (target) != tmode
33440 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33441 target = gen_reg_rtx (tmode);
33442
33443 op0 = safe_vector_operand (op0, mode0);
33444 op1 = safe_vector_operand (op1, mode1);
33445
33446 if ((optimize && !register_operand (op0, mode0))
33447 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33448 op0 = copy_to_mode_reg (mode0, op0);
33449 if ((optimize && !register_operand (op1, mode1))
33450 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33451 op1 = copy_to_mode_reg (mode1, op1);
33452
33453 op2 = GEN_INT (d->comparison);
33454
33455 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33456 if (! pat)
33457 return 0;
33458 emit_insn (pat);
33459 return target;
33460 }
33461
33462 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33463
33464 static rtx
33465 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33466 rtx target)
33467 {
33468 rtx pat;
33469 tree arg0 = CALL_EXPR_ARG (exp, 0);
33470 tree arg1 = CALL_EXPR_ARG (exp, 1);
33471 rtx op0 = expand_normal (arg0);
33472 rtx op1 = expand_normal (arg1);
33473 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33474 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33475 enum rtx_code comparison = d->comparison;
33476
33477 if (VECTOR_MODE_P (mode0))
33478 op0 = safe_vector_operand (op0, mode0);
33479 if (VECTOR_MODE_P (mode1))
33480 op1 = safe_vector_operand (op1, mode1);
33481
33482 target = gen_reg_rtx (SImode);
33483 emit_move_insn (target, const0_rtx);
33484 target = gen_rtx_SUBREG (QImode, target, 0);
33485
33486 if ((optimize && !register_operand (op0, mode0))
33487 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33488 op0 = copy_to_mode_reg (mode0, op0);
33489 if ((optimize && !register_operand (op1, mode1))
33490 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33491 op1 = copy_to_mode_reg (mode1, op1);
33492
33493 pat = GEN_FCN (d->icode) (op0, op1);
33494 if (! pat)
33495 return 0;
33496 emit_insn (pat);
33497 emit_insn (gen_rtx_SET (VOIDmode,
33498 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33499 gen_rtx_fmt_ee (comparison, QImode,
33500 SET_DEST (pat),
33501 const0_rtx)));
33502
33503 return SUBREG_REG (target);
33504 }
33505
33506 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33507
33508 static rtx
33509 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33510 tree exp, rtx target)
33511 {
33512 rtx pat;
33513 tree arg0 = CALL_EXPR_ARG (exp, 0);
33514 tree arg1 = CALL_EXPR_ARG (exp, 1);
33515 tree arg2 = CALL_EXPR_ARG (exp, 2);
33516 tree arg3 = CALL_EXPR_ARG (exp, 3);
33517 tree arg4 = CALL_EXPR_ARG (exp, 4);
33518 rtx scratch0, scratch1;
33519 rtx op0 = expand_normal (arg0);
33520 rtx op1 = expand_normal (arg1);
33521 rtx op2 = expand_normal (arg2);
33522 rtx op3 = expand_normal (arg3);
33523 rtx op4 = expand_normal (arg4);
33524 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33525
33526 tmode0 = insn_data[d->icode].operand[0].mode;
33527 tmode1 = insn_data[d->icode].operand[1].mode;
33528 modev2 = insn_data[d->icode].operand[2].mode;
33529 modei3 = insn_data[d->icode].operand[3].mode;
33530 modev4 = insn_data[d->icode].operand[4].mode;
33531 modei5 = insn_data[d->icode].operand[5].mode;
33532 modeimm = insn_data[d->icode].operand[6].mode;
33533
33534 if (VECTOR_MODE_P (modev2))
33535 op0 = safe_vector_operand (op0, modev2);
33536 if (VECTOR_MODE_P (modev4))
33537 op2 = safe_vector_operand (op2, modev4);
33538
33539 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33540 op0 = copy_to_mode_reg (modev2, op0);
33541 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33542 op1 = copy_to_mode_reg (modei3, op1);
33543 if ((optimize && !register_operand (op2, modev4))
33544 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33545 op2 = copy_to_mode_reg (modev4, op2);
33546 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33547 op3 = copy_to_mode_reg (modei5, op3);
33548
33549 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33550 {
33551 error ("the fifth argument must be an 8-bit immediate");
33552 return const0_rtx;
33553 }
33554
33555 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33556 {
33557 if (optimize || !target
33558 || GET_MODE (target) != tmode0
33559 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33560 target = gen_reg_rtx (tmode0);
33561
33562 scratch1 = gen_reg_rtx (tmode1);
33563
33564 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33565 }
33566 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33567 {
33568 if (optimize || !target
33569 || GET_MODE (target) != tmode1
33570 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33571 target = gen_reg_rtx (tmode1);
33572
33573 scratch0 = gen_reg_rtx (tmode0);
33574
33575 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33576 }
33577 else
33578 {
33579 gcc_assert (d->flag);
33580
33581 scratch0 = gen_reg_rtx (tmode0);
33582 scratch1 = gen_reg_rtx (tmode1);
33583
33584 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33585 }
33586
33587 if (! pat)
33588 return 0;
33589
33590 emit_insn (pat);
33591
33592 if (d->flag)
33593 {
33594 target = gen_reg_rtx (SImode);
33595 emit_move_insn (target, const0_rtx);
33596 target = gen_rtx_SUBREG (QImode, target, 0);
33597
33598 emit_insn
33599 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33600 gen_rtx_fmt_ee (EQ, QImode,
33601 gen_rtx_REG ((enum machine_mode) d->flag,
33602 FLAGS_REG),
33603 const0_rtx)));
33604 return SUBREG_REG (target);
33605 }
33606 else
33607 return target;
33608 }
33609
33610
33611 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33612
33613 static rtx
33614 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33615 tree exp, rtx target)
33616 {
33617 rtx pat;
33618 tree arg0 = CALL_EXPR_ARG (exp, 0);
33619 tree arg1 = CALL_EXPR_ARG (exp, 1);
33620 tree arg2 = CALL_EXPR_ARG (exp, 2);
33621 rtx scratch0, scratch1;
33622 rtx op0 = expand_normal (arg0);
33623 rtx op1 = expand_normal (arg1);
33624 rtx op2 = expand_normal (arg2);
33625 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33626
33627 tmode0 = insn_data[d->icode].operand[0].mode;
33628 tmode1 = insn_data[d->icode].operand[1].mode;
33629 modev2 = insn_data[d->icode].operand[2].mode;
33630 modev3 = insn_data[d->icode].operand[3].mode;
33631 modeimm = insn_data[d->icode].operand[4].mode;
33632
33633 if (VECTOR_MODE_P (modev2))
33634 op0 = safe_vector_operand (op0, modev2);
33635 if (VECTOR_MODE_P (modev3))
33636 op1 = safe_vector_operand (op1, modev3);
33637
33638 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33639 op0 = copy_to_mode_reg (modev2, op0);
33640 if ((optimize && !register_operand (op1, modev3))
33641 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33642 op1 = copy_to_mode_reg (modev3, op1);
33643
33644 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33645 {
33646 error ("the third argument must be an 8-bit immediate");
33647 return const0_rtx;
33648 }
33649
33650 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33651 {
33652 if (optimize || !target
33653 || GET_MODE (target) != tmode0
33654 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33655 target = gen_reg_rtx (tmode0);
33656
33657 scratch1 = gen_reg_rtx (tmode1);
33658
33659 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33660 }
33661 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33662 {
33663 if (optimize || !target
33664 || GET_MODE (target) != tmode1
33665 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33666 target = gen_reg_rtx (tmode1);
33667
33668 scratch0 = gen_reg_rtx (tmode0);
33669
33670 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33671 }
33672 else
33673 {
33674 gcc_assert (d->flag);
33675
33676 scratch0 = gen_reg_rtx (tmode0);
33677 scratch1 = gen_reg_rtx (tmode1);
33678
33679 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33680 }
33681
33682 if (! pat)
33683 return 0;
33684
33685 emit_insn (pat);
33686
33687 if (d->flag)
33688 {
33689 target = gen_reg_rtx (SImode);
33690 emit_move_insn (target, const0_rtx);
33691 target = gen_rtx_SUBREG (QImode, target, 0);
33692
33693 emit_insn
33694 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33695 gen_rtx_fmt_ee (EQ, QImode,
33696 gen_rtx_REG ((enum machine_mode) d->flag,
33697 FLAGS_REG),
33698 const0_rtx)));
33699 return SUBREG_REG (target);
33700 }
33701 else
33702 return target;
33703 }
33704
33705 /* Subroutine of ix86_expand_builtin to take care of insns with a
33706 variable number of operands. */
33707
33708 static rtx
33709 ix86_expand_args_builtin (const struct builtin_description *d,
33710 tree exp, rtx target)
33711 {
33712 rtx pat, real_target;
33713 unsigned int i, nargs;
33714 unsigned int nargs_constant = 0;
33715 unsigned int mask_pos = 0;
33716 int num_memory = 0;
33717 struct
33718 {
33719 rtx op;
33720 enum machine_mode mode;
33721 } args[6];
33722 bool last_arg_count = false;
33723 enum insn_code icode = d->icode;
33724 const struct insn_data_d *insn_p = &insn_data[icode];
33725 enum machine_mode tmode = insn_p->operand[0].mode;
33726 enum machine_mode rmode = VOIDmode;
33727 bool swap = false;
33728 enum rtx_code comparison = d->comparison;
33729
33730 switch ((enum ix86_builtin_func_type) d->flag)
33731 {
33732 case V2DF_FTYPE_V2DF_ROUND:
33733 case V4DF_FTYPE_V4DF_ROUND:
33734 case V4SF_FTYPE_V4SF_ROUND:
33735 case V8SF_FTYPE_V8SF_ROUND:
33736 case V4SI_FTYPE_V4SF_ROUND:
33737 case V8SI_FTYPE_V8SF_ROUND:
33738 return ix86_expand_sse_round (d, exp, target);
33739 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33740 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33741 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33742 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33743 case INT_FTYPE_V8SF_V8SF_PTEST:
33744 case INT_FTYPE_V4DI_V4DI_PTEST:
33745 case INT_FTYPE_V4DF_V4DF_PTEST:
33746 case INT_FTYPE_V4SF_V4SF_PTEST:
33747 case INT_FTYPE_V2DI_V2DI_PTEST:
33748 case INT_FTYPE_V2DF_V2DF_PTEST:
33749 return ix86_expand_sse_ptest (d, exp, target);
33750 case FLOAT128_FTYPE_FLOAT128:
33751 case FLOAT_FTYPE_FLOAT:
33752 case INT_FTYPE_INT:
33753 case UINT64_FTYPE_INT:
33754 case UINT16_FTYPE_UINT16:
33755 case INT64_FTYPE_INT64:
33756 case INT64_FTYPE_V4SF:
33757 case INT64_FTYPE_V2DF:
33758 case INT_FTYPE_V16QI:
33759 case INT_FTYPE_V8QI:
33760 case INT_FTYPE_V8SF:
33761 case INT_FTYPE_V4DF:
33762 case INT_FTYPE_V4SF:
33763 case INT_FTYPE_V2DF:
33764 case INT_FTYPE_V32QI:
33765 case V16QI_FTYPE_V16QI:
33766 case V8SI_FTYPE_V8SF:
33767 case V8SI_FTYPE_V4SI:
33768 case V8HI_FTYPE_V8HI:
33769 case V8HI_FTYPE_V16QI:
33770 case V8QI_FTYPE_V8QI:
33771 case V8SF_FTYPE_V8SF:
33772 case V8SF_FTYPE_V8SI:
33773 case V8SF_FTYPE_V4SF:
33774 case V8SF_FTYPE_V8HI:
33775 case V4SI_FTYPE_V4SI:
33776 case V4SI_FTYPE_V16QI:
33777 case V4SI_FTYPE_V4SF:
33778 case V4SI_FTYPE_V8SI:
33779 case V4SI_FTYPE_V8HI:
33780 case V4SI_FTYPE_V4DF:
33781 case V4SI_FTYPE_V2DF:
33782 case V4HI_FTYPE_V4HI:
33783 case V4DF_FTYPE_V4DF:
33784 case V4DF_FTYPE_V4SI:
33785 case V4DF_FTYPE_V4SF:
33786 case V4DF_FTYPE_V2DF:
33787 case V4SF_FTYPE_V4SF:
33788 case V4SF_FTYPE_V4SI:
33789 case V4SF_FTYPE_V8SF:
33790 case V4SF_FTYPE_V4DF:
33791 case V4SF_FTYPE_V8HI:
33792 case V4SF_FTYPE_V2DF:
33793 case V2DI_FTYPE_V2DI:
33794 case V2DI_FTYPE_V16QI:
33795 case V2DI_FTYPE_V8HI:
33796 case V2DI_FTYPE_V4SI:
33797 case V2DF_FTYPE_V2DF:
33798 case V2DF_FTYPE_V4SI:
33799 case V2DF_FTYPE_V4DF:
33800 case V2DF_FTYPE_V4SF:
33801 case V2DF_FTYPE_V2SI:
33802 case V2SI_FTYPE_V2SI:
33803 case V2SI_FTYPE_V4SF:
33804 case V2SI_FTYPE_V2SF:
33805 case V2SI_FTYPE_V2DF:
33806 case V2SF_FTYPE_V2SF:
33807 case V2SF_FTYPE_V2SI:
33808 case V32QI_FTYPE_V32QI:
33809 case V32QI_FTYPE_V16QI:
33810 case V16HI_FTYPE_V16HI:
33811 case V16HI_FTYPE_V8HI:
33812 case V8SI_FTYPE_V8SI:
33813 case V16HI_FTYPE_V16QI:
33814 case V8SI_FTYPE_V16QI:
33815 case V4DI_FTYPE_V16QI:
33816 case V8SI_FTYPE_V8HI:
33817 case V4DI_FTYPE_V8HI:
33818 case V4DI_FTYPE_V4SI:
33819 case V4DI_FTYPE_V2DI:
33820 case HI_FTYPE_HI:
33821 case UINT_FTYPE_V2DF:
33822 case UINT_FTYPE_V4SF:
33823 case UINT64_FTYPE_V2DF:
33824 case UINT64_FTYPE_V4SF:
33825 case V16QI_FTYPE_V8DI:
33826 case V16HI_FTYPE_V16SI:
33827 case V16SI_FTYPE_HI:
33828 case V16SI_FTYPE_V16SI:
33829 case V16SI_FTYPE_INT:
33830 case V16SF_FTYPE_FLOAT:
33831 case V16SF_FTYPE_V8SF:
33832 case V16SI_FTYPE_V8SI:
33833 case V16SF_FTYPE_V4SF:
33834 case V16SI_FTYPE_V4SI:
33835 case V16SF_FTYPE_V16SF:
33836 case V8HI_FTYPE_V8DI:
33837 case V8UHI_FTYPE_V8UHI:
33838 case V8SI_FTYPE_V8DI:
33839 case V8USI_FTYPE_V8USI:
33840 case V8SF_FTYPE_V8DF:
33841 case V8DI_FTYPE_QI:
33842 case V8DI_FTYPE_INT64:
33843 case V8DI_FTYPE_V4DI:
33844 case V8DI_FTYPE_V8DI:
33845 case V8DF_FTYPE_DOUBLE:
33846 case V8DF_FTYPE_V4DF:
33847 case V8DF_FTYPE_V2DF:
33848 case V8DF_FTYPE_V8DF:
33849 case V8DF_FTYPE_V8SI:
33850 nargs = 1;
33851 break;
33852 case V4SF_FTYPE_V4SF_VEC_MERGE:
33853 case V2DF_FTYPE_V2DF_VEC_MERGE:
33854 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33855 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33856 case V16QI_FTYPE_V16QI_V16QI:
33857 case V16QI_FTYPE_V8HI_V8HI:
33858 case V16SI_FTYPE_V16SI_V16SI:
33859 case V16SF_FTYPE_V16SF_V16SF:
33860 case V16SF_FTYPE_V16SF_V16SI:
33861 case V8QI_FTYPE_V8QI_V8QI:
33862 case V8QI_FTYPE_V4HI_V4HI:
33863 case V8HI_FTYPE_V8HI_V8HI:
33864 case V8HI_FTYPE_V16QI_V16QI:
33865 case V8HI_FTYPE_V4SI_V4SI:
33866 case V8SF_FTYPE_V8SF_V8SF:
33867 case V8SF_FTYPE_V8SF_V8SI:
33868 case V8DI_FTYPE_V8DI_V8DI:
33869 case V8DF_FTYPE_V8DF_V8DF:
33870 case V8DF_FTYPE_V8DF_V8DI:
33871 case V4SI_FTYPE_V4SI_V4SI:
33872 case V4SI_FTYPE_V8HI_V8HI:
33873 case V4SI_FTYPE_V4SF_V4SF:
33874 case V4SI_FTYPE_V2DF_V2DF:
33875 case V4HI_FTYPE_V4HI_V4HI:
33876 case V4HI_FTYPE_V8QI_V8QI:
33877 case V4HI_FTYPE_V2SI_V2SI:
33878 case V4DF_FTYPE_V4DF_V4DF:
33879 case V4DF_FTYPE_V4DF_V4DI:
33880 case V4SF_FTYPE_V4SF_V4SF:
33881 case V4SF_FTYPE_V4SF_V4SI:
33882 case V4SF_FTYPE_V4SF_V2SI:
33883 case V4SF_FTYPE_V4SF_V2DF:
33884 case V4SF_FTYPE_V4SF_UINT:
33885 case V4SF_FTYPE_V4SF_UINT64:
33886 case V4SF_FTYPE_V4SF_DI:
33887 case V4SF_FTYPE_V4SF_SI:
33888 case V2DI_FTYPE_V2DI_V2DI:
33889 case V2DI_FTYPE_V16QI_V16QI:
33890 case V2DI_FTYPE_V4SI_V4SI:
33891 case V2UDI_FTYPE_V4USI_V4USI:
33892 case V2DI_FTYPE_V2DI_V16QI:
33893 case V2DI_FTYPE_V2DF_V2DF:
33894 case V2SI_FTYPE_V2SI_V2SI:
33895 case V2SI_FTYPE_V4HI_V4HI:
33896 case V2SI_FTYPE_V2SF_V2SF:
33897 case V2DF_FTYPE_V2DF_V2DF:
33898 case V2DF_FTYPE_V2DF_V4SF:
33899 case V2DF_FTYPE_V2DF_V2DI:
33900 case V2DF_FTYPE_V2DF_DI:
33901 case V2DF_FTYPE_V2DF_SI:
33902 case V2DF_FTYPE_V2DF_UINT:
33903 case V2DF_FTYPE_V2DF_UINT64:
33904 case V2SF_FTYPE_V2SF_V2SF:
33905 case V1DI_FTYPE_V1DI_V1DI:
33906 case V1DI_FTYPE_V8QI_V8QI:
33907 case V1DI_FTYPE_V2SI_V2SI:
33908 case V32QI_FTYPE_V16HI_V16HI:
33909 case V16HI_FTYPE_V8SI_V8SI:
33910 case V32QI_FTYPE_V32QI_V32QI:
33911 case V16HI_FTYPE_V32QI_V32QI:
33912 case V16HI_FTYPE_V16HI_V16HI:
33913 case V8SI_FTYPE_V4DF_V4DF:
33914 case V8SI_FTYPE_V8SI_V8SI:
33915 case V8SI_FTYPE_V16HI_V16HI:
33916 case V4DI_FTYPE_V4DI_V4DI:
33917 case V4DI_FTYPE_V8SI_V8SI:
33918 case V4UDI_FTYPE_V8USI_V8USI:
33919 case QI_FTYPE_V8DI_V8DI:
33920 case HI_FTYPE_V16SI_V16SI:
33921 if (comparison == UNKNOWN)
33922 return ix86_expand_binop_builtin (icode, exp, target);
33923 nargs = 2;
33924 break;
33925 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33926 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33927 gcc_assert (comparison != UNKNOWN);
33928 nargs = 2;
33929 swap = true;
33930 break;
33931 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33932 case V16HI_FTYPE_V16HI_SI_COUNT:
33933 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33934 case V8SI_FTYPE_V8SI_SI_COUNT:
33935 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33936 case V4DI_FTYPE_V4DI_INT_COUNT:
33937 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33938 case V8HI_FTYPE_V8HI_SI_COUNT:
33939 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33940 case V4SI_FTYPE_V4SI_SI_COUNT:
33941 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33942 case V4HI_FTYPE_V4HI_SI_COUNT:
33943 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33944 case V2DI_FTYPE_V2DI_SI_COUNT:
33945 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33946 case V2SI_FTYPE_V2SI_SI_COUNT:
33947 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33948 case V1DI_FTYPE_V1DI_SI_COUNT:
33949 nargs = 2;
33950 last_arg_count = true;
33951 break;
33952 case UINT64_FTYPE_UINT64_UINT64:
33953 case UINT_FTYPE_UINT_UINT:
33954 case UINT_FTYPE_UINT_USHORT:
33955 case UINT_FTYPE_UINT_UCHAR:
33956 case UINT16_FTYPE_UINT16_INT:
33957 case UINT8_FTYPE_UINT8_INT:
33958 case HI_FTYPE_HI_HI:
33959 case V16SI_FTYPE_V8DF_V8DF:
33960 nargs = 2;
33961 break;
33962 case V2DI_FTYPE_V2DI_INT_CONVERT:
33963 nargs = 2;
33964 rmode = V1TImode;
33965 nargs_constant = 1;
33966 break;
33967 case V4DI_FTYPE_V4DI_INT_CONVERT:
33968 nargs = 2;
33969 rmode = V2TImode;
33970 nargs_constant = 1;
33971 break;
33972 case V8HI_FTYPE_V8HI_INT:
33973 case V8HI_FTYPE_V8SF_INT:
33974 case V16HI_FTYPE_V16SF_INT:
33975 case V8HI_FTYPE_V4SF_INT:
33976 case V8SF_FTYPE_V8SF_INT:
33977 case V4SF_FTYPE_V16SF_INT:
33978 case V16SF_FTYPE_V16SF_INT:
33979 case V4SI_FTYPE_V4SI_INT:
33980 case V4SI_FTYPE_V8SI_INT:
33981 case V4HI_FTYPE_V4HI_INT:
33982 case V4DF_FTYPE_V4DF_INT:
33983 case V4DF_FTYPE_V8DF_INT:
33984 case V4SF_FTYPE_V4SF_INT:
33985 case V4SF_FTYPE_V8SF_INT:
33986 case V2DI_FTYPE_V2DI_INT:
33987 case V2DF_FTYPE_V2DF_INT:
33988 case V2DF_FTYPE_V4DF_INT:
33989 case V16HI_FTYPE_V16HI_INT:
33990 case V8SI_FTYPE_V8SI_INT:
33991 case V16SI_FTYPE_V16SI_INT:
33992 case V4SI_FTYPE_V16SI_INT:
33993 case V4DI_FTYPE_V4DI_INT:
33994 case V2DI_FTYPE_V4DI_INT:
33995 case V4DI_FTYPE_V8DI_INT:
33996 case HI_FTYPE_HI_INT:
33997 nargs = 2;
33998 nargs_constant = 1;
33999 break;
34000 case V16QI_FTYPE_V16QI_V16QI_V16QI:
34001 case V8SF_FTYPE_V8SF_V8SF_V8SF:
34002 case V4DF_FTYPE_V4DF_V4DF_V4DF:
34003 case V4SF_FTYPE_V4SF_V4SF_V4SF:
34004 case V2DF_FTYPE_V2DF_V2DF_V2DF:
34005 case V32QI_FTYPE_V32QI_V32QI_V32QI:
34006 case HI_FTYPE_V16SI_V16SI_HI:
34007 case QI_FTYPE_V8DI_V8DI_QI:
34008 case V16HI_FTYPE_V16SI_V16HI_HI:
34009 case V16QI_FTYPE_V16SI_V16QI_HI:
34010 case V16QI_FTYPE_V8DI_V16QI_QI:
34011 case V16SF_FTYPE_V16SF_V16SF_HI:
34012 case V16SF_FTYPE_V16SF_V16SF_V16SF:
34013 case V16SF_FTYPE_V16SF_V16SI_V16SF:
34014 case V16SF_FTYPE_V16SI_V16SF_HI:
34015 case V16SF_FTYPE_V16SI_V16SF_V16SF:
34016 case V16SF_FTYPE_V4SF_V16SF_HI:
34017 case V16SI_FTYPE_SI_V16SI_HI:
34018 case V16SI_FTYPE_V16HI_V16SI_HI:
34019 case V16SI_FTYPE_V16QI_V16SI_HI:
34020 case V16SI_FTYPE_V16SF_V16SI_HI:
34021 case V16SI_FTYPE_V16SI_V16SI_HI:
34022 case V16SI_FTYPE_V16SI_V16SI_V16SI:
34023 case V16SI_FTYPE_V4SI_V16SI_HI:
34024 case V2DI_FTYPE_V2DI_V2DI_V2DI:
34025 case V4DI_FTYPE_V4DI_V4DI_V4DI:
34026 case V8DF_FTYPE_V2DF_V8DF_QI:
34027 case V8DF_FTYPE_V4DF_V8DF_QI:
34028 case V8DF_FTYPE_V8DF_V8DF_QI:
34029 case V8DF_FTYPE_V8DF_V8DF_V8DF:
34030 case V8DF_FTYPE_V8DF_V8DI_V8DF:
34031 case V8DF_FTYPE_V8DI_V8DF_V8DF:
34032 case V8DF_FTYPE_V8SF_V8DF_QI:
34033 case V8DF_FTYPE_V8SI_V8DF_QI:
34034 case V8DI_FTYPE_DI_V8DI_QI:
34035 case V8DI_FTYPE_V16QI_V8DI_QI:
34036 case V8DI_FTYPE_V2DI_V8DI_QI:
34037 case V8DI_FTYPE_V4DI_V8DI_QI:
34038 case V8DI_FTYPE_V8DI_V8DI_QI:
34039 case V8DI_FTYPE_V8DI_V8DI_V8DI:
34040 case V8DI_FTYPE_V8HI_V8DI_QI:
34041 case V8DI_FTYPE_V8SI_V8DI_QI:
34042 case V8HI_FTYPE_V8DI_V8HI_QI:
34043 case V8SF_FTYPE_V8DF_V8SF_QI:
34044 case V8SI_FTYPE_V8DF_V8SI_QI:
34045 case V8SI_FTYPE_V8DI_V8SI_QI:
34046 case V4SI_FTYPE_V4SI_V4SI_V4SI:
34047 nargs = 3;
34048 break;
34049 case V32QI_FTYPE_V32QI_V32QI_INT:
34050 case V16HI_FTYPE_V16HI_V16HI_INT:
34051 case V16QI_FTYPE_V16QI_V16QI_INT:
34052 case V4DI_FTYPE_V4DI_V4DI_INT:
34053 case V8HI_FTYPE_V8HI_V8HI_INT:
34054 case V8SI_FTYPE_V8SI_V8SI_INT:
34055 case V8SI_FTYPE_V8SI_V4SI_INT:
34056 case V8SF_FTYPE_V8SF_V8SF_INT:
34057 case V8SF_FTYPE_V8SF_V4SF_INT:
34058 case V4SI_FTYPE_V4SI_V4SI_INT:
34059 case V4DF_FTYPE_V4DF_V4DF_INT:
34060 case V16SF_FTYPE_V16SF_V16SF_INT:
34061 case V16SF_FTYPE_V16SF_V4SF_INT:
34062 case V16SI_FTYPE_V16SI_V4SI_INT:
34063 case V4DF_FTYPE_V4DF_V2DF_INT:
34064 case V4SF_FTYPE_V4SF_V4SF_INT:
34065 case V2DI_FTYPE_V2DI_V2DI_INT:
34066 case V4DI_FTYPE_V4DI_V2DI_INT:
34067 case V2DF_FTYPE_V2DF_V2DF_INT:
34068 case QI_FTYPE_V8DI_V8DI_INT:
34069 case QI_FTYPE_V8DF_V8DF_INT:
34070 case QI_FTYPE_V2DF_V2DF_INT:
34071 case QI_FTYPE_V4SF_V4SF_INT:
34072 case HI_FTYPE_V16SI_V16SI_INT:
34073 case HI_FTYPE_V16SF_V16SF_INT:
34074 nargs = 3;
34075 nargs_constant = 1;
34076 break;
34077 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
34078 nargs = 3;
34079 rmode = V4DImode;
34080 nargs_constant = 1;
34081 break;
34082 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
34083 nargs = 3;
34084 rmode = V2DImode;
34085 nargs_constant = 1;
34086 break;
34087 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
34088 nargs = 3;
34089 rmode = DImode;
34090 nargs_constant = 1;
34091 break;
34092 case V2DI_FTYPE_V2DI_UINT_UINT:
34093 nargs = 3;
34094 nargs_constant = 2;
34095 break;
34096 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
34097 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
34098 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
34099 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
34100 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
34101 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
34102 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
34103 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
34104 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
34105 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
34106 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
34107 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
34108 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
34109 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
34110 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
34111 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
34112 nargs = 4;
34113 break;
34114 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
34115 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
34116 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
34117 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
34118 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
34119 nargs = 4;
34120 nargs_constant = 1;
34121 break;
34122 case QI_FTYPE_V2DF_V2DF_INT_QI:
34123 case QI_FTYPE_V4SF_V4SF_INT_QI:
34124 nargs = 4;
34125 mask_pos = 1;
34126 nargs_constant = 1;
34127 break;
34128 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
34129 nargs = 4;
34130 nargs_constant = 2;
34131 break;
34132 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
34133 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
34134 nargs = 4;
34135 break;
34136 case QI_FTYPE_V8DI_V8DI_INT_QI:
34137 case HI_FTYPE_V16SI_V16SI_INT_HI:
34138 case QI_FTYPE_V8DF_V8DF_INT_QI:
34139 case HI_FTYPE_V16SF_V16SF_INT_HI:
34140 mask_pos = 1;
34141 nargs = 4;
34142 nargs_constant = 1;
34143 break;
34144 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
34145 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
34146 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
34147 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
34148 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
34149 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
34150 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
34151 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
34152 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
34153 nargs = 4;
34154 mask_pos = 2;
34155 nargs_constant = 1;
34156 break;
34157 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
34158 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
34159 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
34160 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
34161 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
34162 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
34163 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
34164 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
34165 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
34166 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
34167 nargs = 5;
34168 mask_pos = 2;
34169 nargs_constant = 1;
34170 break;
34171 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
34172 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
34173 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
34174 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
34175 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
34176 nargs = 5;
34177 mask_pos = 1;
34178 nargs_constant = 1;
34179 break;
34180
34181 default:
34182 gcc_unreachable ();
34183 }
34184
34185 gcc_assert (nargs <= ARRAY_SIZE (args));
34186
34187 if (comparison != UNKNOWN)
34188 {
34189 gcc_assert (nargs == 2);
34190 return ix86_expand_sse_compare (d, exp, target, swap);
34191 }
34192
34193 if (rmode == VOIDmode || rmode == tmode)
34194 {
34195 if (optimize
34196 || target == 0
34197 || GET_MODE (target) != tmode
34198 || !insn_p->operand[0].predicate (target, tmode))
34199 target = gen_reg_rtx (tmode);
34200 real_target = target;
34201 }
34202 else
34203 {
34204 real_target = gen_reg_rtx (tmode);
34205 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34206 }
34207
34208 for (i = 0; i < nargs; i++)
34209 {
34210 tree arg = CALL_EXPR_ARG (exp, i);
34211 rtx op = expand_normal (arg);
34212 enum machine_mode mode = insn_p->operand[i + 1].mode;
34213 bool match = insn_p->operand[i + 1].predicate (op, mode);
34214
34215 if (last_arg_count && (i + 1) == nargs)
34216 {
34217 /* SIMD shift insns take either an 8-bit immediate or a
34218 register as the count, but the builtin functions take an int
34219 as the count. If the count doesn't match, put it in a register. */
34220 if (!match)
34221 {
34222 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34223 if (!insn_p->operand[i + 1].predicate (op, mode))
34224 op = copy_to_reg (op);
34225 }
34226 }
34227 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34228 (!mask_pos && (nargs - i) <= nargs_constant))
34229 {
34230 if (!match)
34231 switch (icode)
34232 {
34233 case CODE_FOR_avx_vinsertf128v4di:
34234 case CODE_FOR_avx_vextractf128v4di:
34235 error ("the last argument must be a 1-bit immediate");
34236 return const0_rtx;
34237
34238 case CODE_FOR_avx512f_cmpv8di3_mask:
34239 case CODE_FOR_avx512f_cmpv16si3_mask:
34240 case CODE_FOR_avx512f_ucmpv8di3_mask:
34241 case CODE_FOR_avx512f_ucmpv16si3_mask:
34242 case CODE_FOR_avx512vl_cmpv4di3_mask:
34243 case CODE_FOR_avx512vl_cmpv8si3_mask:
34244 case CODE_FOR_avx512vl_ucmpv4di3_mask:
34245 case CODE_FOR_avx512vl_ucmpv8si3_mask:
34246 case CODE_FOR_avx512vl_cmpv2di3_mask:
34247 case CODE_FOR_avx512vl_cmpv4si3_mask:
34248 case CODE_FOR_avx512vl_ucmpv2di3_mask:
34249 case CODE_FOR_avx512vl_ucmpv4si3_mask:
34250 error ("the last argument must be a 3-bit immediate");
34251 return const0_rtx;
34252
34253 case CODE_FOR_sse4_1_roundsd:
34254 case CODE_FOR_sse4_1_roundss:
34255
34256 case CODE_FOR_sse4_1_roundpd:
34257 case CODE_FOR_sse4_1_roundps:
34258 case CODE_FOR_avx_roundpd256:
34259 case CODE_FOR_avx_roundps256:
34260
34261 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34262 case CODE_FOR_sse4_1_roundps_sfix:
34263 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34264 case CODE_FOR_avx_roundps_sfix256:
34265
34266 case CODE_FOR_sse4_1_blendps:
34267 case CODE_FOR_avx_blendpd256:
34268 case CODE_FOR_avx_vpermilv4df:
34269 case CODE_FOR_avx512f_getmantv8df_mask:
34270 case CODE_FOR_avx512f_getmantv16sf_mask:
34271 case CODE_FOR_avx512vl_getmantv8sf_mask:
34272 case CODE_FOR_avx512vl_getmantv4df_mask:
34273 case CODE_FOR_avx512vl_getmantv4sf_mask:
34274 case CODE_FOR_avx512vl_getmantv2df_mask:
34275 case CODE_FOR_avx512dq_rangepv8df_mask_round:
34276 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
34277 case CODE_FOR_avx512dq_rangepv4df_mask:
34278 case CODE_FOR_avx512dq_rangepv8sf_mask:
34279 case CODE_FOR_avx512dq_rangepv2df_mask:
34280 case CODE_FOR_avx512dq_rangepv4sf_mask:
34281 error ("the last argument must be a 4-bit immediate");
34282 return const0_rtx;
34283
34284 case CODE_FOR_sha1rnds4:
34285 case CODE_FOR_sse4_1_blendpd:
34286 case CODE_FOR_avx_vpermilv2df:
34287 case CODE_FOR_xop_vpermil2v2df3:
34288 case CODE_FOR_xop_vpermil2v4sf3:
34289 case CODE_FOR_xop_vpermil2v4df3:
34290 case CODE_FOR_xop_vpermil2v8sf3:
34291 case CODE_FOR_avx512f_vinsertf32x4_mask:
34292 case CODE_FOR_avx512f_vinserti32x4_mask:
34293 case CODE_FOR_avx512f_vextractf32x4_mask:
34294 case CODE_FOR_avx512f_vextracti32x4_mask:
34295 case CODE_FOR_sse2_shufpd:
34296 case CODE_FOR_sse2_shufpd_mask:
34297 case CODE_FOR_avx512dq_shuf_f64x2_mask:
34298 case CODE_FOR_avx512dq_shuf_i64x2_mask:
34299 case CODE_FOR_avx512vl_shuf_i32x4_mask:
34300 case CODE_FOR_avx512vl_shuf_f32x4_mask:
34301 error ("the last argument must be a 2-bit immediate");
34302 return const0_rtx;
34303
34304 case CODE_FOR_avx_vextractf128v4df:
34305 case CODE_FOR_avx_vextractf128v8sf:
34306 case CODE_FOR_avx_vextractf128v8si:
34307 case CODE_FOR_avx_vinsertf128v4df:
34308 case CODE_FOR_avx_vinsertf128v8sf:
34309 case CODE_FOR_avx_vinsertf128v8si:
34310 case CODE_FOR_avx512f_vinsertf64x4_mask:
34311 case CODE_FOR_avx512f_vinserti64x4_mask:
34312 case CODE_FOR_avx512f_vextractf64x4_mask:
34313 case CODE_FOR_avx512f_vextracti64x4_mask:
34314 case CODE_FOR_avx512dq_vinsertf32x8_mask:
34315 case CODE_FOR_avx512dq_vinserti32x8_mask:
34316 case CODE_FOR_avx512vl_vinsertv4df:
34317 case CODE_FOR_avx512vl_vinsertv4di:
34318 case CODE_FOR_avx512vl_vinsertv8sf:
34319 case CODE_FOR_avx512vl_vinsertv8si:
34320 error ("the last argument must be a 1-bit immediate");
34321 return const0_rtx;
34322
34323 case CODE_FOR_avx_vmcmpv2df3:
34324 case CODE_FOR_avx_vmcmpv4sf3:
34325 case CODE_FOR_avx_cmpv2df3:
34326 case CODE_FOR_avx_cmpv4sf3:
34327 case CODE_FOR_avx_cmpv4df3:
34328 case CODE_FOR_avx_cmpv8sf3:
34329 case CODE_FOR_avx512f_cmpv8df3_mask:
34330 case CODE_FOR_avx512f_cmpv16sf3_mask:
34331 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34332 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34333 error ("the last argument must be a 5-bit immediate");
34334 return const0_rtx;
34335
34336 default:
34337 switch (nargs_constant)
34338 {
34339 case 2:
34340 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34341 (!mask_pos && (nargs - i) == nargs_constant))
34342 {
34343 error ("the next to last argument must be an 8-bit immediate");
34344 break;
34345 }
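/* FALLTHRU */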
34346 case 1:
34347 error ("the last argument must be an 8-bit immediate");
34348 break;
34349 default:
34350 gcc_unreachable ();
34351 }
34352 return const0_rtx;
34353 }
34354 }
34355 else
34356 {
34357 if (VECTOR_MODE_P (mode))
34358 op = safe_vector_operand (op, mode);
34359
34360 /* If we aren't optimizing, only allow one memory operand to
34361 be generated. */
34362 if (memory_operand (op, mode))
34363 num_memory++;
34364
34365 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34366 {
34367 if (optimize || !match || num_memory > 1)
34368 op = copy_to_mode_reg (mode, op);
34369 }
34370 else
34371 {
34372 op = copy_to_reg (op);
34373 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34374 }
34375 }
34376
34377 args[i].op = op;
34378 args[i].mode = mode;
34379 }
34380
34381 switch (nargs)
34382 {
34383 case 1:
34384 pat = GEN_FCN (icode) (real_target, args[0].op);
34385 break;
34386 case 2:
34387 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34388 break;
34389 case 3:
34390 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34391 args[2].op);
34392 break;
34393 case 4:
34394 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34395 args[2].op, args[3].op);
34396 break;
34397 case 5:
34398 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34399 args[2].op, args[3].op, args[4].op);
break;
34400 case 6:
34401 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34402 args[2].op, args[3].op, args[4].op,
34403 args[5].op);
34404 break;
34405 default:
34406 gcc_unreachable ();
34407 }
34408
34409 if (! pat)
34410 return 0;
34411
34412 emit_insn (pat);
34413 return target;
34414 }
34415
34416 /* Transform a pattern of the following layout:
34417 (parallel [
34418 (set (A B))
34419 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34420 ])
34421 into:
34422 (set (A B))
34423
34424 Or:
34425 (parallel [ A B
34426 ...
34427 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34428 ...
34429 ])
34430 into:
34431 (parallel [ A B ... ]) */
34432
34433 static rtx
34434 ix86_erase_embedded_rounding (rtx pat)
34435 {
34436 if (GET_CODE (pat) == INSN)
34437 pat = PATTERN (pat);
34438
34439 gcc_assert (GET_CODE (pat) == PARALLEL);
34440
34441 if (XVECLEN (pat, 0) == 2)
34442 {
34443 rtx p0 = XVECEXP (pat, 0, 0);
34444 rtx p1 = XVECEXP (pat, 0, 1);
34445
34446 gcc_assert (GET_CODE (p0) == SET
34447 && GET_CODE (p1) == UNSPEC
34448 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34449
34450 return p0;
34451 }
34452 else
34453 {
34454 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34455 int i = 0;
34456 int j = 0;
34457
34458 for (; i < XVECLEN (pat, 0); ++i)
34459 {
34460 rtx elem = XVECEXP (pat, 0, i);
34461 if (GET_CODE (elem) != UNSPEC
34462 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34463 res [j++] = elem;
34464 }
34465
34466 /* No more than one occurrence was removed. */
34467 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34468
34469 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34470 }
34471 }
34472
34473 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34474 with rounding. */
34475 static rtx
34476 ix86_expand_sse_comi_round (const struct builtin_description *d,
34477 tree exp, rtx target)
34478 {
34479 rtx pat, set_dst;
34480 tree arg0 = CALL_EXPR_ARG (exp, 0);
34481 tree arg1 = CALL_EXPR_ARG (exp, 1);
34482 tree arg2 = CALL_EXPR_ARG (exp, 2);
34483 tree arg3 = CALL_EXPR_ARG (exp, 3);
34484 rtx op0 = expand_normal (arg0);
34485 rtx op1 = expand_normal (arg1);
34486 rtx op2 = expand_normal (arg2);
34487 rtx op3 = expand_normal (arg3);
34488 enum insn_code icode = d->icode;
34489 const struct insn_data_d *insn_p = &insn_data[icode];
34490 enum machine_mode mode0 = insn_p->operand[0].mode;
34491 enum machine_mode mode1 = insn_p->operand[1].mode;
34492 enum rtx_code comparison = UNEQ;
34493 bool need_ucomi = false;
34494
34495 /* See avxintrin.h for values. */
34496 enum rtx_code comi_comparisons[32] =
34497 {
34498 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34499 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34500 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34501 };
34502 bool need_ucomi_values[32] =
34503 {
34504 true, false, false, true, true, false, false, true,
34505 true, false, false, true, true, false, false, true,
34506 false, true, true, false, false, true, true, false,
34507 false, true, true, false, false, true, true, false
34508 };
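/* Illustration (encodings follow avxintrin.h): quiet predicates such as
   _CMP_EQ_OQ (0) select the non-signalling ucomi form via
   need_ucomi_values, while signalling predicates such as _CMP_LT_OS (1)
   keep the signalling comi form.  */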
34509
34510 if (!CONST_INT_P (op2))
34511 {
34512 error ("the third argument must be a comparison constant");
34513 return const0_rtx;
34514 }
34515 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34516 {
34517 error ("incorrect comparison mode");
34518 return const0_rtx;
34519 }
34520
34521 if (!insn_p->operand[2].predicate (op3, SImode))
34522 {
34523 error ("incorrect rounding operand");
34524 return const0_rtx;
34525 }
34526
34527 comparison = comi_comparisons[INTVAL (op2)];
34528 need_ucomi = need_ucomi_values[INTVAL (op2)];
34529
34530 if (VECTOR_MODE_P (mode0))
34531 op0 = safe_vector_operand (op0, mode0);
34532 if (VECTOR_MODE_P (mode1))
34533 op1 = safe_vector_operand (op1, mode1);
34534
34535 target = gen_reg_rtx (SImode);
34536 emit_move_insn (target, const0_rtx);
34537 target = gen_rtx_SUBREG (QImode, target, 0);
34538
34539 if ((optimize && !register_operand (op0, mode0))
34540 || !insn_p->operand[0].predicate (op0, mode0))
34541 op0 = copy_to_mode_reg (mode0, op0);
34542 if ((optimize && !register_operand (op1, mode1))
34543 || !insn_p->operand[1].predicate (op1, mode1))
34544 op1 = copy_to_mode_reg (mode1, op1);
34545
34546 if (need_ucomi)
34547 icode = icode == CODE_FOR_sse_comi_round
34548 ? CODE_FOR_sse_ucomi_round
34549 : CODE_FOR_sse2_ucomi_round;
34550
34551 pat = GEN_FCN (icode) (op0, op1, op3);
34552 if (! pat)
34553 return 0;
34554
34555 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34556 if (INTVAL (op3) == NO_ROUND)
34557 {
34558 pat = ix86_erase_embedded_rounding (pat);
34559 if (! pat)
34560 return 0;
34561
34562 set_dst = SET_DEST (pat);
34563 }
34564 else
34565 {
34566 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34567 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34568 }
34569
34570 emit_insn (pat);
34571 emit_insn (gen_rtx_SET (VOIDmode,
34572 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34573 gen_rtx_fmt_ee (comparison, QImode,
34574 set_dst,
34575 const0_rtx)));
34576
34577 return SUBREG_REG (target);
34578 }
34579
34580 static rtx
34581 ix86_expand_round_builtin (const struct builtin_description *d,
34582 tree exp, rtx target)
34583 {
34584 rtx pat;
34585 unsigned int i, nargs;
34586 struct
34587 {
34588 rtx op;
34589 enum machine_mode mode;
34590 } args[6];
34591 enum insn_code icode = d->icode;
34592 const struct insn_data_d *insn_p = &insn_data[icode];
34593 enum machine_mode tmode = insn_p->operand[0].mode;
34594 unsigned int nargs_constant = 0;
34595 unsigned int redundant_embed_rnd = 0;
34596
34597 switch ((enum ix86_builtin_func_type) d->flag)
34598 {
34599 case UINT64_FTYPE_V2DF_INT:
34600 case UINT64_FTYPE_V4SF_INT:
34601 case UINT_FTYPE_V2DF_INT:
34602 case UINT_FTYPE_V4SF_INT:
34603 case INT64_FTYPE_V2DF_INT:
34604 case INT64_FTYPE_V4SF_INT:
34605 case INT_FTYPE_V2DF_INT:
34606 case INT_FTYPE_V4SF_INT:
34607 nargs = 2;
34608 break;
34609 case V4SF_FTYPE_V4SF_UINT_INT:
34610 case V4SF_FTYPE_V4SF_UINT64_INT:
34611 case V2DF_FTYPE_V2DF_UINT64_INT:
34612 case V4SF_FTYPE_V4SF_INT_INT:
34613 case V4SF_FTYPE_V4SF_INT64_INT:
34614 case V2DF_FTYPE_V2DF_INT64_INT:
34615 case V4SF_FTYPE_V4SF_V4SF_INT:
34616 case V2DF_FTYPE_V2DF_V2DF_INT:
34617 case V4SF_FTYPE_V4SF_V2DF_INT:
34618 case V2DF_FTYPE_V2DF_V4SF_INT:
34619 nargs = 3;
34620 break;
34621 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34622 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34623 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34624 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34625 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34626 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34627 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34628 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34629 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34630 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34631 nargs = 4;
34632 break;
34633 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34634 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34635 nargs_constant = 2;
34636 nargs = 4;
34637 break;
34638 case INT_FTYPE_V4SF_V4SF_INT_INT:
34639 case INT_FTYPE_V2DF_V2DF_INT_INT:
34640 return ix86_expand_sse_comi_round (d, exp, target);
34641 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34642 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34643 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34644 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34645 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34646 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34647 nargs = 5;
34648 break;
34649 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34650 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34651 nargs_constant = 4;
34652 nargs = 5;
34653 break;
34654 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34655 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34656 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34657 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34658 nargs_constant = 3;
34659 nargs = 5;
34660 break;
34661 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34662 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34663 nargs = 6;
34664 nargs_constant = 4;
34665 break;
34666 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34667 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34668 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34669 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34670 nargs = 6;
34671 nargs_constant = 3;
34672 break;
34673 default:
34674 gcc_unreachable ();
34675 }
34676 gcc_assert (nargs <= ARRAY_SIZE (args));
34677
34678 if (optimize
34679 || target == 0
34680 || GET_MODE (target) != tmode
34681 || !insn_p->operand[0].predicate (target, tmode))
34682 target = gen_reg_rtx (tmode);
34683
34684 for (i = 0; i < nargs; i++)
34685 {
34686 tree arg = CALL_EXPR_ARG (exp, i);
34687 rtx op = expand_normal (arg);
34688 enum machine_mode mode = insn_p->operand[i + 1].mode;
34689 bool match = insn_p->operand[i + 1].predicate (op, mode);
34690
34691 if (i == nargs - nargs_constant)
34692 {
34693 if (!match)
34694 {
34695 switch (icode)
34696 {
34697 case CODE_FOR_avx512f_getmantv8df_mask_round:
34698 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34699 case CODE_FOR_avx512f_vgetmantv2df_round:
34700 case CODE_FOR_avx512f_vgetmantv4sf_round:
34701 error ("the immediate argument must be a 4-bit immediate");
34702 return const0_rtx;
34703 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34704 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34705 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34706 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34707 error ("the immediate argument must be a 5-bit immediate");
34708 return const0_rtx;
34709 default:
34710 error ("the immediate argument must be an 8-bit immediate");
34711 return const0_rtx;
34712 }
34713 }
34714 }
34715 else if (i == nargs - 1)
34716 {
34717 if (!insn_p->operand[nargs].predicate (op, SImode))
34718 {
34719 error ("incorrect rounding operand");
34720 return const0_rtx;
34721 }
34722
34723 /* If there is no rounding, use the normal version of the pattern. */
34724 if (INTVAL (op) == NO_ROUND)
34725 redundant_embed_rnd = 1;
34726 }
34727 else
34728 {
34729 if (VECTOR_MODE_P (mode))
34730 op = safe_vector_operand (op, mode);
34731
34732 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34733 {
34734 if (optimize || !match)
34735 op = copy_to_mode_reg (mode, op);
34736 }
34737 else
34738 {
34739 op = copy_to_reg (op);
34740 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34741 }
34742 }
34743
34744 args[i].op = op;
34745 args[i].mode = mode;
34746 }
34747
34748 switch (nargs)
34749 {
34750 case 1:
34751 pat = GEN_FCN (icode) (target, args[0].op);
34752 break;
34753 case 2:
34754 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34755 break;
34756 case 3:
34757 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34758 args[2].op);
34759 break;
34760 case 4:
34761 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34762 args[2].op, args[3].op);
34763 break;
34764 case 5:
34765 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34766 args[2].op, args[3].op, args[4].op);
break;
34767 case 6:
34768 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34769 args[2].op, args[3].op, args[4].op,
34770 args[5].op);
34771 break;
34772 default:
34773 gcc_unreachable ();
34774 }
34775
34776 if (!pat)
34777 return 0;
34778
34779 if (redundant_embed_rnd)
34780 pat = ix86_erase_embedded_rounding (pat);
34781
34782 emit_insn (pat);
34783 return target;
34784 }
34785
34786 /* Subroutine of ix86_expand_builtin to take care of special insns
34787 with variable number of operands. */
34788
34789 static rtx
34790 ix86_expand_special_args_builtin (const struct builtin_description *d,
34791 tree exp, rtx target)
34792 {
34793 tree arg;
34794 rtx pat, op;
34795 unsigned int i, nargs, arg_adjust, memory;
34796 bool aligned_mem = false;
34797 struct
34798 {
34799 rtx op;
34800 enum machine_mode mode;
34801 } args[3];
34802 enum insn_code icode = d->icode;
34803 bool last_arg_constant = false;
34804 const struct insn_data_d *insn_p = &insn_data[icode];
34805 enum machine_mode tmode = insn_p->operand[0].mode;
34806 enum { load, store } klass;
34807
34808 switch ((enum ix86_builtin_func_type) d->flag)
34809 {
34810 case VOID_FTYPE_VOID:
34811 emit_insn (GEN_FCN (icode) (target));
34812 return 0;
34813 case VOID_FTYPE_UINT64:
34814 case VOID_FTYPE_UNSIGNED:
34815 nargs = 0;
34816 klass = store;
34817 memory = 0;
34818 break;
34819
34820 case INT_FTYPE_VOID:
34821 case USHORT_FTYPE_VOID:
34822 case UINT64_FTYPE_VOID:
34823 case UNSIGNED_FTYPE_VOID:
34824 nargs = 0;
34825 klass = load;
34826 memory = 0;
34827 break;
34828 case UINT64_FTYPE_PUNSIGNED:
34829 case V2DI_FTYPE_PV2DI:
34830 case V4DI_FTYPE_PV4DI:
34831 case V32QI_FTYPE_PCCHAR:
34832 case V16QI_FTYPE_PCCHAR:
34833 case V8SF_FTYPE_PCV4SF:
34834 case V8SF_FTYPE_PCFLOAT:
34835 case V4SF_FTYPE_PCFLOAT:
34836 case V4DF_FTYPE_PCV2DF:
34837 case V4DF_FTYPE_PCDOUBLE:
34838 case V2DF_FTYPE_PCDOUBLE:
34839 case VOID_FTYPE_PVOID:
34840 case V16SI_FTYPE_PV4SI:
34841 case V16SF_FTYPE_PV4SF:
34842 case V8DI_FTYPE_PV4DI:
34843 case V8DI_FTYPE_PV8DI:
34844 case V8DF_FTYPE_PV4DF:
34845 nargs = 1;
34846 klass = load;
34847 memory = 0;
34848 switch (icode)
34849 {
34850 case CODE_FOR_sse4_1_movntdqa:
34851 case CODE_FOR_avx2_movntdqa:
34852 case CODE_FOR_avx512f_movntdqa:
34853 aligned_mem = true;
34854 break;
34855 default:
34856 break;
34857 }
34858 break;
34859 case VOID_FTYPE_PV2SF_V4SF:
34860 case VOID_FTYPE_PV8DI_V8DI:
34861 case VOID_FTYPE_PV4DI_V4DI:
34862 case VOID_FTYPE_PV2DI_V2DI:
34863 case VOID_FTYPE_PCHAR_V32QI:
34864 case VOID_FTYPE_PCHAR_V16QI:
34865 case VOID_FTYPE_PFLOAT_V16SF:
34866 case VOID_FTYPE_PFLOAT_V8SF:
34867 case VOID_FTYPE_PFLOAT_V4SF:
34868 case VOID_FTYPE_PDOUBLE_V8DF:
34869 case VOID_FTYPE_PDOUBLE_V4DF:
34870 case VOID_FTYPE_PDOUBLE_V2DF:
34871 case VOID_FTYPE_PLONGLONG_LONGLONG:
34872 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34873 case VOID_FTYPE_PINT_INT:
34874 nargs = 1;
34875 klass = store;
34876 /* Reserve memory operand for target. */
34877 memory = ARRAY_SIZE (args);
34878 switch (icode)
34879 {
34880 /* These builtins and instructions require the memory
34881 to be properly aligned. */
34882 case CODE_FOR_avx_movntv4di:
34883 case CODE_FOR_sse2_movntv2di:
34884 case CODE_FOR_avx_movntv8sf:
34885 case CODE_FOR_sse_movntv4sf:
34886 case CODE_FOR_sse4a_vmmovntv4sf:
34887 case CODE_FOR_avx_movntv4df:
34888 case CODE_FOR_sse2_movntv2df:
34889 case CODE_FOR_sse4a_vmmovntv2df:
34890 case CODE_FOR_sse2_movntidi:
34891 case CODE_FOR_sse_movntq:
34892 case CODE_FOR_sse2_movntisi:
34893 case CODE_FOR_avx512f_movntv16sf:
34894 case CODE_FOR_avx512f_movntv8df:
34895 case CODE_FOR_avx512f_movntv8di:
34896 aligned_mem = true;
34897 break;
34898 default:
34899 break;
34900 }
34901 break;
34902 case V4SF_FTYPE_V4SF_PCV2SF:
34903 case V2DF_FTYPE_V2DF_PCDOUBLE:
34904 nargs = 2;
34905 klass = load;
34906 memory = 1;
34907 break;
34908 case V8SF_FTYPE_PCV8SF_V8SI:
34909 case V4DF_FTYPE_PCV4DF_V4DI:
34910 case V4SF_FTYPE_PCV4SF_V4SI:
34911 case V2DF_FTYPE_PCV2DF_V2DI:
34912 case V8SI_FTYPE_PCV8SI_V8SI:
34913 case V4DI_FTYPE_PCV4DI_V4DI:
34914 case V4SI_FTYPE_PCV4SI_V4SI:
34915 case V2DI_FTYPE_PCV2DI_V2DI:
34916 nargs = 2;
34917 klass = load;
34918 memory = 0;
34919 break;
34920 case VOID_FTYPE_PV8DF_V8DF_QI:
34921 case VOID_FTYPE_PV16SF_V16SF_HI:
34922 case VOID_FTYPE_PV8DI_V8DI_QI:
34923 case VOID_FTYPE_PV16SI_V16SI_HI:
34924 switch (icode)
34925 {
34926 /* These builtins and instructions require the memory
34927 to be properly aligned. */
34928 case CODE_FOR_avx512f_storev16sf_mask:
34929 case CODE_FOR_avx512f_storev16si_mask:
34930 case CODE_FOR_avx512f_storev8df_mask:
34931 case CODE_FOR_avx512f_storev8di_mask:
34932 case CODE_FOR_avx512vl_storev8sf_mask:
34933 case CODE_FOR_avx512vl_storev8si_mask:
34934 case CODE_FOR_avx512vl_storev4df_mask:
34935 case CODE_FOR_avx512vl_storev4di_mask:
34936 case CODE_FOR_avx512vl_storev4sf_mask:
34937 case CODE_FOR_avx512vl_storev4si_mask:
34938 case CODE_FOR_avx512vl_storev2df_mask:
34939 case CODE_FOR_avx512vl_storev2di_mask:
34940 aligned_mem = true;
34941 break;
34942 default:
34943 break;
34944 }
34945 /* FALLTHRU */
34946 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34947 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34948 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34949 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34950 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34951 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34952 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34953 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34954 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34955 case VOID_FTYPE_PFLOAT_V4SF_QI:
34956 case VOID_FTYPE_PV8SI_V8DI_QI:
34957 case VOID_FTYPE_PV8HI_V8DI_QI:
34958 case VOID_FTYPE_PV16HI_V16SI_HI:
34959 case VOID_FTYPE_PV16QI_V8DI_QI:
34960 case VOID_FTYPE_PV16QI_V16SI_HI:
34961 nargs = 2;
34962 klass = store;
34963 /* Reserve memory operand for target. */
34964 memory = ARRAY_SIZE (args);
34965 break;
34966 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34967 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34968 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34969 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34970 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34971 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34972 nargs = 3;
34973 klass = load;
34974 memory = 0;
34975 switch (icode)
34976 {
34977 /* These builtins and instructions require the memory
34978 to be properly aligned. */
34979 case CODE_FOR_avx512f_loadv16sf_mask:
34980 case CODE_FOR_avx512f_loadv16si_mask:
34981 case CODE_FOR_avx512f_loadv8df_mask:
34982 case CODE_FOR_avx512f_loadv8di_mask:
34983 case CODE_FOR_avx512vl_loadv8sf_mask:
34984 case CODE_FOR_avx512vl_loadv8si_mask:
34985 case CODE_FOR_avx512vl_loadv4df_mask:
34986 case CODE_FOR_avx512vl_loadv4di_mask:
34987 case CODE_FOR_avx512vl_loadv4sf_mask:
34988 case CODE_FOR_avx512vl_loadv4si_mask:
34989 case CODE_FOR_avx512vl_loadv2df_mask:
34990 case CODE_FOR_avx512vl_loadv2di_mask:
34991 case CODE_FOR_avx512bw_loadv64qi_mask:
34992 case CODE_FOR_avx512vl_loadv32qi_mask:
34993 case CODE_FOR_avx512vl_loadv16qi_mask:
34994 case CODE_FOR_avx512bw_loadv32hi_mask:
34995 case CODE_FOR_avx512vl_loadv16hi_mask:
34996 case CODE_FOR_avx512vl_loadv8hi_mask:
34997 aligned_mem = true;
34998 break;
34999 default:
35000 break;
35001 }
35002 break;
35003 case VOID_FTYPE_UINT_UINT_UINT:
35004 case VOID_FTYPE_UINT64_UINT_UINT:
35005 case UCHAR_FTYPE_UINT_UINT_UINT:
35006 case UCHAR_FTYPE_UINT64_UINT_UINT:
35007 nargs = 3;
35008 klass = load;
35009 memory = ARRAY_SIZE (args);
35010 last_arg_constant = true;
35011 break;
35012 default:
35013 gcc_unreachable ();
35014 }
35015
35016 gcc_assert (nargs <= ARRAY_SIZE (args));
35017
35018 if (klass == store)
35019 {
35020 arg = CALL_EXPR_ARG (exp, 0);
35021 op = expand_normal (arg);
35022 gcc_assert (target == 0);
35023 if (memory)
35024 {
35025 op = ix86_zero_extend_to_Pmode (op);
35026 target = gen_rtx_MEM (tmode, op);
35027 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
35028 on it. Try to improve it using get_pointer_alignment,
35029 and if the special builtin is one that requires strict
35030 mode alignment, also from its GET_MODE_ALIGNMENT.
35031 Failure to do so could lead to ix86_legitimate_combined_insn
35032 rejecting all changes to such insns. */
35033 unsigned int align = get_pointer_alignment (arg);
35034 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
35035 align = GET_MODE_ALIGNMENT (tmode);
35036 if (MEM_ALIGN (target) < align)
35037 set_mem_align (target, align);
35038 }
35039 else
35040 target = force_reg (tmode, op);
35041 arg_adjust = 1;
35042 }
35043 else
35044 {
35045 arg_adjust = 0;
35046 if (optimize
35047 || target == 0
35048 || !register_operand (target, tmode)
35049 || GET_MODE (target) != tmode)
35050 target = gen_reg_rtx (tmode);
35051 }
35052
35053 for (i = 0; i < nargs; i++)
35054 {
35055 enum machine_mode mode = insn_p->operand[i + 1].mode;
35056 bool match;
35057
35058 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
35059 op = expand_normal (arg);
35060 match = insn_p->operand[i + 1].predicate (op, mode);
35061
35062 if (last_arg_constant && (i + 1) == nargs)
35063 {
35064 if (!match)
35065 {
35066 if (icode == CODE_FOR_lwp_lwpvalsi3
35067 || icode == CODE_FOR_lwp_lwpinssi3
35068 || icode == CODE_FOR_lwp_lwpvaldi3
35069 || icode == CODE_FOR_lwp_lwpinsdi3)
35070 error ("the last argument must be a 32-bit immediate");
35071 else
35072 error ("the last argument must be an 8-bit immediate");
35073 return const0_rtx;
35074 }
35075 }
35076 else
35077 {
35078 if (i == memory)
35079 {
35080 /* This must be the memory operand. */
35081 op = ix86_zero_extend_to_Pmode (op);
35082 op = gen_rtx_MEM (mode, op);
35083 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
35084 on it. Try to improve it using get_pointer_alignment,
35085 and if the special builtin is one that requires strict
35086 mode alignment, also from its GET_MODE_ALIGNMENT.
35087 Failure to do so could lead to ix86_legitimate_combined_insn
35088 rejecting all changes to such insns. */
35089 unsigned int align = get_pointer_alignment (arg);
35090 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
35091 align = GET_MODE_ALIGNMENT (mode);
35092 if (MEM_ALIGN (op) < align)
35093 set_mem_align (op, align);
35094 }
35095 else
35096 {
35097 /* This must be a register. */
35098 if (VECTOR_MODE_P (mode))
35099 op = safe_vector_operand (op, mode);
35100
35101 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35102 op = copy_to_mode_reg (mode, op);
35103 else
35104 {
35105 op = copy_to_reg (op);
35106 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
35107 }
35108 }
35109 }
35110
35111 args[i].op = op;
35112 args[i].mode = mode;
35113 }
35114
35115 switch (nargs)
35116 {
35117 case 0:
35118 pat = GEN_FCN (icode) (target);
35119 break;
35120 case 1:
35121 pat = GEN_FCN (icode) (target, args[0].op);
35122 break;
35123 case 2:
35124 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35125 break;
35126 case 3:
35127 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35128 break;
35129 default:
35130 gcc_unreachable ();
35131 }
35132
35133 if (! pat)
35134 return 0;
35135 emit_insn (pat);
35136 return klass == store ? 0 : target;
35137 }
35138
35139 /* Return the integer constant in ARG. Constrain it to be in the range
35140 of the subparts of VEC_TYPE; issue an error if not. */
35141
35142 static int
35143 get_element_number (tree vec_type, tree arg)
35144 {
35145 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
35146
35147 if (!tree_fits_uhwi_p (arg)
35148 || (elt = tree_to_uhwi (arg), elt > max))
35149 {
35150 error ("selector must be an integer constant in the range 0..%wi", max);
35151 return 0;
35152 }
35153
35154 return elt;
35155 }
35156
35157 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35158 ix86_expand_vector_init. We DO have language-level syntax for this, in
35159 the form of (type){ init-list }. Except that since we can't place emms
35160 instructions from inside the compiler, we can't allow the use of MMX
35161 registers unless the user explicitly asks for it. So we do *not* define
35162 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
35163 we have builtins invoked by mmintrin.h that give us license to emit
35164 these sorts of instructions. */
35165
35166 static rtx
35167 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
35168 {
35169 enum machine_mode tmode = TYPE_MODE (type);
35170 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
35171 int i, n_elt = GET_MODE_NUNITS (tmode);
35172 rtvec v = rtvec_alloc (n_elt);
35173
35174 gcc_assert (VECTOR_MODE_P (tmode));
35175 gcc_assert (call_expr_nargs (exp) == n_elt);
35176
35177 for (i = 0; i < n_elt; ++i)
35178 {
35179 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
35180 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
35181 }
35182
35183 if (!target || !register_operand (target, tmode))
35184 target = gen_reg_rtx (tmode);
35185
35186 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
35187 return target;
35188 }
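/* For example (illustrative only), mmintrin.h builds an __m64 from two
   ints with a call along the lines of
     __builtin_ia32_vec_init_v2si (i0, i1)
   and this routine expands such calls through ix86_expand_vector_init
   rather than through MMX vec_init patterns.  */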
35189
35190 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35191 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
35192 had a language-level syntax for referencing vector elements. */
35193
35194 static rtx
35195 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35196 {
35197 enum machine_mode tmode, mode0;
35198 tree arg0, arg1;
35199 int elt;
35200 rtx op0;
35201
35202 arg0 = CALL_EXPR_ARG (exp, 0);
35203 arg1 = CALL_EXPR_ARG (exp, 1);
35204
35205 op0 = expand_normal (arg0);
35206 elt = get_element_number (TREE_TYPE (arg0), arg1);
35207
35208 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35209 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35210 gcc_assert (VECTOR_MODE_P (mode0));
35211
35212 op0 = force_reg (mode0, op0);
35213
35214 if (optimize || !target || !register_operand (target, tmode))
35215 target = gen_reg_rtx (tmode);
35216
35217 ix86_expand_vector_extract (true, target, op0, elt);
35218
35219 return target;
35220 }
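/* For example (illustrative only), a call such as
     __builtin_ia32_vec_ext_v8hi (v, 5)
   has its selector checked against the eight V8HImode subparts by
   get_element_number and is then expanded through
   ix86_expand_vector_extract.  */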
35221
35222 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35223 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35224 a language-level syntax for referencing vector elements. */
35225
35226 static rtx
35227 ix86_expand_vec_set_builtin (tree exp)
35228 {
35229 enum machine_mode tmode, mode1;
35230 tree arg0, arg1, arg2;
35231 int elt;
35232 rtx op0, op1, target;
35233
35234 arg0 = CALL_EXPR_ARG (exp, 0);
35235 arg1 = CALL_EXPR_ARG (exp, 1);
35236 arg2 = CALL_EXPR_ARG (exp, 2);
35237
35238 tmode = TYPE_MODE (TREE_TYPE (arg0));
35239 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35240 gcc_assert (VECTOR_MODE_P (tmode));
35241
35242 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35243 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35244 elt = get_element_number (TREE_TYPE (arg0), arg2);
35245
35246 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35247 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35248
35249 op0 = force_reg (tmode, op0);
35250 op1 = force_reg (mode1, op1);
35251
35252 /* OP0 is the source of these builtin functions and shouldn't be
35253 modified. Create a copy, use it and return it as target. */
35254 target = gen_reg_rtx (tmode);
35255 emit_move_insn (target, op0);
35256 ix86_expand_vector_set (true, target, op1, elt);
35257
35258 return target;
35259 }
35260
35261 /* Expand an expression EXP that calls a built-in function,
35262 with result going to TARGET if that's convenient
35263 (and in mode MODE if that's convenient).
35264 SUBTARGET may be used as the target for computing one of EXP's operands.
35265 IGNORE is nonzero if the value is to be ignored. */
35266
35267 static rtx
35268 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35269 enum machine_mode mode, int ignore)
35270 {
35271 const struct builtin_description *d;
35272 size_t i;
35273 enum insn_code icode;
35274 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35275 tree arg0, arg1, arg2, arg3, arg4;
35276 rtx op0, op1, op2, op3, op4, pat, insn;
35277 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35278 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35279
35280 /* For CPU builtins that can be folded, fold first and expand the fold. */
35281 switch (fcode)
35282 {
35283 case IX86_BUILTIN_CPU_INIT:
35284 {
35285 /* Make it call __cpu_indicator_init in libgcc. */
35286 tree call_expr, fndecl, type;
35287 type = build_function_type_list (integer_type_node, NULL_TREE);
35288 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35289 call_expr = build_call_expr (fndecl, 0);
35290 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35291 }
35292 case IX86_BUILTIN_CPU_IS:
35293 case IX86_BUILTIN_CPU_SUPPORTS:
35294 {
35295 tree arg0 = CALL_EXPR_ARG (exp, 0);
35296 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35297 gcc_assert (fold_expr != NULL_TREE);
35298 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35299 }
35300 }
35301
35302 /* Determine whether the builtin function is available under the current ISA.
35303 Originally the builtin was not created if it wasn't applicable to the
35304 current ISA based on the command line switches. With function specific
35305 options, we need to check in the context of the function making the call
35306 whether it is supported. */
35307 if (ix86_builtins_isa[fcode].isa
35308 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35309 {
35310 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35311 NULL, (enum fpmath_unit) 0, false);
35312
35313 if (!opts)
35314 error ("%qE needs unknown isa option", fndecl);
35315 else
35316 {
35317 gcc_assert (opts != NULL);
35318 error ("%qE needs isa option %s", fndecl, opts);
35319 free (opts);
35320 }
35321 return const0_rtx;
35322 }
35323
35324 switch (fcode)
35325 {
35326 case IX86_BUILTIN_MASKMOVQ:
35327 case IX86_BUILTIN_MASKMOVDQU:
35328 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35329 ? CODE_FOR_mmx_maskmovq
35330 : CODE_FOR_sse2_maskmovdqu);
35331 /* Note the arg order is different from the operand order. */
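/* Builtin argument 2 supplies the destination address (insn operand 0),
   while arguments 0 and 1 supply the two vector operands (insn
   operands 1 and 2).  */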
35332 arg1 = CALL_EXPR_ARG (exp, 0);
35333 arg2 = CALL_EXPR_ARG (exp, 1);
35334 arg0 = CALL_EXPR_ARG (exp, 2);
35335 op0 = expand_normal (arg0);
35336 op1 = expand_normal (arg1);
35337 op2 = expand_normal (arg2);
35338 mode0 = insn_data[icode].operand[0].mode;
35339 mode1 = insn_data[icode].operand[1].mode;
35340 mode2 = insn_data[icode].operand[2].mode;
35341
35342 op0 = ix86_zero_extend_to_Pmode (op0);
35343 op0 = gen_rtx_MEM (mode1, op0);
35344
35345 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35346 op0 = copy_to_mode_reg (mode0, op0);
35347 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35348 op1 = copy_to_mode_reg (mode1, op1);
35349 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35350 op2 = copy_to_mode_reg (mode2, op2);
35351 pat = GEN_FCN (icode) (op0, op1, op2);
35352 if (! pat)
35353 return 0;
35354 emit_insn (pat);
35355 return 0;
35356
35357 case IX86_BUILTIN_LDMXCSR:
35358 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35359 target = assign_386_stack_local (SImode, SLOT_TEMP);
35360 emit_move_insn (target, op0);
35361 emit_insn (gen_sse_ldmxcsr (target));
35362 return 0;
35363
35364 case IX86_BUILTIN_STMXCSR:
35365 target = assign_386_stack_local (SImode, SLOT_TEMP);
35366 emit_insn (gen_sse_stmxcsr (target));
35367 return copy_to_mode_reg (SImode, target);
35368
35369 case IX86_BUILTIN_CLFLUSH:
35370 arg0 = CALL_EXPR_ARG (exp, 0);
35371 op0 = expand_normal (arg0);
35372 icode = CODE_FOR_sse2_clflush;
35373 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35374 op0 = ix86_zero_extend_to_Pmode (op0);
35375
35376 emit_insn (gen_sse2_clflush (op0));
35377 return 0;
35378
35379 case IX86_BUILTIN_CLFLUSHOPT:
35380 arg0 = CALL_EXPR_ARG (exp, 0);
35381 op0 = expand_normal (arg0);
35382 icode = CODE_FOR_clflushopt;
35383 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35384 op0 = ix86_zero_extend_to_Pmode (op0);
35385
35386 emit_insn (gen_clflushopt (op0));
35387 return 0;
35388
35389 case IX86_BUILTIN_MONITOR:
35390 arg0 = CALL_EXPR_ARG (exp, 0);
35391 arg1 = CALL_EXPR_ARG (exp, 1);
35392 arg2 = CALL_EXPR_ARG (exp, 2);
35393 op0 = expand_normal (arg0);
35394 op1 = expand_normal (arg1);
35395 op2 = expand_normal (arg2);
35396 if (!REG_P (op0))
35397 op0 = ix86_zero_extend_to_Pmode (op0);
35398 if (!REG_P (op1))
35399 op1 = copy_to_mode_reg (SImode, op1);
35400 if (!REG_P (op2))
35401 op2 = copy_to_mode_reg (SImode, op2);
35402 emit_insn (ix86_gen_monitor (op0, op1, op2));
35403 return 0;
35404
35405 case IX86_BUILTIN_MWAIT:
35406 arg0 = CALL_EXPR_ARG (exp, 0);
35407 arg1 = CALL_EXPR_ARG (exp, 1);
35408 op0 = expand_normal (arg0);
35409 op1 = expand_normal (arg1);
35410 if (!REG_P (op0))
35411 op0 = copy_to_mode_reg (SImode, op0);
35412 if (!REG_P (op1))
35413 op1 = copy_to_mode_reg (SImode, op1);
35414 emit_insn (gen_sse3_mwait (op0, op1));
35415 return 0;
35416
35417 case IX86_BUILTIN_VEC_INIT_V2SI:
35418 case IX86_BUILTIN_VEC_INIT_V4HI:
35419 case IX86_BUILTIN_VEC_INIT_V8QI:
35420 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35421
35422 case IX86_BUILTIN_VEC_EXT_V2DF:
35423 case IX86_BUILTIN_VEC_EXT_V2DI:
35424 case IX86_BUILTIN_VEC_EXT_V4SF:
35425 case IX86_BUILTIN_VEC_EXT_V4SI:
35426 case IX86_BUILTIN_VEC_EXT_V8HI:
35427 case IX86_BUILTIN_VEC_EXT_V2SI:
35428 case IX86_BUILTIN_VEC_EXT_V4HI:
35429 case IX86_BUILTIN_VEC_EXT_V16QI:
35430 return ix86_expand_vec_ext_builtin (exp, target);
35431
35432 case IX86_BUILTIN_VEC_SET_V2DI:
35433 case IX86_BUILTIN_VEC_SET_V4SF:
35434 case IX86_BUILTIN_VEC_SET_V4SI:
35435 case IX86_BUILTIN_VEC_SET_V8HI:
35436 case IX86_BUILTIN_VEC_SET_V4HI:
35437 case IX86_BUILTIN_VEC_SET_V16QI:
35438 return ix86_expand_vec_set_builtin (exp);
35439
35440 case IX86_BUILTIN_INFQ:
35441 case IX86_BUILTIN_HUGE_VALQ:
35442 {
35443 REAL_VALUE_TYPE inf;
35444 rtx tmp;
35445
35446 real_inf (&inf);
35447 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35448
35449 tmp = validize_mem (force_const_mem (mode, tmp));
35450
35451 if (target == 0)
35452 target = gen_reg_rtx (mode);
35453
35454 emit_move_insn (target, tmp);
35455 return target;
35456 }
35457
35458 case IX86_BUILTIN_RDPMC:
35459 case IX86_BUILTIN_RDTSC:
35460 case IX86_BUILTIN_RDTSCP:
35461
35462 op0 = gen_reg_rtx (DImode);
35463 op1 = gen_reg_rtx (DImode);
35464
35465 if (fcode == IX86_BUILTIN_RDPMC)
35466 {
35467 arg0 = CALL_EXPR_ARG (exp, 0);
35468 op2 = expand_normal (arg0);
35469 if (!register_operand (op2, SImode))
35470 op2 = copy_to_mode_reg (SImode, op2);
35471
35472 insn = (TARGET_64BIT
35473 ? gen_rdpmc_rex64 (op0, op1, op2)
35474 : gen_rdpmc (op0, op2));
35475 emit_insn (insn);
35476 }
35477 else if (fcode == IX86_BUILTIN_RDTSC)
35478 {
35479 insn = (TARGET_64BIT
35480 ? gen_rdtsc_rex64 (op0, op1)
35481 : gen_rdtsc (op0));
35482 emit_insn (insn);
35483 }
35484 else
35485 {
35486 op2 = gen_reg_rtx (SImode);
35487
35488 insn = (TARGET_64BIT
35489 ? gen_rdtscp_rex64 (op0, op1, op2)
35490 : gen_rdtscp (op0, op2));
35491 emit_insn (insn);
35492
35493 arg0 = CALL_EXPR_ARG (exp, 0);
35494 op4 = expand_normal (arg0);
35495 if (!address_operand (op4, VOIDmode))
35496 {
35497 op4 = convert_memory_address (Pmode, op4);
35498 op4 = copy_addr_to_reg (op4);
35499 }
35500 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35501 }
35502
35503 if (target == 0)
35504 {
35505 /* mode is VOIDmode if __builtin_rd* has been called
35506 without lhs. */
35507 if (mode == VOIDmode)
35508 return target;
35509 target = gen_reg_rtx (mode);
35510 }
35511
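/* On 64-bit targets the low and high 32 bits of the result are
   returned in two registers; recombine them below as
   op0 | (op1 << 32).  */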
35512 if (TARGET_64BIT)
35513 {
35514 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35515 op1, 1, OPTAB_DIRECT);
35516 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35517 op0, 1, OPTAB_DIRECT);
35518 }
35519
35520 emit_move_insn (target, op0);
35521 return target;
35522
35523 case IX86_BUILTIN_FXSAVE:
35524 case IX86_BUILTIN_FXRSTOR:
35525 case IX86_BUILTIN_FXSAVE64:
35526 case IX86_BUILTIN_FXRSTOR64:
35527 case IX86_BUILTIN_FNSTENV:
35528 case IX86_BUILTIN_FLDENV:
35529 mode0 = BLKmode;
35530 switch (fcode)
35531 {
35532 case IX86_BUILTIN_FXSAVE:
35533 icode = CODE_FOR_fxsave;
35534 break;
35535 case IX86_BUILTIN_FXRSTOR:
35536 icode = CODE_FOR_fxrstor;
35537 break;
35538 case IX86_BUILTIN_FXSAVE64:
35539 icode = CODE_FOR_fxsave64;
35540 break;
35541 case IX86_BUILTIN_FXRSTOR64:
35542 icode = CODE_FOR_fxrstor64;
35543 break;
35544 case IX86_BUILTIN_FNSTENV:
35545 icode = CODE_FOR_fnstenv;
35546 break;
35547 case IX86_BUILTIN_FLDENV:
35548 icode = CODE_FOR_fldenv;
35549 break;
35550 default:
35551 gcc_unreachable ();
35552 }
35553
35554 arg0 = CALL_EXPR_ARG (exp, 0);
35555 op0 = expand_normal (arg0);
35556
35557 if (!address_operand (op0, VOIDmode))
35558 {
35559 op0 = convert_memory_address (Pmode, op0);
35560 op0 = copy_addr_to_reg (op0);
35561 }
35562 op0 = gen_rtx_MEM (mode0, op0);
35563
35564 pat = GEN_FCN (icode) (op0);
35565 if (pat)
35566 emit_insn (pat);
35567 return 0;
35568
35569 case IX86_BUILTIN_XSAVE:
35570 case IX86_BUILTIN_XRSTOR:
35571 case IX86_BUILTIN_XSAVE64:
35572 case IX86_BUILTIN_XRSTOR64:
35573 case IX86_BUILTIN_XSAVEOPT:
35574 case IX86_BUILTIN_XSAVEOPT64:
35575 case IX86_BUILTIN_XSAVES:
35576 case IX86_BUILTIN_XRSTORS:
35577 case IX86_BUILTIN_XSAVES64:
35578 case IX86_BUILTIN_XRSTORS64:
35579 case IX86_BUILTIN_XSAVEC:
35580 case IX86_BUILTIN_XSAVEC64:
35581 arg0 = CALL_EXPR_ARG (exp, 0);
35582 arg1 = CALL_EXPR_ARG (exp, 1);
35583 op0 = expand_normal (arg0);
35584 op1 = expand_normal (arg1);
35585
35586 if (!address_operand (op0, VOIDmode))
35587 {
35588 op0 = convert_memory_address (Pmode, op0);
35589 op0 = copy_addr_to_reg (op0);
35590 }
35591 op0 = gen_rtx_MEM (BLKmode, op0);
35592
35593 op1 = force_reg (DImode, op1);
35594
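/* The requested-feature mask is passed to the xsave/xrstor family in
   EDX:EAX, so on 64-bit targets the DImode mask is split into its two
   SImode halves below.  */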
35595 if (TARGET_64BIT)
35596 {
35597 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35598 NULL, 1, OPTAB_DIRECT);
35599 switch (fcode)
35600 {
35601 case IX86_BUILTIN_XSAVE:
35602 icode = CODE_FOR_xsave_rex64;
35603 break;
35604 case IX86_BUILTIN_XRSTOR:
35605 icode = CODE_FOR_xrstor_rex64;
35606 break;
35607 case IX86_BUILTIN_XSAVE64:
35608 icode = CODE_FOR_xsave64;
35609 break;
35610 case IX86_BUILTIN_XRSTOR64:
35611 icode = CODE_FOR_xrstor64;
35612 break;
35613 case IX86_BUILTIN_XSAVEOPT:
35614 icode = CODE_FOR_xsaveopt_rex64;
35615 break;
35616 case IX86_BUILTIN_XSAVEOPT64:
35617 icode = CODE_FOR_xsaveopt64;
35618 break;
35619 case IX86_BUILTIN_XSAVES:
35620 icode = CODE_FOR_xsaves_rex64;
35621 break;
35622 case IX86_BUILTIN_XRSTORS:
35623 icode = CODE_FOR_xrstors_rex64;
35624 break;
35625 case IX86_BUILTIN_XSAVES64:
35626 icode = CODE_FOR_xsaves64;
35627 break;
35628 case IX86_BUILTIN_XRSTORS64:
35629 icode = CODE_FOR_xrstors64;
35630 break;
35631 case IX86_BUILTIN_XSAVEC:
35632 icode = CODE_FOR_xsavec_rex64;
35633 break;
35634 case IX86_BUILTIN_XSAVEC64:
35635 icode = CODE_FOR_xsavec64;
35636 break;
35637 default:
35638 gcc_unreachable ();
35639 }
35640
35641 op2 = gen_lowpart (SImode, op2);
35642 op1 = gen_lowpart (SImode, op1);
35643 pat = GEN_FCN (icode) (op0, op1, op2);
35644 }
35645 else
35646 {
35647 switch (fcode)
35648 {
35649 case IX86_BUILTIN_XSAVE:
35650 icode = CODE_FOR_xsave;
35651 break;
35652 case IX86_BUILTIN_XRSTOR:
35653 icode = CODE_FOR_xrstor;
35654 break;
35655 case IX86_BUILTIN_XSAVEOPT:
35656 icode = CODE_FOR_xsaveopt;
35657 break;
35658 case IX86_BUILTIN_XSAVES:
35659 icode = CODE_FOR_xsaves;
35660 break;
35661 case IX86_BUILTIN_XRSTORS:
35662 icode = CODE_FOR_xrstors;
35663 break;
35664 case IX86_BUILTIN_XSAVEC:
35665 icode = CODE_FOR_xsavec;
35666 break;
35667 default:
35668 gcc_unreachable ();
35669 }
35670 pat = GEN_FCN (icode) (op0, op1);
35671 }
35672
35673 if (pat)
35674 emit_insn (pat);
35675 return 0;
35676
35677 case IX86_BUILTIN_LLWPCB:
35678 arg0 = CALL_EXPR_ARG (exp, 0);
35679 op0 = expand_normal (arg0);
35680 icode = CODE_FOR_lwp_llwpcb;
35681 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35682 op0 = ix86_zero_extend_to_Pmode (op0);
35683 emit_insn (gen_lwp_llwpcb (op0));
35684 return 0;
35685
35686 case IX86_BUILTIN_SLWPCB:
35687 icode = CODE_FOR_lwp_slwpcb;
35688 if (!target
35689 || !insn_data[icode].operand[0].predicate (target, Pmode))
35690 target = gen_reg_rtx (Pmode);
35691 emit_insn (gen_lwp_slwpcb (target));
35692 return target;
35693
35694 case IX86_BUILTIN_BEXTRI32:
35695 case IX86_BUILTIN_BEXTRI64:
35696 arg0 = CALL_EXPR_ARG (exp, 0);
35697 arg1 = CALL_EXPR_ARG (exp, 1);
35698 op0 = expand_normal (arg0);
35699 op1 = expand_normal (arg1);
35700 icode = (fcode == IX86_BUILTIN_BEXTRI32
35701 ? CODE_FOR_tbm_bextri_si
35702 : CODE_FOR_tbm_bextri_di);
35703 if (!CONST_INT_P (op1))
35704 {
35705 error ("last argument must be an immediate");
35706 return const0_rtx;
35707 }
35708 else
35709 {
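/* The BEXTRI control operand packs the starting bit position in bits
   7:0 and the field length in bits 15:8; split it into the two
   separate operands the insn pattern expects.  */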
35710 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35711 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35712 op1 = GEN_INT (length);
35713 op2 = GEN_INT (lsb_index);
35714 pat = GEN_FCN (icode) (target, op0, op1, op2);
35715 if (pat)
35716 emit_insn (pat);
35717 return target;
35718 }
35719
35720 case IX86_BUILTIN_RDRAND16_STEP:
35721 icode = CODE_FOR_rdrandhi_1;
35722 mode0 = HImode;
35723 goto rdrand_step;
35724
35725 case IX86_BUILTIN_RDRAND32_STEP:
35726 icode = CODE_FOR_rdrandsi_1;
35727 mode0 = SImode;
35728 goto rdrand_step;
35729
35730 case IX86_BUILTIN_RDRAND64_STEP:
35731 icode = CODE_FOR_rdranddi_1;
35732 mode0 = DImode;
35733
35734 rdrand_step:
35735 op0 = gen_reg_rtx (mode0);
35736 emit_insn (GEN_FCN (icode) (op0));
35737
35738 arg0 = CALL_EXPR_ARG (exp, 0);
35739 op1 = expand_normal (arg0);
35740 if (!address_operand (op1, VOIDmode))
35741 {
35742 op1 = convert_memory_address (Pmode, op1);
35743 op1 = copy_addr_to_reg (op1);
35744 }
35745 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35746
35747 op1 = gen_reg_rtx (SImode);
35748 emit_move_insn (op1, CONST1_RTX (SImode));
35749
35750 /* Emit SImode conditional move. */
35751 if (mode0 == HImode)
35752 {
35753 op2 = gen_reg_rtx (SImode);
35754 emit_insn (gen_zero_extendhisi2 (op2, op0));
35755 }
35756 else if (mode0 == SImode)
35757 op2 = op0;
35758 else
35759 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35760
35761 if (target == 0
35762 || !register_operand (target, SImode))
35763 target = gen_reg_rtx (SImode);
35764
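/* rdrand sets CF on success and clears it (leaving the destination
   zeroed) on failure, so the conditional move below yields 1 on
   success and 0 on failure.  */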
35765 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35766 const0_rtx);
35767 emit_insn (gen_rtx_SET (VOIDmode, target,
35768 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35769 return target;
35770
35771 case IX86_BUILTIN_RDSEED16_STEP:
35772 icode = CODE_FOR_rdseedhi_1;
35773 mode0 = HImode;
35774 goto rdseed_step;
35775
35776 case IX86_BUILTIN_RDSEED32_STEP:
35777 icode = CODE_FOR_rdseedsi_1;
35778 mode0 = SImode;
35779 goto rdseed_step;
35780
35781 case IX86_BUILTIN_RDSEED64_STEP:
35782 icode = CODE_FOR_rdseeddi_1;
35783 mode0 = DImode;
35784
35785 rdseed_step:
35786 op0 = gen_reg_rtx (mode0);
35787 emit_insn (GEN_FCN (icode) (op0));
35788
35789 arg0 = CALL_EXPR_ARG (exp, 0);
35790 op1 = expand_normal (arg0);
35791 if (!address_operand (op1, VOIDmode))
35792 {
35793 op1 = convert_memory_address (Pmode, op1);
35794 op1 = copy_addr_to_reg (op1);
35795 }
35796 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35797
35798 op2 = gen_reg_rtx (QImode);
35799
35800 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35801 const0_rtx);
35802 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35803
35804 if (target == 0
35805 || !register_operand (target, SImode))
35806 target = gen_reg_rtx (SImode);
35807
35808 emit_insn (gen_zero_extendqisi2 (target, op2));
35809 return target;
35810
35811 case IX86_BUILTIN_SBB32:
35812 icode = CODE_FOR_subsi3_carry;
35813 mode0 = SImode;
35814 goto addcarryx;
35815
35816 case IX86_BUILTIN_SBB64:
35817 icode = CODE_FOR_subdi3_carry;
35818 mode0 = DImode;
35819 goto addcarryx;
35820
35821 case IX86_BUILTIN_ADDCARRYX32:
35822 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35823 mode0 = SImode;
35824 goto addcarryx;
35825
35826 case IX86_BUILTIN_ADDCARRYX64:
35827 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35828 mode0 = DImode;
35829
35830 addcarryx:
35831 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35832 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35833 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35834 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35835
35836 op0 = gen_reg_rtx (QImode);
35837
35838 /* Generate CF from input operand. */
35839 op1 = expand_normal (arg0);
35840 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35841 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
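/* Adding -1 (0xff in QImode) carries out exactly when c_in is nonzero,
   which loads c_in into the carry flag for the carry-using
   add/subtract below.  */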
35842
35843 /* Generate the carry-using add/subtract (ADCX, ADC or SBB) to compute X+Y+CF. */
35844 op2 = expand_normal (arg1);
35845 op3 = expand_normal (arg2);
35846
35847 if (!REG_P (op2))
35848 op2 = copy_to_mode_reg (mode0, op2);
35849 if (!REG_P (op3))
35850 op3 = copy_to_mode_reg (mode0, op3);
35851
35852 op0 = gen_reg_rtx (mode0);
35853
35854 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35855 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35856 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35857
35858 /* Store the result. */
35859 op4 = expand_normal (arg3);
35860 if (!address_operand (op4, VOIDmode))
35861 {
35862 op4 = convert_memory_address (Pmode, op4);
35863 op4 = copy_addr_to_reg (op4);
35864 }
35865 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35866
35867 /* Return current CF value. */
35868 if (target == 0)
35869 target = gen_reg_rtx (QImode);
35870
35871 PUT_MODE (pat, QImode);
35872 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35873 return target;
35874
35875 case IX86_BUILTIN_READ_FLAGS:
35876 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35877
35878 if (optimize
35879 || target == NULL_RTX
35880 || !nonimmediate_operand (target, word_mode)
35881 || GET_MODE (target) != word_mode)
35882 target = gen_reg_rtx (word_mode);
35883
35884 emit_insn (gen_pop (target));
35885 return target;
35886
35887 case IX86_BUILTIN_WRITE_FLAGS:
35888
35889 arg0 = CALL_EXPR_ARG (exp, 0);
35890 op0 = expand_normal (arg0);
35891 if (!general_no_elim_operand (op0, word_mode))
35892 op0 = copy_to_mode_reg (word_mode, op0);
35893
35894 emit_insn (gen_push (op0));
35895 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35896 return 0;
35897
35898 case IX86_BUILTIN_KORTESTC16:
35899 icode = CODE_FOR_kortestchi;
35900 mode0 = HImode;
35901 mode1 = CCCmode;
35902 goto kortest;
35903
35904 case IX86_BUILTIN_KORTESTZ16:
35905 icode = CODE_FOR_kortestzhi;
35906 mode0 = HImode;
35907 mode1 = CCZmode;
35908
35909 kortest:
35910 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35911 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35912 op0 = expand_normal (arg0);
35913 op1 = expand_normal (arg1);
35914
35915 op0 = copy_to_reg (op0);
35916 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35917 op1 = copy_to_reg (op1);
35918 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35919
35920 target = gen_reg_rtx (QImode);
35921 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35922
35923 /* Emit kortest. */
35924 emit_insn (GEN_FCN (icode) (op0, op1));
35925 /* And use setcc to return result from flags. */
35926 ix86_expand_setcc (target, EQ,
35927 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35928 return target;
35929
35930 case IX86_BUILTIN_GATHERSIV2DF:
35931 icode = CODE_FOR_avx2_gathersiv2df;
35932 goto gather_gen;
35933 case IX86_BUILTIN_GATHERSIV4DF:
35934 icode = CODE_FOR_avx2_gathersiv4df;
35935 goto gather_gen;
35936 case IX86_BUILTIN_GATHERDIV2DF:
35937 icode = CODE_FOR_avx2_gatherdiv2df;
35938 goto gather_gen;
35939 case IX86_BUILTIN_GATHERDIV4DF:
35940 icode = CODE_FOR_avx2_gatherdiv4df;
35941 goto gather_gen;
35942 case IX86_BUILTIN_GATHERSIV4SF:
35943 icode = CODE_FOR_avx2_gathersiv4sf;
35944 goto gather_gen;
35945 case IX86_BUILTIN_GATHERSIV8SF:
35946 icode = CODE_FOR_avx2_gathersiv8sf;
35947 goto gather_gen;
35948 case IX86_BUILTIN_GATHERDIV4SF:
35949 icode = CODE_FOR_avx2_gatherdiv4sf;
35950 goto gather_gen;
35951 case IX86_BUILTIN_GATHERDIV8SF:
35952 icode = CODE_FOR_avx2_gatherdiv8sf;
35953 goto gather_gen;
35954 case IX86_BUILTIN_GATHERSIV2DI:
35955 icode = CODE_FOR_avx2_gathersiv2di;
35956 goto gather_gen;
35957 case IX86_BUILTIN_GATHERSIV4DI:
35958 icode = CODE_FOR_avx2_gathersiv4di;
35959 goto gather_gen;
35960 case IX86_BUILTIN_GATHERDIV2DI:
35961 icode = CODE_FOR_avx2_gatherdiv2di;
35962 goto gather_gen;
35963 case IX86_BUILTIN_GATHERDIV4DI:
35964 icode = CODE_FOR_avx2_gatherdiv4di;
35965 goto gather_gen;
35966 case IX86_BUILTIN_GATHERSIV4SI:
35967 icode = CODE_FOR_avx2_gathersiv4si;
35968 goto gather_gen;
35969 case IX86_BUILTIN_GATHERSIV8SI:
35970 icode = CODE_FOR_avx2_gathersiv8si;
35971 goto gather_gen;
35972 case IX86_BUILTIN_GATHERDIV4SI:
35973 icode = CODE_FOR_avx2_gatherdiv4si;
35974 goto gather_gen;
35975 case IX86_BUILTIN_GATHERDIV8SI:
35976 icode = CODE_FOR_avx2_gatherdiv8si;
35977 goto gather_gen;
35978 case IX86_BUILTIN_GATHERALTSIV4DF:
35979 icode = CODE_FOR_avx2_gathersiv4df;
35980 goto gather_gen;
35981 case IX86_BUILTIN_GATHERALTDIV8SF:
35982 icode = CODE_FOR_avx2_gatherdiv8sf;
35983 goto gather_gen;
35984 case IX86_BUILTIN_GATHERALTSIV4DI:
35985 icode = CODE_FOR_avx2_gathersiv4di;
35986 goto gather_gen;
35987 case IX86_BUILTIN_GATHERALTDIV8SI:
35988 icode = CODE_FOR_avx2_gatherdiv8si;
35989 goto gather_gen;
35990 case IX86_BUILTIN_GATHER3SIV16SF:
35991 icode = CODE_FOR_avx512f_gathersiv16sf;
35992 goto gather_gen;
35993 case IX86_BUILTIN_GATHER3SIV8DF:
35994 icode = CODE_FOR_avx512f_gathersiv8df;
35995 goto gather_gen;
35996 case IX86_BUILTIN_GATHER3DIV16SF:
35997 icode = CODE_FOR_avx512f_gatherdiv16sf;
35998 goto gather_gen;
35999 case IX86_BUILTIN_GATHER3DIV8DF:
36000 icode = CODE_FOR_avx512f_gatherdiv8df;
36001 goto gather_gen;
36002 case IX86_BUILTIN_GATHER3SIV16SI:
36003 icode = CODE_FOR_avx512f_gathersiv16si;
36004 goto gather_gen;
36005 case IX86_BUILTIN_GATHER3SIV8DI:
36006 icode = CODE_FOR_avx512f_gathersiv8di;
36007 goto gather_gen;
36008 case IX86_BUILTIN_GATHER3DIV16SI:
36009 icode = CODE_FOR_avx512f_gatherdiv16si;
36010 goto gather_gen;
36011 case IX86_BUILTIN_GATHER3DIV8DI:
36012 icode = CODE_FOR_avx512f_gatherdiv8di;
36013 goto gather_gen;
36014 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36015 icode = CODE_FOR_avx512f_gathersiv8df;
36016 goto gather_gen;
36017 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36018 icode = CODE_FOR_avx512f_gatherdiv16sf;
36019 goto gather_gen;
36020 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36021 icode = CODE_FOR_avx512f_gathersiv8di;
36022 goto gather_gen;
36023 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36024 icode = CODE_FOR_avx512f_gatherdiv16si;
36025 goto gather_gen;
36026 case IX86_BUILTIN_SCATTERSIV16SF:
36027 icode = CODE_FOR_avx512f_scattersiv16sf;
36028 goto scatter_gen;
36029 case IX86_BUILTIN_SCATTERSIV8DF:
36030 icode = CODE_FOR_avx512f_scattersiv8df;
36031 goto scatter_gen;
36032 case IX86_BUILTIN_SCATTERDIV16SF:
36033 icode = CODE_FOR_avx512f_scatterdiv16sf;
36034 goto scatter_gen;
36035 case IX86_BUILTIN_SCATTERDIV8DF:
36036 icode = CODE_FOR_avx512f_scatterdiv8df;
36037 goto scatter_gen;
36038 case IX86_BUILTIN_SCATTERSIV16SI:
36039 icode = CODE_FOR_avx512f_scattersiv16si;
36040 goto scatter_gen;
36041 case IX86_BUILTIN_SCATTERSIV8DI:
36042 icode = CODE_FOR_avx512f_scattersiv8di;
36043 goto scatter_gen;
36044 case IX86_BUILTIN_SCATTERDIV16SI:
36045 icode = CODE_FOR_avx512f_scatterdiv16si;
36046 goto scatter_gen;
36047 case IX86_BUILTIN_SCATTERDIV8DI:
36048 icode = CODE_FOR_avx512f_scatterdiv8di;
36049 goto scatter_gen;
36050
36051 case IX86_BUILTIN_GATHERPFDPD:
36052 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
36053 goto vec_prefetch_gen;
36054 case IX86_BUILTIN_GATHERPFDPS:
36055 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
36056 goto vec_prefetch_gen;
36057 case IX86_BUILTIN_GATHERPFQPD:
36058 icode = CODE_FOR_avx512pf_gatherpfv8didf;
36059 goto vec_prefetch_gen;
36060 case IX86_BUILTIN_GATHERPFQPS:
36061 icode = CODE_FOR_avx512pf_gatherpfv8disf;
36062 goto vec_prefetch_gen;
36063 case IX86_BUILTIN_SCATTERPFDPD:
36064 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
36065 goto vec_prefetch_gen;
36066 case IX86_BUILTIN_SCATTERPFDPS:
36067 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
36068 goto vec_prefetch_gen;
36069 case IX86_BUILTIN_SCATTERPFQPD:
36070 icode = CODE_FOR_avx512pf_scatterpfv8didf;
36071 goto vec_prefetch_gen;
36072 case IX86_BUILTIN_SCATTERPFQPS:
36073 icode = CODE_FOR_avx512pf_scatterpfv8disf;
36074 goto vec_prefetch_gen;
36075
36076 gather_gen:
36077 rtx half;
36078 rtx (*gen) (rtx, rtx);
36079
36080 arg0 = CALL_EXPR_ARG (exp, 0);
36081 arg1 = CALL_EXPR_ARG (exp, 1);
36082 arg2 = CALL_EXPR_ARG (exp, 2);
36083 arg3 = CALL_EXPR_ARG (exp, 3);
36084 arg4 = CALL_EXPR_ARG (exp, 4);
36085 op0 = expand_normal (arg0);
36086 op1 = expand_normal (arg1);
36087 op2 = expand_normal (arg2);
36088 op3 = expand_normal (arg3);
36089 op4 = expand_normal (arg4);
36090 /* Note the arg order is different from the operand order. */
36091 mode0 = insn_data[icode].operand[1].mode;
36092 mode2 = insn_data[icode].operand[3].mode;
36093 mode3 = insn_data[icode].operand[4].mode;
36094 mode4 = insn_data[icode].operand[5].mode;
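    /* The builtin arguments map to insn operands 1..5: ARG0 is the
       source/merge vector (operand 1), ARG1 the base pointer (operand 2),
       ARG2 the index vector (operand 3), ARG3 the mask (operand 4) and
       ARG4 the scale (operand 5); operand 0 is the destination.  */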
36095
36096 if (target == NULL_RTX
36097 || GET_MODE (target) != insn_data[icode].operand[0].mode
36098 || !insn_data[icode].operand[0].predicate (target,
36099 GET_MODE (target)))
36100 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
36101 else
36102 subtarget = target;
36103
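    /* The *ALT* variants pass a source/index/mask vector that is twice as
       wide as the underlying gather pattern expects, so extract the low half
       of the over-wide operand(s) before expanding.  */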
36104 switch (fcode)
36105 {
36106 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36107 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36108 half = gen_reg_rtx (V8SImode);
36109 if (!nonimmediate_operand (op2, V16SImode))
36110 op2 = copy_to_mode_reg (V16SImode, op2);
36111 emit_insn (gen_vec_extract_lo_v16si (half, op2));
36112 op2 = half;
36113 break;
36114 case IX86_BUILTIN_GATHERALTSIV4DF:
36115 case IX86_BUILTIN_GATHERALTSIV4DI:
36116 half = gen_reg_rtx (V4SImode);
36117 if (!nonimmediate_operand (op2, V8SImode))
36118 op2 = copy_to_mode_reg (V8SImode, op2);
36119 emit_insn (gen_vec_extract_lo_v8si (half, op2));
36120 op2 = half;
36121 break;
36122 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36123 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36124 half = gen_reg_rtx (mode0);
36125 if (mode0 == V8SFmode)
36126 gen = gen_vec_extract_lo_v16sf;
36127 else
36128 gen = gen_vec_extract_lo_v16si;
36129 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36130 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36131 emit_insn (gen (half, op0));
36132 op0 = half;
36133 if (GET_MODE (op3) != VOIDmode)
36134 {
36135 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36136 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36137 emit_insn (gen (half, op3));
36138 op3 = half;
36139 }
36140 break;
36141 case IX86_BUILTIN_GATHERALTDIV8SF:
36142 case IX86_BUILTIN_GATHERALTDIV8SI:
36143 half = gen_reg_rtx (mode0);
36144 if (mode0 == V4SFmode)
36145 gen = gen_vec_extract_lo_v8sf;
36146 else
36147 gen = gen_vec_extract_lo_v8si;
36148 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36149 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36150 emit_insn (gen (half, op0));
36151 op0 = half;
36152 if (GET_MODE (op3) != VOIDmode)
36153 {
36154 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36155 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36156 emit_insn (gen (half, op3));
36157 op3 = half;
36158 }
36159 break;
36160 default:
36161 break;
36162 }
36163
36164 /* Force the memory operand to use only a base register here; we
36165 don't want to do that for the memory operands of other builtin
36166 functions. */
36167 op1 = ix86_zero_extend_to_Pmode (op1);
36168
36169 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36170 op0 = copy_to_mode_reg (mode0, op0);
36171 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
36172 op1 = copy_to_mode_reg (Pmode, op1);
36173 if (!insn_data[icode].operand[3].predicate (op2, mode2))
36174 op2 = copy_to_mode_reg (mode2, op2);
36175 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
36176 {
36177 if (!insn_data[icode].operand[4].predicate (op3, mode3))
36178 op3 = copy_to_mode_reg (mode3, op3);
36179 }
36180 else
36181 {
36182 op3 = copy_to_reg (op3);
36183 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
36184 }
36185 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36186 {
36187 error ("the last argument must be scale 1, 2, 4, 8");
36188 return const0_rtx;
36189 }
36190
36191 /* Optimize. If mask is known to have all high bits set,
36192 replace op0 with pc_rtx to signal that the instruction
36193 overwrites the whole destination and doesn't use its
36194 previous contents. */
36195 if (optimize)
36196 {
36197 if (TREE_CODE (arg3) == INTEGER_CST)
36198 {
36199 if (integer_all_onesp (arg3))
36200 op0 = pc_rtx;
36201 }
36202 else if (TREE_CODE (arg3) == VECTOR_CST)
36203 {
36204 unsigned int negative = 0;
36205 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36206 {
36207 tree cst = VECTOR_CST_ELT (arg3, i);
36208 if (TREE_CODE (cst) == INTEGER_CST
36209 && tree_int_cst_sign_bit (cst))
36210 negative++;
36211 else if (TREE_CODE (cst) == REAL_CST
36212 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36213 negative++;
36214 }
36215 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36216 op0 = pc_rtx;
36217 }
36218 else if (TREE_CODE (arg3) == SSA_NAME
36219 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36220 {
36221 /* Recognize also when mask is like:
36222 __v2df src = _mm_setzero_pd ();
36223 __v2df mask = _mm_cmpeq_pd (src, src);
36224 or
36225 __v8sf src = _mm256_setzero_ps ();
36226 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36227 as that is a cheaper way to load all ones into
36228 a register than having to load a constant from
36229 memory. */
36230 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
36231 if (is_gimple_call (def_stmt))
36232 {
36233 tree fndecl = gimple_call_fndecl (def_stmt);
36234 if (fndecl
36235 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36236 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36237 {
36238 case IX86_BUILTIN_CMPPD:
36239 case IX86_BUILTIN_CMPPS:
36240 case IX86_BUILTIN_CMPPD256:
36241 case IX86_BUILTIN_CMPPS256:
36242 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36243 break;
36244 /* FALLTHRU */
36245 case IX86_BUILTIN_CMPEQPD:
36246 case IX86_BUILTIN_CMPEQPS:
36247 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36248 && initializer_zerop (gimple_call_arg (def_stmt,
36249 1)))
36250 op0 = pc_rtx;
36251 break;
36252 default:
36253 break;
36254 }
36255 }
36256 }
36257 }
36258
36259 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36260 if (! pat)
36261 return const0_rtx;
36262 emit_insn (pat);
36263
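    /* Gathers with a wide DI index but 32-bit elements only fill half of the
       destination vector, so hand back just the low half of SUBTARGET.  */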
36264 switch (fcode)
36265 {
36266 case IX86_BUILTIN_GATHER3DIV16SF:
36267 if (target == NULL_RTX)
36268 target = gen_reg_rtx (V8SFmode);
36269 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36270 break;
36271 case IX86_BUILTIN_GATHER3DIV16SI:
36272 if (target == NULL_RTX)
36273 target = gen_reg_rtx (V8SImode);
36274 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36275 break;
36276 case IX86_BUILTIN_GATHERDIV8SF:
36277 if (target == NULL_RTX)
36278 target = gen_reg_rtx (V4SFmode);
36279 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36280 break;
36281 case IX86_BUILTIN_GATHERDIV8SI:
36282 if (target == NULL_RTX)
36283 target = gen_reg_rtx (V4SImode);
36284 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36285 break;
36286 default:
36287 target = subtarget;
36288 break;
36289 }
36290 return target;
36291
36292 scatter_gen:
36293 arg0 = CALL_EXPR_ARG (exp, 0);
36294 arg1 = CALL_EXPR_ARG (exp, 1);
36295 arg2 = CALL_EXPR_ARG (exp, 2);
36296 arg3 = CALL_EXPR_ARG (exp, 3);
36297 arg4 = CALL_EXPR_ARG (exp, 4);
36298 op0 = expand_normal (arg0);
36299 op1 = expand_normal (arg1);
36300 op2 = expand_normal (arg2);
36301 op3 = expand_normal (arg3);
36302 op4 = expand_normal (arg4);
36303 mode1 = insn_data[icode].operand[1].mode;
36304 mode2 = insn_data[icode].operand[2].mode;
36305 mode3 = insn_data[icode].operand[3].mode;
36306 mode4 = insn_data[icode].operand[4].mode;
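    /* For scatters the builtin arguments map to insn operands 0..4: ARG0 is
       the base pointer, ARG1 the mask, ARG2 the index vector, ARG3 the
       source data and ARG4 the scale.  */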
36307
36308 /* Force the memory operand to use only a base register here; we
36309 don't want to do that for the memory operands of other builtin
36310 functions. */
36311 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36312
36313 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36314 op0 = copy_to_mode_reg (Pmode, op0);
36315
36316 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36317 {
36318 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36319 op1 = copy_to_mode_reg (mode1, op1);
36320 }
36321 else
36322 {
36323 op1 = copy_to_reg (op1);
36324 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36325 }
36326
36327 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36328 op2 = copy_to_mode_reg (mode2, op2);
36329
36330 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36331 op3 = copy_to_mode_reg (mode3, op3);
36332
36333 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36334 {
36335 error ("the last argument must be scale 1, 2, 4, 8");
36336 return const0_rtx;
36337 }
36338
36339 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36340 if (! pat)
36341 return const0_rtx;
36342
36343 emit_insn (pat);
36344 return 0;
36345
36346 vec_prefetch_gen:
36347 arg0 = CALL_EXPR_ARG (exp, 0);
36348 arg1 = CALL_EXPR_ARG (exp, 1);
36349 arg2 = CALL_EXPR_ARG (exp, 2);
36350 arg3 = CALL_EXPR_ARG (exp, 3);
36351 arg4 = CALL_EXPR_ARG (exp, 4);
36352 op0 = expand_normal (arg0);
36353 op1 = expand_normal (arg1);
36354 op2 = expand_normal (arg2);
36355 op3 = expand_normal (arg3);
36356 op4 = expand_normal (arg4);
36357 mode0 = insn_data[icode].operand[0].mode;
36358 mode1 = insn_data[icode].operand[1].mode;
36359 mode3 = insn_data[icode].operand[3].mode;
36360 mode4 = insn_data[icode].operand[4].mode;
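    /* For the prefetch builtins ARG0 is the mask (constm1_rtx means all
       lanes), ARG1 the index vector, ARG2 the base pointer, ARG3 the scale
       and ARG4 the locality hint, mapping to insn operands 0..4.  */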
36361
36362 if (GET_MODE (op0) == mode0
36363 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36364 {
36365 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36366 op0 = copy_to_mode_reg (mode0, op0);
36367 }
36368 else if (op0 != constm1_rtx)
36369 {
36370 op0 = copy_to_reg (op0);
36371 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36372 }
36373
36374 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36375 op1 = copy_to_mode_reg (mode1, op1);
36376
36377 /* Force the memory operand to use only a base register here; we
36378 don't want to do that for the memory operands of other builtin
36379 functions. */
36380 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36381
36382 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36383 op2 = copy_to_mode_reg (Pmode, op2);
36384
36385 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36386 {
36387 error ("the fourth argument must be scale 1, 2, 4, 8");
36388 return const0_rtx;
36389 }
36390
36391 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36392 {
36393 error ("incorrect hint operand");
36394 return const0_rtx;
36395 }
36396
36397 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36398 if (! pat)
36399 return const0_rtx;
36400
36401 emit_insn (pat);
36402
36403 return 0;
36404
36405 case IX86_BUILTIN_XABORT:
36406 icode = CODE_FOR_xabort;
36407 arg0 = CALL_EXPR_ARG (exp, 0);
36408 op0 = expand_normal (arg0);
36409 mode0 = insn_data[icode].operand[0].mode;
36410 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36411 {
36412 error ("the xabort's argument must be an 8-bit immediate");
36413 return const0_rtx;
36414 }
36415 emit_insn (gen_xabort (op0));
36416 return 0;
36417
36418 default:
36419 break;
36420 }
36421
36422 for (i = 0, d = bdesc_special_args;
36423 i < ARRAY_SIZE (bdesc_special_args);
36424 i++, d++)
36425 if (d->code == fcode)
36426 return ix86_expand_special_args_builtin (d, exp, target);
36427
36428 for (i = 0, d = bdesc_args;
36429 i < ARRAY_SIZE (bdesc_args);
36430 i++, d++)
36431 if (d->code == fcode)
36432 switch (fcode)
36433 {
36434 case IX86_BUILTIN_FABSQ:
36435 case IX86_BUILTIN_COPYSIGNQ:
36436 if (!TARGET_SSE)
36437 /* Emit a normal call if SSE isn't available. */
36438 return expand_call (exp, target, ignore);
36439 default:
36440 return ix86_expand_args_builtin (d, exp, target);
36441 }
36442
36443 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36444 if (d->code == fcode)
36445 return ix86_expand_sse_comi (d, exp, target);
36446
36447 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36448 if (d->code == fcode)
36449 return ix86_expand_round_builtin (d, exp, target);
36450
36451 for (i = 0, d = bdesc_pcmpestr;
36452 i < ARRAY_SIZE (bdesc_pcmpestr);
36453 i++, d++)
36454 if (d->code == fcode)
36455 return ix86_expand_sse_pcmpestr (d, exp, target);
36456
36457 for (i = 0, d = bdesc_pcmpistr;
36458 i < ARRAY_SIZE (bdesc_pcmpistr);
36459 i++, d++)
36460 if (d->code == fcode)
36461 return ix86_expand_sse_pcmpistr (d, exp, target);
36462
36463 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36464 if (d->code == fcode)
36465 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36466 (enum ix86_builtin_func_type)
36467 d->flag, d->comparison);
36468
36469 gcc_unreachable ();
36470 }
36471
36472 /* This returns the target-specific builtin with code CODE if
36473 current_function_decl has visibility on this builtin, which is checked
36474 using isa flags. Returns NULL_TREE otherwise. */
36475
36476 static tree ix86_get_builtin (enum ix86_builtins code)
36477 {
36478 struct cl_target_option *opts;
36479 tree target_tree = NULL_TREE;
36480
36481 /* Determine the isa flags of current_function_decl. */
36482
36483 if (current_function_decl)
36484 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36485
36486 if (target_tree == NULL)
36487 target_tree = target_option_default_node;
36488
36489 opts = TREE_TARGET_OPTION (target_tree);
36490
36491 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36492 return ix86_builtin_decl (code, true);
36493 else
36494 return NULL_TREE;
36495 }
36496
36497 /* Returns a function decl for a vectorized version of the builtin function
36498 FNDECL, with output vector type TYPE_OUT and input vector type TYPE_IN,
36499 or NULL_TREE if it is not available. */
36500
36501 static tree
36502 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36503 tree type_in)
36504 {
36505 enum machine_mode in_mode, out_mode;
36506 int in_n, out_n;
36507 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36508
36509 if (TREE_CODE (type_out) != VECTOR_TYPE
36510 || TREE_CODE (type_in) != VECTOR_TYPE
36511 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36512 return NULL_TREE;
36513
36514 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36515 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36516 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36517 in_n = TYPE_VECTOR_SUBPARTS (type_in);
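  /* Each case below matches the scalar builtin against the element modes and
     lane counts for which a vectorized target builtin exists (e.g. 2/4/8
     DFmode lanes or 4/8/16 SFmode lanes).  */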
36518
36519 switch (fn)
36520 {
36521 case BUILT_IN_SQRT:
36522 if (out_mode == DFmode && in_mode == DFmode)
36523 {
36524 if (out_n == 2 && in_n == 2)
36525 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36526 else if (out_n == 4 && in_n == 4)
36527 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36528 else if (out_n == 8 && in_n == 8)
36529 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36530 }
36531 break;
36532
36533 case BUILT_IN_EXP2F:
36534 if (out_mode == SFmode && in_mode == SFmode)
36535 {
36536 if (out_n == 16 && in_n == 16)
36537 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36538 }
36539 break;
36540
36541 case BUILT_IN_SQRTF:
36542 if (out_mode == SFmode && in_mode == SFmode)
36543 {
36544 if (out_n == 4 && in_n == 4)
36545 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36546 else if (out_n == 8 && in_n == 8)
36547 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36548 else if (out_n == 16 && in_n == 16)
36549 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36550 }
36551 break;
36552
36553 case BUILT_IN_IFLOOR:
36554 case BUILT_IN_LFLOOR:
36555 case BUILT_IN_LLFLOOR:
36556 /* The round insn does not trap on denormals. */
36557 if (flag_trapping_math || !TARGET_ROUND)
36558 break;
36559
36560 if (out_mode == SImode && in_mode == DFmode)
36561 {
36562 if (out_n == 4 && in_n == 2)
36563 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36564 else if (out_n == 8 && in_n == 4)
36565 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36566 else if (out_n == 16 && in_n == 8)
36567 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36568 }
36569 break;
36570
36571 case BUILT_IN_IFLOORF:
36572 case BUILT_IN_LFLOORF:
36573 case BUILT_IN_LLFLOORF:
36574 /* The round insn does not trap on denormals. */
36575 if (flag_trapping_math || !TARGET_ROUND)
36576 break;
36577
36578 if (out_mode == SImode && in_mode == SFmode)
36579 {
36580 if (out_n == 4 && in_n == 4)
36581 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36582 else if (out_n == 8 && in_n == 8)
36583 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36584 }
36585 break;
36586
36587 case BUILT_IN_ICEIL:
36588 case BUILT_IN_LCEIL:
36589 case BUILT_IN_LLCEIL:
36590 /* The round insn does not trap on denormals. */
36591 if (flag_trapping_math || !TARGET_ROUND)
36592 break;
36593
36594 if (out_mode == SImode && in_mode == DFmode)
36595 {
36596 if (out_n == 4 && in_n == 2)
36597 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36598 else if (out_n == 8 && in_n == 4)
36599 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36600 else if (out_n == 16 && in_n == 8)
36601 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36602 }
36603 break;
36604
36605 case BUILT_IN_ICEILF:
36606 case BUILT_IN_LCEILF:
36607 case BUILT_IN_LLCEILF:
36608 /* The round insn does not trap on denormals. */
36609 if (flag_trapping_math || !TARGET_ROUND)
36610 break;
36611
36612 if (out_mode == SImode && in_mode == SFmode)
36613 {
36614 if (out_n == 4 && in_n == 4)
36615 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36616 else if (out_n == 8 && in_n == 8)
36617 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36618 }
36619 break;
36620
36621 case BUILT_IN_IRINT:
36622 case BUILT_IN_LRINT:
36623 case BUILT_IN_LLRINT:
36624 if (out_mode == SImode && in_mode == DFmode)
36625 {
36626 if (out_n == 4 && in_n == 2)
36627 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36628 else if (out_n == 8 && in_n == 4)
36629 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36630 }
36631 break;
36632
36633 case BUILT_IN_IRINTF:
36634 case BUILT_IN_LRINTF:
36635 case BUILT_IN_LLRINTF:
36636 if (out_mode == SImode && in_mode == SFmode)
36637 {
36638 if (out_n == 4 && in_n == 4)
36639 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36640 else if (out_n == 8 && in_n == 8)
36641 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36642 }
36643 break;
36644
36645 case BUILT_IN_IROUND:
36646 case BUILT_IN_LROUND:
36647 case BUILT_IN_LLROUND:
36648 /* The round insn does not trap on denormals. */
36649 if (flag_trapping_math || !TARGET_ROUND)
36650 break;
36651
36652 if (out_mode == SImode && in_mode == DFmode)
36653 {
36654 if (out_n == 4 && in_n == 2)
36655 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36656 else if (out_n == 8 && in_n == 4)
36657 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36658 else if (out_n == 16 && in_n == 8)
36659 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36660 }
36661 break;
36662
36663 case BUILT_IN_IROUNDF:
36664 case BUILT_IN_LROUNDF:
36665 case BUILT_IN_LLROUNDF:
36666 /* The round insn does not trap on denormals. */
36667 if (flag_trapping_math || !TARGET_ROUND)
36668 break;
36669
36670 if (out_mode == SImode && in_mode == SFmode)
36671 {
36672 if (out_n == 4 && in_n == 4)
36673 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36674 else if (out_n == 8 && in_n == 8)
36675 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36676 }
36677 break;
36678
36679 case BUILT_IN_COPYSIGN:
36680 if (out_mode == DFmode && in_mode == DFmode)
36681 {
36682 if (out_n == 2 && in_n == 2)
36683 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36684 else if (out_n == 4 && in_n == 4)
36685 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36686 else if (out_n == 8 && in_n == 8)
36687 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36688 }
36689 break;
36690
36691 case BUILT_IN_COPYSIGNF:
36692 if (out_mode == SFmode && in_mode == SFmode)
36693 {
36694 if (out_n == 4 && in_n == 4)
36695 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36696 else if (out_n == 8 && in_n == 8)
36697 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36698 else if (out_n == 16 && in_n == 16)
36699 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36700 }
36701 break;
36702
36703 case BUILT_IN_FLOOR:
36704 /* The round insn does not trap on denormals. */
36705 if (flag_trapping_math || !TARGET_ROUND)
36706 break;
36707
36708 if (out_mode == DFmode && in_mode == DFmode)
36709 {
36710 if (out_n == 2 && in_n == 2)
36711 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36712 else if (out_n == 4 && in_n == 4)
36713 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36714 }
36715 break;
36716
36717 case BUILT_IN_FLOORF:
36718 /* The round insn does not trap on denormals. */
36719 if (flag_trapping_math || !TARGET_ROUND)
36720 break;
36721
36722 if (out_mode == SFmode && in_mode == SFmode)
36723 {
36724 if (out_n == 4 && in_n == 4)
36725 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36726 else if (out_n == 8 && in_n == 8)
36727 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36728 }
36729 break;
36730
36731 case BUILT_IN_CEIL:
36732 /* The round insn does not trap on denormals. */
36733 if (flag_trapping_math || !TARGET_ROUND)
36734 break;
36735
36736 if (out_mode == DFmode && in_mode == DFmode)
36737 {
36738 if (out_n == 2 && in_n == 2)
36739 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36740 else if (out_n == 4 && in_n == 4)
36741 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36742 }
36743 break;
36744
36745 case BUILT_IN_CEILF:
36746 /* The round insn does not trap on denormals. */
36747 if (flag_trapping_math || !TARGET_ROUND)
36748 break;
36749
36750 if (out_mode == SFmode && in_mode == SFmode)
36751 {
36752 if (out_n == 4 && in_n == 4)
36753 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36754 else if (out_n == 8 && in_n == 8)
36755 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36756 }
36757 break;
36758
36759 case BUILT_IN_TRUNC:
36760 /* The round insn does not trap on denormals. */
36761 if (flag_trapping_math || !TARGET_ROUND)
36762 break;
36763
36764 if (out_mode == DFmode && in_mode == DFmode)
36765 {
36766 if (out_n == 2 && in_n == 2)
36767 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36768 else if (out_n == 4 && in_n == 4)
36769 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36770 }
36771 break;
36772
36773 case BUILT_IN_TRUNCF:
36774 /* The round insn does not trap on denormals. */
36775 if (flag_trapping_math || !TARGET_ROUND)
36776 break;
36777
36778 if (out_mode == SFmode && in_mode == SFmode)
36779 {
36780 if (out_n == 4 && in_n == 4)
36781 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36782 else if (out_n == 8 && in_n == 8)
36783 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36784 }
36785 break;
36786
36787 case BUILT_IN_RINT:
36788 /* The round insn does not trap on denormals. */
36789 if (flag_trapping_math || !TARGET_ROUND)
36790 break;
36791
36792 if (out_mode == DFmode && in_mode == DFmode)
36793 {
36794 if (out_n == 2 && in_n == 2)
36795 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36796 else if (out_n == 4 && in_n == 4)
36797 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36798 }
36799 break;
36800
36801 case BUILT_IN_RINTF:
36802 /* The round insn does not trap on denormals. */
36803 if (flag_trapping_math || !TARGET_ROUND)
36804 break;
36805
36806 if (out_mode == SFmode && in_mode == SFmode)
36807 {
36808 if (out_n == 4 && in_n == 4)
36809 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36810 else if (out_n == 8 && in_n == 8)
36811 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36812 }
36813 break;
36814
36815 case BUILT_IN_ROUND:
36816 /* The round insn does not trap on denormals. */
36817 if (flag_trapping_math || !TARGET_ROUND)
36818 break;
36819
36820 if (out_mode == DFmode && in_mode == DFmode)
36821 {
36822 if (out_n == 2 && in_n == 2)
36823 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36824 else if (out_n == 4 && in_n == 4)
36825 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36826 }
36827 break;
36828
36829 case BUILT_IN_ROUNDF:
36830 /* The round insn does not trap on denormals. */
36831 if (flag_trapping_math || !TARGET_ROUND)
36832 break;
36833
36834 if (out_mode == SFmode && in_mode == SFmode)
36835 {
36836 if (out_n == 4 && in_n == 4)
36837 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36838 else if (out_n == 8 && in_n == 8)
36839 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36840 }
36841 break;
36842
36843 case BUILT_IN_FMA:
36844 if (out_mode == DFmode && in_mode == DFmode)
36845 {
36846 if (out_n == 2 && in_n == 2)
36847 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36848 if (out_n == 4 && in_n == 4)
36849 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36850 }
36851 break;
36852
36853 case BUILT_IN_FMAF:
36854 if (out_mode == SFmode && in_mode == SFmode)
36855 {
36856 if (out_n == 4 && in_n == 4)
36857 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36858 if (out_n == 8 && in_n == 8)
36859 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36860 }
36861 break;
36862
36863 default:
36864 break;
36865 }
36866
36867 /* Dispatch to a handler for a vectorization library. */
36868 if (ix86_veclib_handler)
36869 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36870 type_in);
36871
36872 return NULL_TREE;
36873 }
36874
36875 /* Handler for an SVML-style interface to
36876 a library with vectorized intrinsics. */
36877
36878 static tree
36879 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36880 {
36881 char name[20];
36882 tree fntype, new_fndecl, args;
36883 unsigned arity;
36884 const char *bname;
36885 enum machine_mode el_mode, in_mode;
36886 int n, in_n;
36887
36888 /* The SVML is suitable for unsafe math only. */
36889 if (!flag_unsafe_math_optimizations)
36890 return NULL_TREE;
36891
36892 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36893 n = TYPE_VECTOR_SUBPARTS (type_out);
36894 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36895 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36896 if (el_mode != in_mode
36897 || n != in_n)
36898 return NULL_TREE;
36899
36900 switch (fn)
36901 {
36902 case BUILT_IN_EXP:
36903 case BUILT_IN_LOG:
36904 case BUILT_IN_LOG10:
36905 case BUILT_IN_POW:
36906 case BUILT_IN_TANH:
36907 case BUILT_IN_TAN:
36908 case BUILT_IN_ATAN:
36909 case BUILT_IN_ATAN2:
36910 case BUILT_IN_ATANH:
36911 case BUILT_IN_CBRT:
36912 case BUILT_IN_SINH:
36913 case BUILT_IN_SIN:
36914 case BUILT_IN_ASINH:
36915 case BUILT_IN_ASIN:
36916 case BUILT_IN_COSH:
36917 case BUILT_IN_COS:
36918 case BUILT_IN_ACOSH:
36919 case BUILT_IN_ACOS:
36920 if (el_mode != DFmode || n != 2)
36921 return NULL_TREE;
36922 break;
36923
36924 case BUILT_IN_EXPF:
36925 case BUILT_IN_LOGF:
36926 case BUILT_IN_LOG10F:
36927 case BUILT_IN_POWF:
36928 case BUILT_IN_TANHF:
36929 case BUILT_IN_TANF:
36930 case BUILT_IN_ATANF:
36931 case BUILT_IN_ATAN2F:
36932 case BUILT_IN_ATANHF:
36933 case BUILT_IN_CBRTF:
36934 case BUILT_IN_SINHF:
36935 case BUILT_IN_SINF:
36936 case BUILT_IN_ASINHF:
36937 case BUILT_IN_ASINF:
36938 case BUILT_IN_COSHF:
36939 case BUILT_IN_COSF:
36940 case BUILT_IN_ACOSHF:
36941 case BUILT_IN_ACOSF:
36942 if (el_mode != SFmode || n != 4)
36943 return NULL_TREE;
36944 break;
36945
36946 default:
36947 return NULL_TREE;
36948 }
36949
36950 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36951
36952 if (fn == BUILT_IN_LOGF)
36953 strcpy (name, "vmlsLn4");
36954 else if (fn == BUILT_IN_LOG)
36955 strcpy (name, "vmldLn2");
36956 else if (n == 4)
36957 {
36958 sprintf (name, "vmls%s", bname+10);
36959 name[strlen (name)-1] = '4';
36960 }
36961 else
36962 sprintf (name, "vmld%s2", bname+10);
36963
36964 /* Convert to uppercase. */
36965 name[4] &= ~0x20;
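  /* E.g. BUILT_IN_SINF with 4 SFmode lanes becomes "vmlsSin4" and
     BUILT_IN_SIN with 2 DFmode lanes becomes "vmldSin2"; log gets the
     special "Ln" names above.  */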
36966
36967 arity = 0;
36968 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36969 args;
36970 args = TREE_CHAIN (args))
36971 arity++;
36972
36973 if (arity == 1)
36974 fntype = build_function_type_list (type_out, type_in, NULL);
36975 else
36976 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36977
36978 /* Build a function declaration for the vectorized function. */
36979 new_fndecl = build_decl (BUILTINS_LOCATION,
36980 FUNCTION_DECL, get_identifier (name), fntype);
36981 TREE_PUBLIC (new_fndecl) = 1;
36982 DECL_EXTERNAL (new_fndecl) = 1;
36983 DECL_IS_NOVOPS (new_fndecl) = 1;
36984 TREE_READONLY (new_fndecl) = 1;
36985
36986 return new_fndecl;
36987 }
36988
36989 /* Handler for an ACML-style interface to
36990 a library with vectorized intrinsics. */
36991
36992 static tree
36993 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36994 {
36995 char name[20] = "__vr.._";
36996 tree fntype, new_fndecl, args;
36997 unsigned arity;
36998 const char *bname;
36999 enum machine_mode el_mode, in_mode;
37000 int n, in_n;
37001
37002 /* The ACML is 64-bit only and suitable for unsafe math only, as
37003 it does not correctly support parts of IEEE (such as denormals)
37004 with the required precision. */
37005 if (!TARGET_64BIT
37006 || !flag_unsafe_math_optimizations)
37007 return NULL_TREE;
37008
37009 el_mode = TYPE_MODE (TREE_TYPE (type_out));
37010 n = TYPE_VECTOR_SUBPARTS (type_out);
37011 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37012 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37013 if (el_mode != in_mode
37014 || n != in_n)
37015 return NULL_TREE;
37016
37017 switch (fn)
37018 {
37019 case BUILT_IN_SIN:
37020 case BUILT_IN_COS:
37021 case BUILT_IN_EXP:
37022 case BUILT_IN_LOG:
37023 case BUILT_IN_LOG2:
37024 case BUILT_IN_LOG10:
37025 name[4] = 'd';
37026 name[5] = '2';
37027 if (el_mode != DFmode
37028 || n != 2)
37029 return NULL_TREE;
37030 break;
37031
37032 case BUILT_IN_SINF:
37033 case BUILT_IN_COSF:
37034 case BUILT_IN_EXPF:
37035 case BUILT_IN_POWF:
37036 case BUILT_IN_LOGF:
37037 case BUILT_IN_LOG2F:
37038 case BUILT_IN_LOG10F:
37039 name[4] = 's';
37040 name[5] = '4';
37041 if (el_mode != SFmode
37042 || n != 4)
37043 return NULL_TREE;
37044 break;
37045
37046 default:
37047 return NULL_TREE;
37048 }
37049
37050 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
37051 sprintf (name + 7, "%s", bname+10);
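  /* E.g. BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_SINF becomes
     "__vrs4_sinf".  */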
37052
37053 arity = 0;
37054 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
37055 args;
37056 args = TREE_CHAIN (args))
37057 arity++;
37058
37059 if (arity == 1)
37060 fntype = build_function_type_list (type_out, type_in, NULL);
37061 else
37062 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37063
37064 /* Build a function declaration for the vectorized function. */
37065 new_fndecl = build_decl (BUILTINS_LOCATION,
37066 FUNCTION_DECL, get_identifier (name), fntype);
37067 TREE_PUBLIC (new_fndecl) = 1;
37068 DECL_EXTERNAL (new_fndecl) = 1;
37069 DECL_IS_NOVOPS (new_fndecl) = 1;
37070 TREE_READONLY (new_fndecl) = 1;
37071
37072 return new_fndecl;
37073 }
37074
37075 /* Returns a decl of a function that implements gather load with
37076 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
37077 Returns NULL_TREE if it is not available. */
37078
37079 static tree
37080 ix86_vectorize_builtin_gather (const_tree mem_vectype,
37081 const_tree index_type, int scale)
37082 {
37083 bool si;
37084 enum ix86_builtins code;
37085
37086 if (! TARGET_AVX2)
37087 return NULL_TREE;
37088
37089 if ((TREE_CODE (index_type) != INTEGER_TYPE
37090 && !POINTER_TYPE_P (index_type))
37091 || (TYPE_MODE (index_type) != SImode
37092 && TYPE_MODE (index_type) != DImode))
37093 return NULL_TREE;
37094
37095 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
37096 return NULL_TREE;
37097
37098 /* v*gather* insn sign extends index to pointer mode. */
37099 if (TYPE_PRECISION (index_type) < POINTER_SIZE
37100 && TYPE_UNSIGNED (index_type))
37101 return NULL_TREE;
37102
37103 if (scale <= 0
37104 || scale > 8
37105 || (scale & (scale - 1)) != 0)
37106 return NULL_TREE;
37107
37108 si = TYPE_MODE (index_type) == SImode;
37109 switch (TYPE_MODE (mem_vectype))
37110 {
37111 case V2DFmode:
37112 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
37113 break;
37114 case V4DFmode:
37115 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
37116 break;
37117 case V2DImode:
37118 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
37119 break;
37120 case V4DImode:
37121 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
37122 break;
37123 case V4SFmode:
37124 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
37125 break;
37126 case V8SFmode:
37127 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
37128 break;
37129 case V4SImode:
37130 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
37131 break;
37132 case V8SImode:
37133 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
37134 break;
37135 case V8DFmode:
37136 if (TARGET_AVX512F)
37137 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
37138 else
37139 return NULL_TREE;
37140 break;
37141 case V8DImode:
37142 if (TARGET_AVX512F)
37143 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
37144 else
37145 return NULL_TREE;
37146 break;
37147 case V16SFmode:
37148 if (TARGET_AVX512F)
37149 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
37150 else
37151 return NULL_TREE;
37152 break;
37153 case V16SImode:
37154 if (TARGET_AVX512F)
37155 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
37156 else
37157 return NULL_TREE;
37158 break;
37159 default:
37160 return NULL_TREE;
37161 }
37162
37163 return ix86_get_builtin (code);
37164 }
37165
37166 /* Returns a decl of a target-specific builtin that implements the
37167 reciprocal of the function FN, or NULL_TREE if not available. */
37168
37169 static tree
37170 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
37171 {
37172 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
37173 && flag_finite_math_only && !flag_trapping_math
37174 && flag_unsafe_math_optimizations))
37175 return NULL_TREE;
37176
37177 if (md_fn)
37178 /* Machine dependent builtins. */
37179 switch (fn)
37180 {
37181 /* Vectorized version of sqrt to rsqrt conversion. */
37182 case IX86_BUILTIN_SQRTPS_NR:
37183 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
37184
37185 case IX86_BUILTIN_SQRTPS_NR256:
37186 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
37187
37188 default:
37189 return NULL_TREE;
37190 }
37191 else
37192 /* Normal builtins. */
37193 switch (fn)
37194 {
37195 /* Sqrt to rsqrt conversion. */
37196 case BUILT_IN_SQRTF:
37197 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
37198
37199 default:
37200 return NULL_TREE;
37201 }
37202 }
37203 \f
37204 /* Helper for avx_vpermilps256_operand et al. This is also used by
37205 the expansion functions to turn the parallel back into a mask.
37206 The return value is 0 for no match and the imm8+1 for a match. */
37207
37208 int
37209 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
37210 {
37211 unsigned i, nelt = GET_MODE_NUNITS (mode);
37212 unsigned mask = 0;
37213 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
37214
37215 if (XVECLEN (par, 0) != (int) nelt)
37216 return 0;
37217
37218 /* Validate that all of the elements are constants, and not totally
37219 out of range. Copy the data into an integral array to make the
37220 subsequent checks easier. */
37221 for (i = 0; i < nelt; ++i)
37222 {
37223 rtx er = XVECEXP (par, 0, i);
37224 unsigned HOST_WIDE_INT ei;
37225
37226 if (!CONST_INT_P (er))
37227 return 0;
37228 ei = INTVAL (er);
37229 if (ei >= nelt)
37230 return 0;
37231 ipar[i] = ei;
37232 }
37233
37234 switch (mode)
37235 {
37236 case V8DFmode:
37237 /* In the 512-bit DFmode case, we can only move elements within
37238 a 128-bit lane. First fill the second part of the mask,
37239 then fallthru. */
37240 for (i = 4; i < 6; ++i)
37241 {
37242 if (ipar[i] < 4 || ipar[i] >= 6)
37243 return 0;
37244 mask |= (ipar[i] - 4) << i;
37245 }
37246 for (i = 6; i < 8; ++i)
37247 {
37248 if (ipar[i] < 6)
37249 return 0;
37250 mask |= (ipar[i] - 6) << i;
37251 }
37252 /* FALLTHRU */
37253
37254 case V4DFmode:
37255 /* In the 256-bit DFmode case, we can only move elements within
37256 a 128-bit lane. */
37257 for (i = 0; i < 2; ++i)
37258 {
37259 if (ipar[i] >= 2)
37260 return 0;
37261 mask |= ipar[i] << i;
37262 }
37263 for (i = 2; i < 4; ++i)
37264 {
37265 if (ipar[i] < 2)
37266 return 0;
37267 mask |= (ipar[i] - 2) << i;
37268 }
37269 break;
37270
37271 case V16SFmode:
37272 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37273 must mirror the permutation in the lower 256 bits. */
37274 for (i = 0; i < 8; ++i)
37275 if (ipar[i] + 8 != ipar[i + 8])
37276 return 0;
37277 /* FALLTHRU */
37278
37279 case V8SFmode:
37280 /* In the 256-bit SFmode case, we have full freedom of
37281 movement within the low 128-bit lane, but the high 128-bit
37282 lane must mirror the exact same pattern. */
37283 for (i = 0; i < 4; ++i)
37284 if (ipar[i] + 4 != ipar[i + 4])
37285 return 0;
37286 nelt = 4;
37287 /* FALLTHRU */
37288
37289 case V2DFmode:
37290 case V4SFmode:
37291 /* In the 128-bit case, we have full freedom in the placement of
37292 the elements from the source operand. */
37293 for (i = 0; i < nelt; ++i)
37294 mask |= ipar[i] << (i * (nelt / 2));
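      /* Each element gets an (nelt/2)-bit field in the immediate; e.g. for
	 V4SF the parallel (2 3 0 1) yields mask 2 | 3<<2 | 0<<4 | 1<<6
	 = 0x4e.  */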
37295 break;
37296
37297 default:
37298 gcc_unreachable ();
37299 }
37300
37301 /* Make sure success has a non-zero value by adding one. */
37302 return mask + 1;
37303 }
37304
37305 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37306 the expansion functions to turn the parallel back into a mask.
37307 The return value is 0 for no match and the imm8+1 for a match. */
37308
37309 int
37310 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37311 {
37312 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37313 unsigned mask = 0;
37314 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37315
37316 if (XVECLEN (par, 0) != (int) nelt)
37317 return 0;
37318
37319 /* Validate that all of the elements are constants, and not totally
37320 out of range. Copy the data into an integral array to make the
37321 subsequent checks easier. */
37322 for (i = 0; i < nelt; ++i)
37323 {
37324 rtx er = XVECEXP (par, 0, i);
37325 unsigned HOST_WIDE_INT ei;
37326
37327 if (!CONST_INT_P (er))
37328 return 0;
37329 ei = INTVAL (er);
37330 if (ei >= 2 * nelt)
37331 return 0;
37332 ipar[i] = ei;
37333 }
37334
37335 /* Validate that each half of the permute selects consecutive elements. */
37336 for (i = 0; i < nelt2 - 1; ++i)
37337 if (ipar[i] + 1 != ipar[i + 1])
37338 return 0;
37339 for (i = nelt2; i < nelt - 1; ++i)
37340 if (ipar[i] + 1 != ipar[i + 1])
37341 return 0;
37342
37343 /* Reconstruct the mask. */
37344 for (i = 0; i < 2; ++i)
37345 {
37346 unsigned e = ipar[i * nelt2];
37347 if (e % nelt2)
37348 return 0;
37349 e /= nelt2;
37350 mask |= e << (i * 4);
37351 }
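  /* Each 128-bit half contributes a 4-bit field; e.g. for V4DF the parallel
     (0 1 6 7) selects lane 0 for the low half and lane 3 for the high half,
     giving mask 0x30.  */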
37352
37353 /* Make sure success has a non-zero value by adding one. */
37354 return mask + 1;
37355 }
37356 \f
37357 /* Return a register priority for hard reg REGNO. */
37358 static int
37359 ix86_register_priority (int hard_regno)
37360 {
37361 /* ebp and r13 as the base always want a displacement, and r12 as the
37362 base always wants an index. So discourage their use in an
37363 address. */
37364 if (hard_regno == R12_REG || hard_regno == R13_REG)
37365 return 0;
37366 if (hard_regno == BP_REG)
37367 return 1;
37368 /* New x86-64 int registers result in bigger code size. Discourage
37369 them. */
37370 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37371 return 2;
37372 /* New x86-64 SSE registers result in bigger code size. Discourage
37373 them. */
37374 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37375 return 2;
37376 /* Usage of AX register results in smaller code. Prefer it. */
37377 if (hard_regno == 0)
37378 return 4;
37379 return 3;
37380 }
37381
37382 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37383
37384 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37385 QImode must go into class Q_REGS.
37386 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37387 movdf to do mem-to-mem moves through integer regs. */
37388
37389 static reg_class_t
37390 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37391 {
37392 enum machine_mode mode = GET_MODE (x);
37393
37394 /* We're only allowed to return a subclass of CLASS. Many of the
37395 following checks fail for NO_REGS, so eliminate that early. */
37396 if (regclass == NO_REGS)
37397 return NO_REGS;
37398
37399 /* All classes can load zeros. */
37400 if (x == CONST0_RTX (mode))
37401 return regclass;
37402
37403 /* Force constants into memory if we are loading a (nonzero) constant into
37404 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37405 instructions to load from a constant. */
37406 if (CONSTANT_P (x)
37407 && (MAYBE_MMX_CLASS_P (regclass)
37408 || MAYBE_SSE_CLASS_P (regclass)
37409 || MAYBE_MASK_CLASS_P (regclass)))
37410 return NO_REGS;
37411
37412 /* Prefer SSE regs only, if we can use them for math. */
37413 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37414 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37415
37416 /* Floating-point constants need more complex checks. */
37417 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37418 {
37419 /* General regs can load everything. */
37420 if (reg_class_subset_p (regclass, GENERAL_REGS))
37421 return regclass;
37422
37423 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37424 zero above. We only want to wind up preferring 80387 registers if
37425 we plan on doing computation with them. */
37426 if (TARGET_80387
37427 && standard_80387_constant_p (x) > 0)
37428 {
37429 /* Limit class to non-sse. */
37430 if (regclass == FLOAT_SSE_REGS)
37431 return FLOAT_REGS;
37432 if (regclass == FP_TOP_SSE_REGS)
37433 return FP_TOP_REG;
37434 if (regclass == FP_SECOND_SSE_REGS)
37435 return FP_SECOND_REG;
37436 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37437 return regclass;
37438 }
37439
37440 return NO_REGS;
37441 }
37442
37443 /* Generally when we see PLUS here, it's the function invariant
37444 (plus soft-fp const_int), which can only be computed into general
37445 regs. */
37446 if (GET_CODE (x) == PLUS)
37447 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37448
37449 /* QImode constants are easy to load, but non-constant QImode data
37450 must go into Q_REGS. */
37451 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37452 {
37453 if (reg_class_subset_p (regclass, Q_REGS))
37454 return regclass;
37455 if (reg_class_subset_p (Q_REGS, regclass))
37456 return Q_REGS;
37457 return NO_REGS;
37458 }
37459
37460 return regclass;
37461 }
37462
37463 /* Discourage putting floating-point values in SSE registers unless
37464 SSE math is being used, and likewise for the 387 registers. */
37465 static reg_class_t
37466 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37467 {
37468 enum machine_mode mode = GET_MODE (x);
37469
37470 /* Restrict the output reload class to the register bank that we are doing
37471 math on. If we would like not to return a subset of CLASS, reject this
37472 alternative: if reload cannot do this, it will still use its choice. */
37473 mode = GET_MODE (x);
37474 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37475 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37476
37477 if (X87_FLOAT_MODE_P (mode))
37478 {
37479 if (regclass == FP_TOP_SSE_REGS)
37480 return FP_TOP_REG;
37481 else if (regclass == FP_SECOND_SSE_REGS)
37482 return FP_SECOND_REG;
37483 else
37484 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37485 }
37486
37487 return regclass;
37488 }
37489
37490 static reg_class_t
37491 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37492 enum machine_mode mode, secondary_reload_info *sri)
37493 {
37494 /* Double-word spills from general registers to non-offsettable memory
37495 references (zero-extended addresses) require special handling. */
37496 if (TARGET_64BIT
37497 && MEM_P (x)
37498 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37499 && INTEGER_CLASS_P (rclass)
37500 && !offsettable_memref_p (x))
37501 {
37502 sri->icode = (in_p
37503 ? CODE_FOR_reload_noff_load
37504 : CODE_FOR_reload_noff_store);
37505 /* Add the cost of moving address to a temporary. */
37506 sri->extra_cost = 1;
37507
37508 return NO_REGS;
37509 }
37510
37511 /* QImode spills from non-QI registers require an
37512 intermediate register on 32-bit targets. */
37513 if (mode == QImode
37514 && (MAYBE_MASK_CLASS_P (rclass)
37515 || (!TARGET_64BIT && !in_p
37516 && INTEGER_CLASS_P (rclass)
37517 && MAYBE_NON_Q_CLASS_P (rclass))))
37518 {
37519 int regno;
37520
37521 if (REG_P (x))
37522 regno = REGNO (x);
37523 else
37524 regno = -1;
37525
37526 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37527 regno = true_regnum (x);
37528
37529 /* Return Q_REGS if the operand is in memory. */
37530 if (regno == -1)
37531 return Q_REGS;
37532 }
37533
37534 /* This condition handles the corner case where an expression involving
37535 pointers gets vectorized. We're trying to use the address of a
37536 stack slot as a vector initializer.
37537
37538 (set (reg:V2DI 74 [ vect_cst_.2 ])
37539 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37540
37541 Eventually frame gets turned into sp+offset like this:
37542
37543 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37544 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37545 (const_int 392 [0x188]))))
37546
37547 That later gets turned into:
37548
37549 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37550 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37551 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37552
37553 We'll have the following reload recorded:
37554
37555 Reload 0: reload_in (DI) =
37556 (plus:DI (reg/f:DI 7 sp)
37557 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37558 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37559 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37560 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37561 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37562 reload_reg_rtx: (reg:V2DI 22 xmm1)
37563
37564 Which isn't going to work since SSE instructions can't handle scalar
37565 additions. Returning GENERAL_REGS forces the addition into integer
37566 register and reload can handle subsequent reloads without problems. */
37567
37568 if (in_p && GET_CODE (x) == PLUS
37569 && SSE_CLASS_P (rclass)
37570 && SCALAR_INT_MODE_P (mode))
37571 return GENERAL_REGS;
37572
37573 return NO_REGS;
37574 }
37575
37576 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37577
37578 static bool
37579 ix86_class_likely_spilled_p (reg_class_t rclass)
37580 {
37581 switch (rclass)
37582 {
37583 case AREG:
37584 case DREG:
37585 case CREG:
37586 case BREG:
37587 case AD_REGS:
37588 case SIREG:
37589 case DIREG:
37590 case SSE_FIRST_REG:
37591 case FP_TOP_REG:
37592 case FP_SECOND_REG:
37593 return true;
37594
37595 default:
37596 break;
37597 }
37598
37599 return false;
37600 }
37601
37602 /* If we are copying between general and FP registers, we need a memory
37603 location. The same is true for SSE and MMX registers.
37604
37605 To optimize register_move_cost performance, allow inline variant.
37606
37607 The macro can't work reliably when one of the CLASSES is a class containing
37608 registers from multiple units (SSE, MMX, integer). We avoid this by never
37609 combining those units in a single alternative in the machine description.
37610 Ensure that this constraint holds to avoid unexpected surprises.
37611
37612 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37613 enforce these sanity checks. */
37614
37615 static inline bool
37616 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37617 enum machine_mode mode, int strict)
37618 {
37619 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37620 return false;
37621 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37622 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37623 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37624 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37625 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37626 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37627 {
37628 gcc_assert (!strict || lra_in_progress);
37629 return true;
37630 }
37631
37632 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37633 return true;
37634
37635 /* Between mask and general, we have moves no larger than word size. */
37636 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
37637 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
37638 return true;
37639
37640 /* ??? This is a lie. We do have moves between mmx/general and between
37641 mmx/sse2. But by saying we need secondary memory we discourage the
37642 register allocator from using the mmx registers unless needed. */
37643 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37644 return true;
37645
37646 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37647 {
37648 /* SSE1 doesn't have any direct moves from other classes. */
37649 if (!TARGET_SSE2)
37650 return true;
37651
37652 /* If the target says that inter-unit moves are more expensive
37653 than moving through memory, then don't generate them. */
37654 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37655 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37656 return true;
37657
37658 /* Between SSE and general, we have moves no larger than word size. */
37659 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37660 return true;
37661 }
37662
37663 return false;
37664 }
37665
37666 bool
37667 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37668 enum machine_mode mode, int strict)
37669 {
37670 return inline_secondary_memory_needed (class1, class2, mode, strict);
37671 }
37672
37673 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37674
37675 On the 80386, this is the size of MODE in words,
37676 except in the FP regs, where a single reg is always enough. */
37677
37678 static unsigned char
37679 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37680 {
37681 if (MAYBE_INTEGER_CLASS_P (rclass))
37682 {
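      /* With the default long-double layout XFmode is 12 bytes with 32-bit
	 words and 16 bytes with 64-bit words, hence the hard-coded 3 resp. 2
	 registers below; XCmode doubles that.  */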
37683 if (mode == XFmode)
37684 return (TARGET_64BIT ? 2 : 3);
37685 else if (mode == XCmode)
37686 return (TARGET_64BIT ? 4 : 6);
37687 else
37688 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37689 }
37690 else
37691 {
37692 if (COMPLEX_MODE_P (mode))
37693 return 2;
37694 else
37695 return 1;
37696 }
37697 }
37698
37699 /* Return true if the registers in CLASS cannot represent the change from
37700 modes FROM to TO. */
37701
37702 bool
37703 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37704 enum reg_class regclass)
37705 {
37706 if (from == to)
37707 return false;
37708
37709 /* x87 registers can't do subreg at all, as all values are reformatted
37710 to extended precision. */
37711 if (MAYBE_FLOAT_CLASS_P (regclass))
37712 return true;
37713
37714 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37715 {
37716 /* Vector registers do not support QI or HImode loads. If we don't
37717 disallow a change to these modes, reload will assume it's ok to
37718 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37719 the vec_dupv4hi pattern. */
37720 if (GET_MODE_SIZE (from) < 4)
37721 return true;
37722 }
37723
37724 return false;
37725 }
37726
37727 /* Return the cost of moving data of mode M between a
37728 register and memory. A value of 2 is the default; this cost is
37729 relative to those in `REGISTER_MOVE_COST'.
37730
37731 This function is used extensively by register_move_cost, which is used to
37732 build tables at startup, so keep it inline.
37733 When IN is 2, return the maximum of the in and out move costs.
37734
37735 If moving between registers and memory is more expensive than
37736 between two registers, you should define this macro to express the
37737 relative cost.
37738
37739 Also model the increased cost of moving QImode registers in non
37740 Q_REGS classes.
37741 */
37742 static inline int
37743 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37744 int in)
37745 {
37746 int cost;
37747 if (FLOAT_CLASS_P (regclass))
37748 {
37749 int index;
37750 switch (mode)
37751 {
37752 case SFmode:
37753 index = 0;
37754 break;
37755 case DFmode:
37756 index = 1;
37757 break;
37758 case XFmode:
37759 index = 2;
37760 break;
37761 default:
37762 return 100;
37763 }
37764 if (in == 2)
37765 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37766 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37767 }
37768 if (SSE_CLASS_P (regclass))
37769 {
37770 int index;
37771 switch (GET_MODE_SIZE (mode))
37772 {
37773 case 4:
37774 index = 0;
37775 break;
37776 case 8:
37777 index = 1;
37778 break;
37779 case 16:
37780 index = 2;
37781 break;
37782 default:
37783 return 100;
37784 }
37785 if (in == 2)
37786 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37787 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37788 }
37789 if (MMX_CLASS_P (regclass))
37790 {
37791 int index;
37792 switch (GET_MODE_SIZE (mode))
37793 {
37794 case 4:
37795 index = 0;
37796 break;
37797 case 8:
37798 index = 1;
37799 break;
37800 default:
37801 return 100;
37802 }
37803 if (in == 2)
37804 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37805 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37806 }
37807 switch (GET_MODE_SIZE (mode))
37808 {
37809 case 1:
37810 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37811 {
37812 if (!in)
37813 return ix86_cost->int_store[0];
37814 if (TARGET_PARTIAL_REG_DEPENDENCY
37815 && optimize_function_for_speed_p (cfun))
37816 cost = ix86_cost->movzbl_load;
37817 else
37818 cost = ix86_cost->int_load[0];
37819 if (in == 2)
37820 return MAX (cost, ix86_cost->int_store[0]);
37821 return cost;
37822 }
37823 else
37824 {
37825 if (in == 2)
37826 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37827 if (in)
37828 return ix86_cost->movzbl_load;
37829 else
37830 return ix86_cost->int_store[0] + 4;
37831 }
37832 break;
37833 case 2:
37834 if (in == 2)
37835 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37836 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37837 default:
37838 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37839 if (mode == TFmode)
37840 mode = XFmode;
37841 if (in == 2)
37842 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37843 else if (in)
37844 cost = ix86_cost->int_load[2];
37845 else
37846 cost = ix86_cost->int_store[2];
37847 return (cost * (((int) GET_MODE_SIZE (mode)
37848 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37849 }
37850 }
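
/* A worked example of the cost selection above: an SFmode load into an
   SSE class register costs ix86_cost->sse_load[0] (size 4 selects index
   0), while storing a QImode value from a non-Q_CLASS_P register on a
   32-bit target costs ix86_cost->int_store[0] + 4, reflecting the extra
   work needed to access the low byte.  */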
37851
37852 static int
37853 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37854 bool in)
37855 {
37856 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37857 }
37858
37859
37860 /* Return the cost of moving data from a register in class CLASS1 to
37861 one in class CLASS2.
37862
37863 It is not required that the cost always equal 2 when FROM is the same as TO;
37864 on some machines it is expensive to move between registers if they are not
37865 general registers. */
37866
37867 static int
37868 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37869 reg_class_t class2_i)
37870 {
37871 enum reg_class class1 = (enum reg_class) class1_i;
37872 enum reg_class class2 = (enum reg_class) class2_i;
37873
37874 /* If we require secondary memory, compute the cost of the store followed
37875 by the load. To avoid bad register allocation choices, we need
37876 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37877
37878 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37879 {
37880 int cost = 1;
37881
37882 cost += inline_memory_move_cost (mode, class1, 2);
37883 cost += inline_memory_move_cost (mode, class2, 2);
37884
37885 /* When copying from a general purpose register we may emit multiple
37886 stores followed by a single load, causing a memory size mismatch stall.
37887 Count this as an arbitrarily high cost of 20. */
37888 if (targetm.class_max_nregs (class1, mode)
37889 > targetm.class_max_nregs (class2, mode))
37890 cost += 20;
37891
37892 /* In the case of FP/MMX moves, the registers actually overlap, and we
37893 have to switch modes in order to treat them differently. */
37894 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37895 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37896 cost += 20;
37897
37898 return cost;
37899 }
37900
37901 /* Moves between SSE/MMX and integer unit are expensive. */
37902 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37903 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37904
37905 /* ??? By keeping the returned value relatively high, we limit the number
37906 of moves between integer and MMX/SSE registers for all targets.
37907 Additionally, a high value avoids a problem with x86_modes_tieable_p(),
37908 where integer modes in MMX/SSE registers are not tieable
37909 because of missing QImode and HImode moves to, from or between
37910 MMX/SSE registers. */
37911 return MAX (8, ix86_cost->mmxsse_to_integer);
37912
37913 if (MAYBE_FLOAT_CLASS_P (class1))
37914 return ix86_cost->fp_move;
37915 if (MAYBE_SSE_CLASS_P (class1))
37916 return ix86_cost->sse_move;
37917 if (MAYBE_MMX_CLASS_P (class1))
37918 return ix86_cost->mmx_move;
37919 return 2;
37920 }
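
/* For example, copying DImode between an SSE class and GENERAL_REGS on a
   32-bit target goes through the secondary-memory path above, so its cost
   is 1 plus both memory move costs (plus the 20-point penalties where they
   apply); an SImode copy that merely crosses the SSE/integer boundary
   costs MAX (8, ix86_cost->mmxsse_to_integer).  */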
37921
37922 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37923 MODE. */
37924
37925 bool
37926 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37927 {
37928 /* Flags, and only flags, can hold CCmode values. */
37929 if (CC_REGNO_P (regno))
37930 return GET_MODE_CLASS (mode) == MODE_CC;
37931 if (GET_MODE_CLASS (mode) == MODE_CC
37932 || GET_MODE_CLASS (mode) == MODE_RANDOM
37933 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37934 return false;
37935 if (STACK_REGNO_P (regno))
37936 return VALID_FP_MODE_P (mode);
37937 if (MASK_REGNO_P (regno))
37938 return (VALID_MASK_REG_MODE (mode)
37939 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
37940 if (SSE_REGNO_P (regno))
37941 {
37942 /* We implement the move patterns for all vector modes into and
37943 out of SSE registers, even when no operation instructions
37944 are available. */
37945
37946 /* For AVX-512 we allow, regardless of regno:
37947 - XI mode
37948 - any of 512-bit wide vector mode
37949 - any scalar mode. */
37950 if (TARGET_AVX512F
37951 && (mode == XImode
37952 || VALID_AVX512F_REG_MODE (mode)
37953 || VALID_AVX512F_SCALAR_MODE (mode)))
37954 return true;
37955
37956 /* TODO: check for QI/HI scalars. */
37957 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
37958 if (TARGET_AVX512VL
37959 && (mode == OImode
37960 || mode == TImode
37961 || VALID_AVX256_REG_MODE (mode)
37962 || VALID_AVX512VL_128_REG_MODE (mode)))
37963 return true;
37964
37965 /* xmm16-xmm31 are only available for AVX-512. */
37966 if (EXT_REX_SSE_REGNO_P (regno))
37967 return false;
37968
37969 /* OImode and AVX modes are available only when AVX is enabled. */
37970 return ((TARGET_AVX
37971 && VALID_AVX256_REG_OR_OI_MODE (mode))
37972 || VALID_SSE_REG_MODE (mode)
37973 || VALID_SSE2_REG_MODE (mode)
37974 || VALID_MMX_REG_MODE (mode)
37975 || VALID_MMX_REG_MODE_3DNOW (mode));
37976 }
37977 if (MMX_REGNO_P (regno))
37978 {
37979 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37980 so if the register is available at all, then we can move data of
37981 the given mode into or out of it. */
37982 return (VALID_MMX_REG_MODE (mode)
37983 || VALID_MMX_REG_MODE_3DNOW (mode));
37984 }
37985
37986 if (mode == QImode)
37987 {
37988 /* Take care with QImode values - they can live in non-QI regs,
37989 but then they do cause partial register stalls. */
37990 if (ANY_QI_REGNO_P (regno))
37991 return true;
37992 if (!TARGET_PARTIAL_REG_STALL)
37993 return true;
37994 /* LRA checks if the hard register is OK for the given mode.
37995 QImode values can live in non-QI regs, so we allow all
37996 registers here. */
37997 if (lra_in_progress)
37998 return true;
37999 return !can_create_pseudo_p ();
38000 }
38001 /* We handle both integer and floats in the general purpose registers. */
38002 else if (VALID_INT_MODE_P (mode))
38003 return true;
38004 else if (VALID_FP_MODE_P (mode))
38005 return true;
38006 else if (VALID_DFP_MODE_P (mode))
38007 return true;
38008 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
38009 on to use that value in smaller contexts, this can easily force a
38010 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
38011 supporting DImode, allow it. */
38012 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
38013 return true;
38014
38015 return false;
38016 }
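
/* Illustrating the checks above: V4SFmode is valid in any of xmm0-xmm15
   with plain SSE, while xmm16-xmm31 additionally require AVX-512 support
   for the mode (AVX512F for 512-bit and scalar modes, typically AVX512VL
   for 128/256-bit modes); the k-registers only accept the mask modes; and
   QImode is accepted in non-QI integer registers subject to the
   partial-register-stall and LRA checks.  */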
38017
38018 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
38019 tieable integer mode. */
38020
38021 static bool
38022 ix86_tieable_integer_mode_p (enum machine_mode mode)
38023 {
38024 switch (mode)
38025 {
38026 case HImode:
38027 case SImode:
38028 return true;
38029
38030 case QImode:
38031 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
38032
38033 case DImode:
38034 return TARGET_64BIT;
38035
38036 default:
38037 return false;
38038 }
38039 }
38040
38041 /* Return true if MODE1 is accessible in a register that can hold MODE2
38042 without copying. That is, all register classes that can hold MODE2
38043 can also hold MODE1. */
38044
38045 bool
38046 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
38047 {
38048 if (mode1 == mode2)
38049 return true;
38050
38051 if (ix86_tieable_integer_mode_p (mode1)
38052 && ix86_tieable_integer_mode_p (mode2))
38053 return true;
38054
38055 /* MODE2 being XFmode implies fp stack or general regs, which means we
38056 can tie any smaller floating point modes to it. Note that we do not
38057 tie this with TFmode. */
38058 if (mode2 == XFmode)
38059 return mode1 == SFmode || mode1 == DFmode;
38060
38061 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
38062 that we can tie it with SFmode. */
38063 if (mode2 == DFmode)
38064 return mode1 == SFmode;
38065
38066 /* If MODE2 is only appropriate for an SSE register, then tie with
38067 any other mode acceptable to SSE registers. */
38068 if (GET_MODE_SIZE (mode2) == 32
38069 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
38070 return (GET_MODE_SIZE (mode1) == 32
38071 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
38072 if (GET_MODE_SIZE (mode2) == 16
38073 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
38074 return (GET_MODE_SIZE (mode1) == 16
38075 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
38076
38077 /* If MODE2 is appropriate for an MMX register, then tie
38078 with any other mode acceptable to MMX registers. */
38079 if (GET_MODE_SIZE (mode2) == 8
38080 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
38081 return (GET_MODE_SIZE (mode1) == 8
38082 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
38083
38084 return false;
38085 }
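
/* For example, SImode and HImode always tie, SFmode ties with DFmode and
   XFmode, and two 16-byte vector modes such as V4SFmode and V2DImode tie
   with each other as long as both are valid for an SSE register.  */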
38086
38087 /* Return the cost of moving between two registers of mode MODE. */
38088
38089 static int
38090 ix86_set_reg_reg_cost (enum machine_mode mode)
38091 {
38092 unsigned int units = UNITS_PER_WORD;
38093
38094 switch (GET_MODE_CLASS (mode))
38095 {
38096 default:
38097 break;
38098
38099 case MODE_CC:
38100 units = GET_MODE_SIZE (CCmode);
38101 break;
38102
38103 case MODE_FLOAT:
38104 if ((TARGET_SSE && mode == TFmode)
38105 || (TARGET_80387 && mode == XFmode)
38106 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
38107 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
38108 units = GET_MODE_SIZE (mode);
38109 break;
38110
38111 case MODE_COMPLEX_FLOAT:
38112 if ((TARGET_SSE && mode == TCmode)
38113 || (TARGET_80387 && mode == XCmode)
38114 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
38115 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
38116 units = GET_MODE_SIZE (mode);
38117 break;
38118
38119 case MODE_VECTOR_INT:
38120 case MODE_VECTOR_FLOAT:
38121 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
38122 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
38123 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
38124 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
38125 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
38126 units = GET_MODE_SIZE (mode);
38127 }
38128
38129 /* Return the cost of moving between two registers of mode MODE,
38130 assuming that the move will be in pieces of at most UNITS bytes. */
38131 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
38132 }
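
/* For example, with the function above a DImode register-register copy on
   ia32 costs COSTS_N_INSNS (2) because UNITS_PER_WORD is 4, while a
   V4SFmode copy costs COSTS_N_INSNS (1) once TARGET_SSE makes the whole
   16-byte mode movable in one piece.  */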
38133
38134 /* Compute a (partial) cost for rtx X. Return true if the complete
38135 cost has been computed, and false if subexpressions should be
38136 scanned. In either case, *TOTAL contains the cost result. */
38137
38138 static bool
38139 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
38140 bool speed)
38141 {
38142 rtx mask;
38143 enum rtx_code code = (enum rtx_code) code_i;
38144 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
38145 enum machine_mode mode = GET_MODE (x);
38146 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
38147
38148 switch (code)
38149 {
38150 case SET:
38151 if (register_operand (SET_DEST (x), VOIDmode)
38152 && reg_or_0_operand (SET_SRC (x), VOIDmode))
38153 {
38154 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
38155 return true;
38156 }
38157 return false;
38158
38159 case CONST_INT:
38160 case CONST:
38161 case LABEL_REF:
38162 case SYMBOL_REF:
38163 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
38164 *total = 3;
38165 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
38166 *total = 2;
38167 else if (flag_pic && SYMBOLIC_CONST (x)
38168 && !(TARGET_64BIT
38169 && (GET_CODE (x) == LABEL_REF
38170 || (GET_CODE (x) == SYMBOL_REF
38171 && SYMBOL_REF_LOCAL_P (x)))))
38172 *total = 1;
38173 else
38174 *total = 0;
38175 return true;
38176
38177 case CONST_DOUBLE:
38178 if (mode == VOIDmode)
38179 {
38180 *total = 0;
38181 return true;
38182 }
38183 switch (standard_80387_constant_p (x))
38184 {
38185 case 1: /* 0.0 */
38186 *total = 1;
38187 return true;
38188 default: /* Other constants */
38189 *total = 2;
38190 return true;
38191 case 0:
38192 case -1:
38193 break;
38194 }
38195 if (SSE_FLOAT_MODE_P (mode))
38196 {
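/* Note that CONST_VECTOR jumps directly into this block from the
   enclosing switch, so scalar SSE float constants and vector constants
   share the standard_sse_constant_p costing below.  */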
38197 case CONST_VECTOR:
38198 switch (standard_sse_constant_p (x))
38199 {
38200 case 0:
38201 break;
38202 case 1: /* 0: xor eliminates false dependency */
38203 *total = 0;
38204 return true;
38205 default: /* -1: cmp contains false dependency */
38206 *total = 1;
38207 return true;
38208 }
38209 }
38210 /* Fall back to (MEM (SYMBOL_REF)), since that's where
38211 it'll probably end up. Add a penalty for size. */
38212 *total = (COSTS_N_INSNS (1)
38213 + (flag_pic != 0 && !TARGET_64BIT)
38214 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
38215 return true;
38216
38217 case ZERO_EXTEND:
38218 /* Zero extension is often completely free on x86_64, so make
38219 it as cheap as possible. */
38220 if (TARGET_64BIT && mode == DImode
38221 && GET_MODE (XEXP (x, 0)) == SImode)
38222 *total = 1;
38223 else if (TARGET_ZERO_EXTEND_WITH_AND)
38224 *total = cost->add;
38225 else
38226 *total = cost->movzx;
38227 return false;
38228
38229 case SIGN_EXTEND:
38230 *total = cost->movsx;
38231 return false;
38232
38233 case ASHIFT:
38234 if (SCALAR_INT_MODE_P (mode)
38235 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
38236 && CONST_INT_P (XEXP (x, 1)))
38237 {
38238 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38239 if (value == 1)
38240 {
38241 *total = cost->add;
38242 return false;
38243 }
38244 if ((value == 2 || value == 3)
38245 && cost->lea <= cost->shift_const)
38246 {
38247 *total = cost->lea;
38248 return false;
38249 }
38250 }
38251 /* FALLTHRU */
38252
38253 case ROTATE:
38254 case ASHIFTRT:
38255 case LSHIFTRT:
38256 case ROTATERT:
38257 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38258 {
38259 /* ??? Should be SSE vector operation cost. */
38260 /* At least for published AMD latencies, this really is the same
38261 as the latency for a simple fpu operation like fabs. */
38262 /* V*QImode is emulated with 1-11 insns. */
38263 if (mode == V16QImode || mode == V32QImode)
38264 {
38265 int count = 11;
38266 if (TARGET_XOP && mode == V16QImode)
38267 {
38268 /* For XOP we use vpshab, which requires a broadcast of the
38269 value to the variable shift insn. For constants this
38270 means a V16QI const in mem; even when we can perform the
38271 shift with one insn, set the cost to prefer paddb. */
38272 if (CONSTANT_P (XEXP (x, 1)))
38273 {
38274 *total = (cost->fabs
38275 + rtx_cost (XEXP (x, 0), code, 0, speed)
38276 + (speed ? 2 : COSTS_N_BYTES (16)));
38277 return true;
38278 }
38279 count = 3;
38280 }
38281 else if (TARGET_SSSE3)
38282 count = 7;
38283 *total = cost->fabs * count;
38284 }
38285 else
38286 *total = cost->fabs;
38287 }
38288 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38289 {
38290 if (CONST_INT_P (XEXP (x, 1)))
38291 {
38292 if (INTVAL (XEXP (x, 1)) > 32)
38293 *total = cost->shift_const + COSTS_N_INSNS (2);
38294 else
38295 *total = cost->shift_const * 2;
38296 }
38297 else
38298 {
38299 if (GET_CODE (XEXP (x, 1)) == AND)
38300 *total = cost->shift_var * 2;
38301 else
38302 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38303 }
38304 }
38305 else
38306 {
38307 if (CONST_INT_P (XEXP (x, 1)))
38308 *total = cost->shift_const;
38309 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38310 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38311 {
38312 /* Return the cost after shift-and truncation. */
38313 *total = cost->shift_var;
38314 return true;
38315 }
38316 else
38317 *total = cost->shift_var;
38318 }
38319 return false;
38320
38321 case FMA:
38322 {
38323 rtx sub;
38324
38325 gcc_assert (FLOAT_MODE_P (mode));
38326 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38327
38328 /* ??? SSE scalar/vector cost should be used here. */
38329 /* ??? Bald assumption that fma has the same cost as fmul. */
38330 *total = cost->fmul;
38331 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38332
38333 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38334 sub = XEXP (x, 0);
38335 if (GET_CODE (sub) == NEG)
38336 sub = XEXP (sub, 0);
38337 *total += rtx_cost (sub, FMA, 0, speed);
38338
38339 sub = XEXP (x, 2);
38340 if (GET_CODE (sub) == NEG)
38341 sub = XEXP (sub, 0);
38342 *total += rtx_cost (sub, FMA, 2, speed);
38343 return true;
38344 }
38345
38346 case MULT:
38347 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38348 {
38349 /* ??? SSE scalar cost should be used here. */
38350 *total = cost->fmul;
38351 return false;
38352 }
38353 else if (X87_FLOAT_MODE_P (mode))
38354 {
38355 *total = cost->fmul;
38356 return false;
38357 }
38358 else if (FLOAT_MODE_P (mode))
38359 {
38360 /* ??? SSE vector cost should be used here. */
38361 *total = cost->fmul;
38362 return false;
38363 }
38364 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38365 {
38366 /* V*QImode is emulated with 7-13 insns. */
38367 if (mode == V16QImode || mode == V32QImode)
38368 {
38369 int extra = 11;
38370 if (TARGET_XOP && mode == V16QImode)
38371 extra = 5;
38372 else if (TARGET_SSSE3)
38373 extra = 6;
38374 *total = cost->fmul * 2 + cost->fabs * extra;
38375 }
38376 /* V*DImode is emulated with 5-8 insns. */
38377 else if (mode == V2DImode || mode == V4DImode)
38378 {
38379 if (TARGET_XOP && mode == V2DImode)
38380 *total = cost->fmul * 2 + cost->fabs * 3;
38381 else
38382 *total = cost->fmul * 3 + cost->fabs * 5;
38383 }
38384 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38385 insns, including two PMULUDQ. */
38386 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38387 *total = cost->fmul * 2 + cost->fabs * 5;
38388 else
38389 *total = cost->fmul;
38390 return false;
38391 }
38392 else
38393 {
38394 rtx op0 = XEXP (x, 0);
38395 rtx op1 = XEXP (x, 1);
38396 int nbits;
38397 if (CONST_INT_P (XEXP (x, 1)))
38398 {
38399 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38400 for (nbits = 0; value != 0; value &= value - 1)
38401 nbits++;
38402 }
38403 else
38404 /* This is arbitrary. */
38405 nbits = 7;
38406
38407 /* Compute costs correctly for widening multiplication. */
38408 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38409 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38410 == GET_MODE_SIZE (mode))
38411 {
38412 int is_mulwiden = 0;
38413 enum machine_mode inner_mode = GET_MODE (op0);
38414
38415 if (GET_CODE (op0) == GET_CODE (op1))
38416 is_mulwiden = 1, op1 = XEXP (op1, 0);
38417 else if (CONST_INT_P (op1))
38418 {
38419 if (GET_CODE (op0) == SIGN_EXTEND)
38420 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38421 == INTVAL (op1);
38422 else
38423 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38424 }
38425
38426 if (is_mulwiden)
38427 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38428 }
38429
38430 *total = (cost->mult_init[MODE_INDEX (mode)]
38431 + nbits * cost->mult_bit
38432 + rtx_cost (op0, outer_code, opno, speed)
38433 + rtx_cost (op1, outer_code, opno, speed));
38434
38435 return true;
38436 }
38437
38438 case DIV:
38439 case UDIV:
38440 case MOD:
38441 case UMOD:
38442 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38443 /* ??? SSE cost should be used here. */
38444 *total = cost->fdiv;
38445 else if (X87_FLOAT_MODE_P (mode))
38446 *total = cost->fdiv;
38447 else if (FLOAT_MODE_P (mode))
38448 /* ??? SSE vector cost should be used here. */
38449 *total = cost->fdiv;
38450 else
38451 *total = cost->divide[MODE_INDEX (mode)];
38452 return false;
38453
38454 case PLUS:
38455 if (GET_MODE_CLASS (mode) == MODE_INT
38456 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38457 {
38458 if (GET_CODE (XEXP (x, 0)) == PLUS
38459 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38460 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38461 && CONSTANT_P (XEXP (x, 1)))
38462 {
38463 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38464 if (val == 2 || val == 4 || val == 8)
38465 {
38466 *total = cost->lea;
38467 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38468 outer_code, opno, speed);
38469 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38470 outer_code, opno, speed);
38471 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38472 return true;
38473 }
38474 }
38475 else if (GET_CODE (XEXP (x, 0)) == MULT
38476 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38477 {
38478 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38479 if (val == 2 || val == 4 || val == 8)
38480 {
38481 *total = cost->lea;
38482 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38483 outer_code, opno, speed);
38484 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38485 return true;
38486 }
38487 }
38488 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38489 {
38490 *total = cost->lea;
38491 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38492 outer_code, opno, speed);
38493 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38494 outer_code, opno, speed);
38495 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38496 return true;
38497 }
38498 }
38499 /* FALLTHRU */
38500
38501 case MINUS:
38502 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38503 {
38504 /* ??? SSE cost should be used here. */
38505 *total = cost->fadd;
38506 return false;
38507 }
38508 else if (X87_FLOAT_MODE_P (mode))
38509 {
38510 *total = cost->fadd;
38511 return false;
38512 }
38513 else if (FLOAT_MODE_P (mode))
38514 {
38515 /* ??? SSE vector cost should be used here. */
38516 *total = cost->fadd;
38517 return false;
38518 }
38519 /* FALLTHRU */
38520
38521 case AND:
38522 case IOR:
38523 case XOR:
38524 if (GET_MODE_CLASS (mode) == MODE_INT
38525 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38526 {
38527 *total = (cost->add * 2
38528 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38529 << (GET_MODE (XEXP (x, 0)) != DImode))
38530 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38531 << (GET_MODE (XEXP (x, 1)) != DImode)));
38532 return true;
38533 }
38534 /* FALLTHRU */
38535
38536 case NEG:
38537 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38538 {
38539 /* ??? SSE cost should be used here. */
38540 *total = cost->fchs;
38541 return false;
38542 }
38543 else if (X87_FLOAT_MODE_P (mode))
38544 {
38545 *total = cost->fchs;
38546 return false;
38547 }
38548 else if (FLOAT_MODE_P (mode))
38549 {
38550 /* ??? SSE vector cost should be used here. */
38551 *total = cost->fchs;
38552 return false;
38553 }
38554 /* FALLTHRU */
38555
38556 case NOT:
38557 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38558 {
38559 /* ??? Should be SSE vector operation cost. */
38560 /* At least for published AMD latencies, this really is the same
38561 as the latency for a simple fpu operation like fabs. */
38562 *total = cost->fabs;
38563 }
38564 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38565 *total = cost->add * 2;
38566 else
38567 *total = cost->add;
38568 return false;
38569
38570 case COMPARE:
38571 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38572 && XEXP (XEXP (x, 0), 1) == const1_rtx
38573 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38574 && XEXP (x, 1) == const0_rtx)
38575 {
38576 /* This kind of construct is implemented using test[bwl].
38577 Treat it as if we had an AND. */
38578 *total = (cost->add
38579 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38580 + rtx_cost (const1_rtx, outer_code, opno, speed));
38581 return true;
38582 }
38583 return false;
38584
38585 case FLOAT_EXTEND:
38586 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38587 *total = 0;
38588 return false;
38589
38590 case ABS:
38591 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38592 /* ??? SSE cost should be used here. */
38593 *total = cost->fabs;
38594 else if (X87_FLOAT_MODE_P (mode))
38595 *total = cost->fabs;
38596 else if (FLOAT_MODE_P (mode))
38597 /* ??? SSE vector cost should be used here. */
38598 *total = cost->fabs;
38599 return false;
38600
38601 case SQRT:
38602 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38603 /* ??? SSE cost should be used here. */
38604 *total = cost->fsqrt;
38605 else if (X87_FLOAT_MODE_P (mode))
38606 *total = cost->fsqrt;
38607 else if (FLOAT_MODE_P (mode))
38608 /* ??? SSE vector cost should be used here. */
38609 *total = cost->fsqrt;
38610 return false;
38611
38612 case UNSPEC:
38613 if (XINT (x, 1) == UNSPEC_TP)
38614 *total = 0;
38615 return false;
38616
38617 case VEC_SELECT:
38618 case VEC_CONCAT:
38619 case VEC_DUPLICATE:
38620 /* ??? Assume all of these vector manipulation patterns are
38621 recognizable. In which case they all pretty much have the
38622 same cost. */
38623 *total = cost->fabs;
38624 return true;
38625 case VEC_MERGE:
38626 mask = XEXP (x, 2);
38627 /* This is a masked instruction; assume the same cost
38628 as the non-masked variant. */
38629 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38630 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38631 else
38632 *total = cost->fabs;
38633 return true;
38634
38635 default:
38636 return false;
38637 }
38638 }
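
/* As an example of the PLUS handling in ix86_rtx_costs above, an address
   computation such as (plus (mult reg 4) (const_int disp)) is costed as a
   single LEA (cost->lea) plus the costs of its operands, rather than as a
   shift followed by an add.  */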
38639
38640 #if TARGET_MACHO
38641
38642 static int current_machopic_label_num;
38643
38644 /* Given a symbol name and its associated stub, write out the
38645 definition of the stub. */
38646
38647 void
38648 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38649 {
38650 unsigned int length;
38651 char *binder_name, *symbol_name, lazy_ptr_name[32];
38652 int label = ++current_machopic_label_num;
38653
38654 /* For 64-bit we shouldn't get here. */
38655 gcc_assert (!TARGET_64BIT);
38656
38657 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38658 symb = targetm.strip_name_encoding (symb);
38659
38660 length = strlen (stub);
38661 binder_name = XALLOCAVEC (char, length + 32);
38662 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38663
38664 length = strlen (symb);
38665 symbol_name = XALLOCAVEC (char, length + 32);
38666 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38667
38668 sprintf (lazy_ptr_name, "L%d$lz", label);
38669
38670 if (MACHOPIC_ATT_STUB)
38671 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38672 else if (MACHOPIC_PURE)
38673 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38674 else
38675 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38676
38677 fprintf (file, "%s:\n", stub);
38678 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38679
38680 if (MACHOPIC_ATT_STUB)
38681 {
38682 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38683 }
38684 else if (MACHOPIC_PURE)
38685 {
38686 /* PIC stub. */
38687 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38688 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38689 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38690 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38691 label, lazy_ptr_name, label);
38692 fprintf (file, "\tjmp\t*%%ecx\n");
38693 }
38694 else
38695 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38696
38697 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38698 it needs no stub-binding-helper. */
38699 if (MACHOPIC_ATT_STUB)
38700 return;
38701
38702 fprintf (file, "%s:\n", binder_name);
38703
38704 if (MACHOPIC_PURE)
38705 {
38706 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38707 fprintf (file, "\tpushl\t%%ecx\n");
38708 }
38709 else
38710 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38711
38712 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38713
38714 /* N.B. Keep the correspondence of these
38715 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38716 old-pic/new-pic/non-pic stubs; altering this will break
38717 compatibility with existing dylibs. */
38718 if (MACHOPIC_PURE)
38719 {
38720 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38721 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38722 }
38723 else
38724 /* 16-byte -mdynamic-no-pic stub. */
38725 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
38726
38727 fprintf (file, "%s:\n", lazy_ptr_name);
38728 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38729 fprintf (file, ASM_LONG "%s\n", binder_name);
38730 }
38731 #endif /* TARGET_MACHO */
38732
38733 /* Order the registers for register allocator. */
38734
38735 void
38736 x86_order_regs_for_local_alloc (void)
38737 {
38738 int pos = 0;
38739 int i;
38740
38741 /* First allocate the local general purpose registers. */
38742 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38743 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38744 reg_alloc_order [pos++] = i;
38745
38746 /* Global general purpose registers. */
38747 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38748 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38749 reg_alloc_order [pos++] = i;
38750
38751 /* x87 registers come first in case we are doing FP math
38752 using them. */
38753 if (!TARGET_SSE_MATH)
38754 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38755 reg_alloc_order [pos++] = i;
38756
38757 /* SSE registers. */
38758 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38759 reg_alloc_order [pos++] = i;
38760 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38761 reg_alloc_order [pos++] = i;
38762
38763 /* Extended REX SSE registers. */
38764 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38765 reg_alloc_order [pos++] = i;
38766
38767 /* Mask register. */
38768 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38769 reg_alloc_order [pos++] = i;
38770
38771 /* x87 registers. */
38772 if (TARGET_SSE_MATH)
38773 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38774 reg_alloc_order [pos++] = i;
38775
38776 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38777 reg_alloc_order [pos++] = i;
38778
38779 /* Initialize the rest of the array, as we do not allocate some registers
38780 at all. */
38781 while (pos < FIRST_PSEUDO_REGISTER)
38782 reg_alloc_order [pos++] = 0;
38783 }
38784
38785 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38786 in struct attribute_spec handler. */
38787 static tree
38788 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38789 tree args,
38790 int,
38791 bool *no_add_attrs)
38792 {
38793 if (TREE_CODE (*node) != FUNCTION_TYPE
38794 && TREE_CODE (*node) != METHOD_TYPE
38795 && TREE_CODE (*node) != FIELD_DECL
38796 && TREE_CODE (*node) != TYPE_DECL)
38797 {
38798 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38799 name);
38800 *no_add_attrs = true;
38801 return NULL_TREE;
38802 }
38803 if (TARGET_64BIT)
38804 {
38805 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38806 name);
38807 *no_add_attrs = true;
38808 return NULL_TREE;
38809 }
38810 if (is_attribute_p ("callee_pop_aggregate_return", name))
38811 {
38812 tree cst;
38813
38814 cst = TREE_VALUE (args);
38815 if (TREE_CODE (cst) != INTEGER_CST)
38816 {
38817 warning (OPT_Wattributes,
38818 "%qE attribute requires an integer constant argument",
38819 name);
38820 *no_add_attrs = true;
38821 }
38822 else if (compare_tree_int (cst, 0) != 0
38823 && compare_tree_int (cst, 1) != 0)
38824 {
38825 warning (OPT_Wattributes,
38826 "argument to %qE attribute is neither zero, nor one",
38827 name);
38828 *no_add_attrs = true;
38829 }
38830
38831 return NULL_TREE;
38832 }
38833
38834 return NULL_TREE;
38835 }
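
/* A minimal usage sketch for the attribute handled above (32-bit only;
   the argument must be the integer constant 0 or 1):

     struct big ret_in_mem (void)
       __attribute__ ((callee_pop_aggregate_return (1)));
*/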
38836
38837 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
38838 struct attribute_spec.handler. */
38839 static tree
38840 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
38841 bool *no_add_attrs)
38842 {
38843 if (TREE_CODE (*node) != FUNCTION_TYPE
38844 && TREE_CODE (*node) != METHOD_TYPE
38845 && TREE_CODE (*node) != FIELD_DECL
38846 && TREE_CODE (*node) != TYPE_DECL)
38847 {
38848 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38849 name);
38850 *no_add_attrs = true;
38851 return NULL_TREE;
38852 }
38853
38854 /* Can combine regparm with all attributes but fastcall. */
38855 if (is_attribute_p ("ms_abi", name))
38856 {
38857 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38858 {
38859 error ("ms_abi and sysv_abi attributes are not compatible");
38860 }
38861
38862 return NULL_TREE;
38863 }
38864 else if (is_attribute_p ("sysv_abi", name))
38865 {
38866 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38867 {
38868 error ("ms_abi and sysv_abi attributes are not compatible");
38869 }
38870
38871 return NULL_TREE;
38872 }
38873
38874 return NULL_TREE;
38875 }
38876
38877 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38878 struct attribute_spec.handler. */
38879 static tree
38880 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
38881 bool *no_add_attrs)
38882 {
38883 tree *type = NULL;
38884 if (DECL_P (*node))
38885 {
38886 if (TREE_CODE (*node) == TYPE_DECL)
38887 type = &TREE_TYPE (*node);
38888 }
38889 else
38890 type = node;
38891
38892 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38893 {
38894 warning (OPT_Wattributes, "%qE attribute ignored",
38895 name);
38896 *no_add_attrs = true;
38897 }
38898
38899 else if ((is_attribute_p ("ms_struct", name)
38900 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38901 || ((is_attribute_p ("gcc_struct", name)
38902 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38903 {
38904 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38905 name);
38906 *no_add_attrs = true;
38907 }
38908
38909 return NULL_TREE;
38910 }
38911
38912 static tree
38913 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
38914 bool *no_add_attrs)
38915 {
38916 if (TREE_CODE (*node) != FUNCTION_DECL)
38917 {
38918 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38919 name);
38920 *no_add_attrs = true;
38921 }
38922 return NULL_TREE;
38923 }
38924
38925 static bool
38926 ix86_ms_bitfield_layout_p (const_tree record_type)
38927 {
38928 return ((TARGET_MS_BITFIELD_LAYOUT
38929 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38930 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38931 }
38932
38933 /* Returns an expression indicating where the this parameter is
38934 located on entry to the FUNCTION. */
38935
38936 static rtx
38937 x86_this_parameter (tree function)
38938 {
38939 tree type = TREE_TYPE (function);
38940 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38941 int nregs;
38942
38943 if (TARGET_64BIT)
38944 {
38945 const int *parm_regs;
38946
38947 if (ix86_function_type_abi (type) == MS_ABI)
38948 parm_regs = x86_64_ms_abi_int_parameter_registers;
38949 else
38950 parm_regs = x86_64_int_parameter_registers;
38951 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38952 }
38953
38954 nregs = ix86_function_regparm (type, function);
38955
38956 if (nregs > 0 && !stdarg_p (type))
38957 {
38958 int regno;
38959 unsigned int ccvt = ix86_get_callcvt (type);
38960
38961 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38962 regno = aggr ? DX_REG : CX_REG;
38963 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38964 {
38965 regno = CX_REG;
38966 if (aggr)
38967 return gen_rtx_MEM (SImode,
38968 plus_constant (Pmode, stack_pointer_rtx, 4));
38969 }
38970 else
38971 {
38972 regno = AX_REG;
38973 if (aggr)
38974 {
38975 regno = DX_REG;
38976 if (nregs == 1)
38977 return gen_rtx_MEM (SImode,
38978 plus_constant (Pmode,
38979 stack_pointer_rtx, 4));
38980 }
38981 }
38982 return gen_rtx_REG (SImode, regno);
38983 }
38984
38985 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38986 aggr ? 8 : 4));
38987 }
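
/* Summarizing the cases above: in 64-bit code `this' arrives in the first
   integer parameter register, or the second one when the return value is
   passed in memory; in 32-bit code it is typically in %ecx for
   fastcall/thiscall, in a register for regparm functions, and otherwise
   on the stack at 4(%esp), or 8(%esp) when a hidden aggregate-return
   pointer comes first.  */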
38988
38989 /* Determine whether x86_output_mi_thunk can succeed. */
38990
38991 static bool
38992 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
38993 const_tree function)
38994 {
38995 /* 64-bit can handle anything. */
38996 if (TARGET_64BIT)
38997 return true;
38998
38999 /* For 32-bit, everything's fine if we have one free register. */
39000 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
39001 return true;
39002
39003 /* Need a free register for vcall_offset. */
39004 if (vcall_offset)
39005 return false;
39006
39007 /* Need a free register for GOT references. */
39008 if (flag_pic && !targetm.binds_local_p (function))
39009 return false;
39010
39011 /* Otherwise ok. */
39012 return true;
39013 }
39014
39015 /* Output the assembler code for a thunk function. THUNK_DECL is the
39016 declaration for the thunk function itself, FUNCTION is the decl for
39017 the target function. DELTA is an immediate constant offset to be
39018 added to THIS. If VCALL_OFFSET is nonzero, the word at
39019 *(*this + vcall_offset) should be added to THIS. */
39020
39021 static void
39022 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
39023 HOST_WIDE_INT vcall_offset, tree function)
39024 {
39025 rtx this_param = x86_this_parameter (function);
39026 rtx this_reg, tmp, fnaddr;
39027 unsigned int tmp_regno;
39028 rtx_insn *insn;
39029
39030 if (TARGET_64BIT)
39031 tmp_regno = R10_REG;
39032 else
39033 {
39034 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
39035 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
39036 tmp_regno = AX_REG;
39037 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
39038 tmp_regno = DX_REG;
39039 else
39040 tmp_regno = CX_REG;
39041 }
39042
39043 emit_note (NOTE_INSN_PROLOGUE_END);
39044
39045 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
39046 pull it in now and let DELTA benefit. */
39047 if (REG_P (this_param))
39048 this_reg = this_param;
39049 else if (vcall_offset)
39050 {
39051 /* Put the this parameter into %eax. */
39052 this_reg = gen_rtx_REG (Pmode, AX_REG);
39053 emit_move_insn (this_reg, this_param);
39054 }
39055 else
39056 this_reg = NULL_RTX;
39057
39058 /* Adjust the this parameter by a fixed constant. */
39059 if (delta)
39060 {
39061 rtx delta_rtx = GEN_INT (delta);
39062 rtx delta_dst = this_reg ? this_reg : this_param;
39063
39064 if (TARGET_64BIT)
39065 {
39066 if (!x86_64_general_operand (delta_rtx, Pmode))
39067 {
39068 tmp = gen_rtx_REG (Pmode, tmp_regno);
39069 emit_move_insn (tmp, delta_rtx);
39070 delta_rtx = tmp;
39071 }
39072 }
39073
39074 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
39075 }
39076
39077 /* Adjust the this parameter by a value stored in the vtable. */
39078 if (vcall_offset)
39079 {
39080 rtx vcall_addr, vcall_mem, this_mem;
39081
39082 tmp = gen_rtx_REG (Pmode, tmp_regno);
39083
39084 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
39085 if (Pmode != ptr_mode)
39086 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
39087 emit_move_insn (tmp, this_mem);
39088
39089 /* Adjust the this parameter. */
39090 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
39091 if (TARGET_64BIT
39092 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
39093 {
39094 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
39095 emit_move_insn (tmp2, GEN_INT (vcall_offset));
39096 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
39097 }
39098
39099 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
39100 if (Pmode != ptr_mode)
39101 emit_insn (gen_addsi_1_zext (this_reg,
39102 gen_rtx_REG (ptr_mode,
39103 REGNO (this_reg)),
39104 vcall_mem));
39105 else
39106 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
39107 }
39108
39109 /* If necessary, drop THIS back to its stack slot. */
39110 if (this_reg && this_reg != this_param)
39111 emit_move_insn (this_param, this_reg);
39112
39113 fnaddr = XEXP (DECL_RTL (function), 0);
39114 if (TARGET_64BIT)
39115 {
39116 if (!flag_pic || targetm.binds_local_p (function)
39117 || TARGET_PECOFF)
39118 ;
39119 else
39120 {
39121 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
39122 tmp = gen_rtx_CONST (Pmode, tmp);
39123 fnaddr = gen_const_mem (Pmode, tmp);
39124 }
39125 }
39126 else
39127 {
39128 if (!flag_pic || targetm.binds_local_p (function))
39129 ;
39130 #if TARGET_MACHO
39131 else if (TARGET_MACHO)
39132 {
39133 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
39134 fnaddr = XEXP (fnaddr, 0);
39135 }
39136 #endif /* TARGET_MACHO */
39137 else
39138 {
39139 tmp = gen_rtx_REG (Pmode, CX_REG);
39140 output_set_got (tmp, NULL_RTX);
39141
39142 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
39143 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
39144 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
39145 fnaddr = gen_const_mem (Pmode, fnaddr);
39146 }
39147 }
39148
39149 /* Our sibling call patterns do not allow memories, because we have no
39150 predicate that can distinguish between frame and non-frame memory.
39151 For our purposes here, we can get away with (ab)using a jump pattern,
39152 because we're going to do no optimization. */
39153 if (MEM_P (fnaddr))
39154 {
39155 if (sibcall_insn_operand (fnaddr, word_mode))
39156 {
39157 fnaddr = XEXP (DECL_RTL (function), 0);
39158 tmp = gen_rtx_MEM (QImode, fnaddr);
39159 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39160 tmp = emit_call_insn (tmp);
39161 SIBLING_CALL_P (tmp) = 1;
39162 }
39163 else
39164 emit_jump_insn (gen_indirect_jump (fnaddr));
39165 }
39166 else
39167 {
39168 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
39169 fnaddr = legitimize_pic_address (fnaddr,
39170 gen_rtx_REG (Pmode, tmp_regno));
39171
39172 if (!sibcall_insn_operand (fnaddr, word_mode))
39173 {
39174 tmp = gen_rtx_REG (word_mode, tmp_regno);
39175 if (GET_MODE (fnaddr) != word_mode)
39176 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
39177 emit_move_insn (tmp, fnaddr);
39178 fnaddr = tmp;
39179 }
39180
39181 tmp = gen_rtx_MEM (QImode, fnaddr);
39182 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39183 tmp = emit_call_insn (tmp);
39184 SIBLING_CALL_P (tmp) = 1;
39185 }
39186 emit_barrier ();
39187
39188 /* Emit just enough of rest_of_compilation to get the insns emitted.
39189 Note that use_thunk calls assemble_start_function et al. */
39190 insn = get_insns ();
39191 shorten_branches (insn);
39192 final_start_function (insn, file, 1);
39193 final (insn, file, 1);
39194 final_end_function ();
39195 }
39196
39197 static void
39198 x86_file_start (void)
39199 {
39200 default_file_start ();
39201 if (TARGET_16BIT)
39202 fputs ("\t.code16gcc\n", asm_out_file);
39203 #if TARGET_MACHO
39204 darwin_file_start ();
39205 #endif
39206 if (X86_FILE_START_VERSION_DIRECTIVE)
39207 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
39208 if (X86_FILE_START_FLTUSED)
39209 fputs ("\t.global\t__fltused\n", asm_out_file);
39210 if (ix86_asm_dialect == ASM_INTEL)
39211 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
39212 }
39213
39214 int
39215 x86_field_alignment (tree field, int computed)
39216 {
39217 enum machine_mode mode;
39218 tree type = TREE_TYPE (field);
39219
39220 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
39221 return computed;
39222 mode = TYPE_MODE (strip_array_types (type));
39223 if (mode == DFmode || mode == DCmode
39224 || GET_MODE_CLASS (mode) == MODE_INT
39225 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
39226 return MIN (32, computed);
39227 return computed;
39228 }
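
/* For example, on ia32 without -malign-double a `double' or `long long'
   structure field is capped at 32-bit alignment by the function above,
   matching the traditional System V i386 struct layout.  */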
39229
39230 /* Print call to TARGET to FILE. */
39231
39232 static void
39233 x86_print_call_or_nop (FILE *file, const char *target)
39234 {
39235 if (flag_nop_mcount)
39236 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
39237 else
39238 fprintf (file, "1:\tcall\t%s\n", target);
39239 }
39240
39241 /* Output assembler code to FILE to increment profiler label # LABELNO
39242 for profiling a function entry. */
39243 void
39244 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
39245 {
39246 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
39247 : MCOUNT_NAME);
39248 if (TARGET_64BIT)
39249 {
39250 #ifndef NO_PROFILE_COUNTERS
39251 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
39252 #endif
39253
39254 if (!TARGET_PECOFF && flag_pic)
39255 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
39256 else
39257 x86_print_call_or_nop (file, mcount_name);
39258 }
39259 else if (flag_pic)
39260 {
39261 #ifndef NO_PROFILE_COUNTERS
39262 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39263 LPREFIX, labelno);
39264 #endif
39265 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39266 }
39267 else
39268 {
39269 #ifndef NO_PROFILE_COUNTERS
39270 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39271 LPREFIX, labelno);
39272 #endif
39273 x86_print_call_or_nop (file, mcount_name);
39274 }
39275
39276 if (flag_record_mcount)
39277 {
39278 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
39279 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
39280 fprintf (file, "\t.previous\n");
39281 }
39282 }
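
/* For reference, the 64-bit PIC case above emits

     1:   call   *<mcount>@GOTPCREL(%rip)

   (preceded by a load of the profile counter label into %r11 unless
   NO_PROFILE_COUNTERS is defined), and with -mrecord-mcount the address
   of the `1:' label is additionally recorded in the __mcount_loc
   section.  */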
39283
39284 /* We don't have exact information about the insn sizes, but we may assume
39285 quite safely that we are informed about all 1 byte insns and memory
39286 address sizes. This is enough to eliminate unnecessary padding in
39287 99% of cases. */
39288
39289 static int
39290 min_insn_size (rtx_insn *insn)
39291 {
39292 int l = 0, len;
39293
39294 if (!INSN_P (insn) || !active_insn_p (insn))
39295 return 0;
39296
39297 /* Discard alignments we've emitted, and jump instructions. */
39298 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39299 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39300 return 0;
39301
39302 /* Important case - calls are always 5 bytes.
39303 It is common to have many calls in a row. */
39304 if (CALL_P (insn)
39305 && symbolic_reference_mentioned_p (PATTERN (insn))
39306 && !SIBLING_CALL_P (insn))
39307 return 5;
39308 len = get_attr_length (insn);
39309 if (len <= 1)
39310 return 1;
39311
39312 /* For normal instructions we rely on get_attr_length being exact,
39313 with a few exceptions. */
39314 if (!JUMP_P (insn))
39315 {
39316 enum attr_type type = get_attr_type (insn);
39317
39318 switch (type)
39319 {
39320 case TYPE_MULTI:
39321 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39322 || asm_noperands (PATTERN (insn)) >= 0)
39323 return 0;
39324 break;
39325 case TYPE_OTHER:
39326 case TYPE_FCMP:
39327 break;
39328 default:
39329 /* Otherwise trust get_attr_length. */
39330 return len;
39331 }
39332
39333 l = get_attr_length_address (insn);
39334 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39335 l = 4;
39336 }
39337 if (l)
39338 return 1+l;
39339 else
39340 return 2;
39341 }
39342
39343 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39344
39345 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39346 window. */
39347
39348 static void
39349 ix86_avoid_jump_mispredicts (void)
39350 {
39351 rtx_insn *insn, *start = get_insns ();
39352 int nbytes = 0, njumps = 0;
39353 int isjump = 0;
39354
39355 /* Look for all minimal intervals of instructions containing 4 jumps.
39356 The intervals are bounded by START and INSN. NBYTES is the total
39357 size of instructions in the interval including INSN and not including
39358 START. When NBYTES is smaller than 16 bytes, it is possible
39359 that the end of START and INSN end up in the same 16-byte page.
39360
39361 The smallest offset in the page at which INSN can start is the case where
39362 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39363 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
39364
39365 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
39366 have to, control transfer to its label(s) can be performed through other
39367 means, and we also estimate the minimum length of all asm stmts as 0. */
39368 for (insn = start; insn; insn = NEXT_INSN (insn))
39369 {
39370 int min_size;
39371
39372 if (LABEL_P (insn))
39373 {
39374 int align = label_to_alignment (insn);
39375 int max_skip = label_to_max_skip (insn);
39376
39377 if (max_skip > 15)
39378 max_skip = 15;
39379 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39380 already in the current 16 byte page, because otherwise
39381 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39382 bytes to reach 16 byte boundary. */
39383 if (align <= 0
39384 || (align <= 3 && max_skip != (1 << align) - 1))
39385 max_skip = 0;
39386 if (dump_file)
39387 fprintf (dump_file, "Label %i with max_skip %i\n",
39388 INSN_UID (insn), max_skip);
39389 if (max_skip)
39390 {
39391 while (nbytes + max_skip >= 16)
39392 {
39393 start = NEXT_INSN (start);
39394 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39395 || CALL_P (start))
39396 njumps--, isjump = 1;
39397 else
39398 isjump = 0;
39399 nbytes -= min_insn_size (start);
39400 }
39401 }
39402 continue;
39403 }
39404
39405 min_size = min_insn_size (insn);
39406 nbytes += min_size;
39407 if (dump_file)
39408 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39409 INSN_UID (insn), min_size);
39410 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39411 || CALL_P (insn))
39412 njumps++;
39413 else
39414 continue;
39415
39416 while (njumps > 3)
39417 {
39418 start = NEXT_INSN (start);
39419 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39420 || CALL_P (start))
39421 njumps--, isjump = 1;
39422 else
39423 isjump = 0;
39424 nbytes -= min_insn_size (start);
39425 }
39426 gcc_assert (njumps >= 0);
39427 if (dump_file)
39428 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39429 INSN_UID (start), INSN_UID (insn), nbytes);
39430
39431 if (njumps == 3 && isjump && nbytes < 16)
39432 {
39433 int padsize = 15 - nbytes + min_insn_size (insn);
39434
39435 if (dump_file)
39436 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39437 INSN_UID (insn), padsize);
39438 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39439 }
39440 }
39441 }
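
/* A worked example of the pass above: when a fourth jump or call is seen
   and the interval holding all four still spans fewer than 16 bytes
   (njumps == 3, isjump, nbytes < 16), a pad of
   15 - nbytes + min_insn_size (insn) bytes is emitted in front of the
   last one so that it starts in the next 16-byte window.  */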
39442 #endif
39443
39444 /* AMD Athlon works faster
39445 when RET is not the destination of a conditional jump or directly preceded
39446 by another jump instruction. We avoid the penalty by inserting a NOP just
39447 before the RET instruction in such cases. */
39448 static void
39449 ix86_pad_returns (void)
39450 {
39451 edge e;
39452 edge_iterator ei;
39453
39454 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39455 {
39456 basic_block bb = e->src;
39457 rtx_insn *ret = BB_END (bb);
39458 rtx_insn *prev;
39459 bool replace = false;
39460
39461 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39462 || optimize_bb_for_size_p (bb))
39463 continue;
39464 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39465 if (active_insn_p (prev) || LABEL_P (prev))
39466 break;
39467 if (prev && LABEL_P (prev))
39468 {
39469 edge e;
39470 edge_iterator ei;
39471
39472 FOR_EACH_EDGE (e, ei, bb->preds)
39473 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39474 && !(e->flags & EDGE_FALLTHRU))
39475 {
39476 replace = true;
39477 break;
39478 }
39479 }
39480 if (!replace)
39481 {
39482 prev = prev_active_insn (ret);
39483 if (prev
39484 && ((JUMP_P (prev) && any_condjump_p (prev))
39485 || CALL_P (prev)))
39486 replace = true;
39487 /* Empty functions get a branch mispredict even when
39488 the jump destination is not visible to us. */
39489 if (!prev && !optimize_function_for_size_p (cfun))
39490 replace = true;
39491 }
39492 if (replace)
39493 {
39494 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39495 delete_insn (ret);
39496 }
39497 }
39498 }
39499
39500 /* Count the minimum number of instructions in BB. Return 4 if the
39501 number of instructions >= 4. */
39502
39503 static int
39504 ix86_count_insn_bb (basic_block bb)
39505 {
39506 rtx_insn *insn;
39507 int insn_count = 0;
39508
39509 /* Count number of instructions in this block. Return 4 if the number
39510 of instructions >= 4. */
39511 FOR_BB_INSNS (bb, insn)
39512 {
39513 /* This only happens in exit blocks. */
39514 if (JUMP_P (insn)
39515 && ANY_RETURN_P (PATTERN (insn)))
39516 break;
39517
39518 if (NONDEBUG_INSN_P (insn)
39519 && GET_CODE (PATTERN (insn)) != USE
39520 && GET_CODE (PATTERN (insn)) != CLOBBER)
39521 {
39522 insn_count++;
39523 if (insn_count >= 4)
39524 return insn_count;
39525 }
39526 }
39527
39528 return insn_count;
39529 }
39530
39531
39532 /* Count the minimum number of instructions in code path in BB.
39533 Return 4 if the number of instructions >= 4. */
39534
39535 static int
39536 ix86_count_insn (basic_block bb)
39537 {
39538 edge e;
39539 edge_iterator ei;
39540 int min_prev_count;
39541
39542 /* Only bother counting instructions along paths with no
39543 more than 2 basic blocks between entry and exit. Given
39544 that BB has an edge to exit, determine if a predecessor
39545 of BB has an edge from entry. If so, compute the number
39546 of instructions in the predecessor block. If there
39547 happen to be multiple such blocks, compute the minimum. */
39548 min_prev_count = 4;
39549 FOR_EACH_EDGE (e, ei, bb->preds)
39550 {
39551 edge prev_e;
39552 edge_iterator prev_ei;
39553
39554 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39555 {
39556 min_prev_count = 0;
39557 break;
39558 }
39559 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39560 {
39561 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39562 {
39563 int count = ix86_count_insn_bb (e->src);
39564 if (count < min_prev_count)
39565 min_prev_count = count;
39566 break;
39567 }
39568 }
39569 }
39570
39571 if (min_prev_count < 4)
39572 min_prev_count += ix86_count_insn_bb (bb);
39573
39574 return min_prev_count;
39575 }
39576
39577 /* Pad short function to 4 instructions. */
39578
39579 static void
39580 ix86_pad_short_function (void)
39581 {
39582 edge e;
39583 edge_iterator ei;
39584
39585 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39586 {
39587 rtx_insn *ret = BB_END (e->src);
39588 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39589 {
39590 int insn_count = ix86_count_insn (e->src);
39591
39592 /* Pad short function. */
39593 if (insn_count < 4)
39594 {
39595 rtx_insn *insn = ret;
39596
39597 /* Find epilogue. */
39598 while (insn
39599 && (!NOTE_P (insn)
39600 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39601 insn = PREV_INSN (insn);
39602
39603 if (!insn)
39604 insn = ret;
39605
39606 /* Two NOPs count as one instruction. */
39607 insn_count = 2 * (4 - insn_count);
39608 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39609 }
39610 }
39611 }
39612 }
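
/* For example, a function containing a single real instruction before its
   return gets insn_count == 1 above, so 2 * (4 - 1) == 6 NOPs are emitted
   before the epilogue (two NOPs being counted as one instruction by the
   hardware this tuning targets).  */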
39613
39614 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39615 the epilogue, the Windows system unwinder will apply epilogue logic and
39616 produce incorrect offsets. This can be avoided by adding a nop between
39617 the last insn that can throw and the first insn of the epilogue. */
39618
39619 static void
39620 ix86_seh_fixup_eh_fallthru (void)
39621 {
39622 edge e;
39623 edge_iterator ei;
39624
39625 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39626 {
39627 rtx_insn *insn, *next;
39628
39629 /* Find the beginning of the epilogue. */
39630 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39631 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39632 break;
39633 if (insn == NULL)
39634 continue;
39635
39636 /* We only care about preceding insns that can throw. */
39637 insn = prev_active_insn (insn);
39638 if (insn == NULL || !can_throw_internal (insn))
39639 continue;
39640
39641 /* Do not separate calls from their debug information. */
39642 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39643 if (NOTE_P (next)
39644 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39645 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39646 insn = next;
39647 else
39648 break;
39649
39650 emit_insn_after (gen_nops (const1_rtx), insn);
39651 }
39652 }
39653
39654 /* Implement machine specific optimizations. We implement padding of returns
39655 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
39656 static void
39657 ix86_reorg (void)
39658 {
39659 /* We are freeing block_for_insn in the toplev to keep compatibility
39660 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39661 compute_bb_for_insn ();
39662
39663 if (TARGET_SEH && current_function_has_exception_handlers ())
39664 ix86_seh_fixup_eh_fallthru ();
39665
39666 if (optimize && optimize_function_for_speed_p (cfun))
39667 {
39668 if (TARGET_PAD_SHORT_FUNCTION)
39669 ix86_pad_short_function ();
39670 else if (TARGET_PAD_RETURNS)
39671 ix86_pad_returns ();
39672 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39673 if (TARGET_FOUR_JUMP_LIMIT)
39674 ix86_avoid_jump_mispredicts ();
39675 #endif
39676 }
39677 }
39678
39679 /* Return nonzero when a QImode register that must be represented via a REX
39680 prefix is used. */
39681 bool
39682 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
39683 {
39684 int i;
39685 extract_insn_cached (insn);
39686 for (i = 0; i < recog_data.n_operands; i++)
39687 if (GENERAL_REG_P (recog_data.operand[i])
39688 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39689 return true;
39690 return false;
39691 }
39692
39693 /* Return nonzero when P points to a register encoded via a REX prefix.
39694 Called via for_each_rtx. */
39695 static int
39696 extended_reg_mentioned_1 (rtx *p, void *)
39697 {
39698 unsigned int regno;
39699 if (!REG_P (*p))
39700 return 0;
39701 regno = REGNO (*p);
39702 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39703 }
39704
39705 /* Return true when INSN mentions a register that must be encoded using a REX
39706 prefix. */
39707 bool
39708 x86_extended_reg_mentioned_p (rtx insn)
39709 {
39710 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39711 extended_reg_mentioned_1, NULL);
39712 }
39713
39714 /* If profitable, negate (without causing overflow) integer constant
39715 of mode MODE at location LOC. Return true in this case. */
39716 bool
39717 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39718 {
39719 HOST_WIDE_INT val;
39720
39721 if (!CONST_INT_P (*loc))
39722 return false;
39723
39724 switch (mode)
39725 {
39726 case DImode:
39727 /* DImode x86_64 constants must fit in 32 bits. */
39728 gcc_assert (x86_64_immediate_operand (*loc, mode));
39729
39730 mode = SImode;
39731 break;
39732
39733 case SImode:
39734 case HImode:
39735 case QImode:
39736 break;
39737
39738 default:
39739 gcc_unreachable ();
39740 }
39741
39742 /* Avoid overflows. */
39743 if (mode_signbit_p (mode, *loc))
39744 return false;
39745
39746 val = INTVAL (*loc);
39747
39748 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39749 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39750 if ((val < 0 && val != -128)
39751 || val == 128)
39752 {
39753 *loc = GEN_INT (-val);
39754 return true;
39755 }
39756
39757 return false;
39758 }
39759
39760 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39761 optabs would emit if we didn't have TFmode patterns. */
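/* For inputs with the sign bit set, a direct signed conversion would give
   the wrong result.  The sequence below instead halves the value with a
   logical shift, ORs the discarded low bit back in so that rounding is
   preserved, converts the halved value, and doubles the result.  */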
39762
39763 void
39764 x86_emit_floatuns (rtx operands[2])
39765 {
39766 rtx_code_label *neglab, *donelab;
39767 rtx i0, i1, f0, in, out;
39768 enum machine_mode mode, inmode;
39769
39770 inmode = GET_MODE (operands[1]);
39771 gcc_assert (inmode == SImode || inmode == DImode);
39772
39773 out = operands[0];
39774 in = force_reg (inmode, operands[1]);
39775 mode = GET_MODE (out);
39776 neglab = gen_label_rtx ();
39777 donelab = gen_label_rtx ();
39778 f0 = gen_reg_rtx (mode);
39779
39780 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39781
39782 expand_float (out, in, 0);
39783
39784 emit_jump_insn (gen_jump (donelab));
39785 emit_barrier ();
39786
39787 emit_label (neglab);
39788
39789 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39790 1, OPTAB_DIRECT);
39791 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39792 1, OPTAB_DIRECT);
39793 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39794
39795 expand_float (f0, i0, 0);
39796
39797 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39798
39799 emit_label (donelab);
39800 }
39801 \f
39802 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39803 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39804 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39805 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
39806
39807 /* Get a vector mode of the same size as the original but with elements
39808 twice as wide. This is only guaranteed to apply to integral vectors. */
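/* For example, under the usual genmodes.c ordering V16QImode maps to
   V8HImode and V8HImode maps to V4SImode.  */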
39809
39810 static inline enum machine_mode
39811 get_mode_wider_vector (enum machine_mode o)
39812 {
39813 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39814 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39815 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39816 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39817 return n;
39818 }
39819
39820 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39821 fill target with val via vec_duplicate. */
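/* VAL is first tried as-is, since a broadcast pattern may accept a memory
   operand directly.  If the resulting insn is not recognized, VAL is forced
   into a register in a separate sequence that is emitted before the
   vec_duplicate insn, and the insn is then re-recognized.  */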
39822
39823 static bool
39824 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39825 {
39826 bool ok;
39827 rtx_insn *insn;
39828 rtx dup;
39829
39830 /* First attempt to recognize VAL as-is. */
39831 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39832 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39833 if (recog_memoized (insn) < 0)
39834 {
39835 rtx_insn *seq;
39836 /* If that fails, force VAL into a register. */
39837
39838 start_sequence ();
39839 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39840 seq = get_insns ();
39841 end_sequence ();
39842 if (seq)
39843 emit_insn_before (seq, insn);
39844
39845 ok = recog_memoized (insn) >= 0;
39846 gcc_assert (ok);
39847 }
39848 return true;
39849 }
39850
39851 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39852 with all elements equal to VAR. Return true if successful. */
39853
39854 static bool
39855 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39856 rtx target, rtx val)
39857 {
39858 bool ok;
39859
39860 switch (mode)
39861 {
39862 case V2SImode:
39863 case V2SFmode:
39864 if (!mmx_ok)
39865 return false;
39866 /* FALLTHRU */
39867
39868 case V4DFmode:
39869 case V4DImode:
39870 case V8SFmode:
39871 case V8SImode:
39872 case V2DFmode:
39873 case V2DImode:
39874 case V4SFmode:
39875 case V4SImode:
39876 case V16SImode:
39877 case V8DImode:
39878 case V16SFmode:
39879 case V8DFmode:
39880 return ix86_vector_duplicate_value (mode, target, val);
39881
39882 case V4HImode:
39883 if (!mmx_ok)
39884 return false;
39885 if (TARGET_SSE || TARGET_3DNOW_A)
39886 {
39887 rtx x;
39888
39889 val = gen_lowpart (SImode, val);
39890 x = gen_rtx_TRUNCATE (HImode, val);
39891 x = gen_rtx_VEC_DUPLICATE (mode, x);
39892 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39893 return true;
39894 }
39895 goto widen;
39896
39897 case V8QImode:
39898 if (!mmx_ok)
39899 return false;
39900 goto widen;
39901
39902 case V8HImode:
39903 if (TARGET_AVX2)
39904 return ix86_vector_duplicate_value (mode, target, val);
39905
39906 if (TARGET_SSE2)
39907 {
39908 struct expand_vec_perm_d dperm;
39909 rtx tmp1, tmp2;
39910
39911 permute:
39912 memset (&dperm, 0, sizeof (dperm));
39913 dperm.target = target;
39914 dperm.vmode = mode;
39915 dperm.nelt = GET_MODE_NUNITS (mode);
39916 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39917 dperm.one_operand_p = true;
39918
39919 /* Extend to SImode using a paradoxical SUBREG. */
39920 tmp1 = gen_reg_rtx (SImode);
39921 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39922
39923 /* Insert the SImode value as low element of a V4SImode vector. */
39924 tmp2 = gen_reg_rtx (V4SImode);
39925 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39926 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39927
39928 ok = (expand_vec_perm_1 (&dperm)
39929 || expand_vec_perm_broadcast_1 (&dperm));
39930 gcc_assert (ok);
39931 return ok;
39932 }
39933 goto widen;
39934
39935 case V16QImode:
39936 if (TARGET_AVX2)
39937 return ix86_vector_duplicate_value (mode, target, val);
39938
39939 if (TARGET_SSE2)
39940 goto permute;
39941 goto widen;
39942
39943 widen:
39944 /* Replicate the value once into the next wider mode and recurse. */
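/* For example, a V8QImode broadcast of X becomes a V4HImode broadcast of
   (X | X << 8): the scalar is widened, shifted, ORed with itself, and the
   wider-mode duplicate is then expanded recursively.  */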
39945 {
39946 enum machine_mode smode, wsmode, wvmode;
39947 rtx x;
39948
39949 smode = GET_MODE_INNER (mode);
39950 wvmode = get_mode_wider_vector (mode);
39951 wsmode = GET_MODE_INNER (wvmode);
39952
39953 val = convert_modes (wsmode, smode, val, true);
39954 x = expand_simple_binop (wsmode, ASHIFT, val,
39955 GEN_INT (GET_MODE_BITSIZE (smode)),
39956 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39957 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39958
39959 x = gen_reg_rtx (wvmode);
39960 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39961 gcc_assert (ok);
39962 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39963 return ok;
39964 }
39965
39966 case V16HImode:
39967 case V32QImode:
39968 if (TARGET_AVX2)
39969 return ix86_vector_duplicate_value (mode, target, val);
39970 else
39971 {
39972 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39973 rtx x = gen_reg_rtx (hvmode);
39974
39975 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39976 gcc_assert (ok);
39977
39978 x = gen_rtx_VEC_CONCAT (mode, x, x);
39979 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39980 }
39981 return true;
39982
39983 case V64QImode:
39984 case V32HImode:
39985 if (TARGET_AVX512BW)
39986 return ix86_vector_duplicate_value (mode, target, val);
39987 else
39988 {
39989 enum machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
39990 rtx x = gen_reg_rtx (hvmode);
39991
39992 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39993 gcc_assert (ok);
39994
39995 x = gen_rtx_VEC_CONCAT (mode, x, x);
39996 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39997 }
39998 return true;
39999
40000 default:
40001 return false;
40002 }
40003 }
40004
40005 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40006 whose ONE_VAR element is VAR, and other elements are zero. Return true
40007 if successful. */
40008
40009 static bool
40010 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
40011 rtx target, rtx var, int one_var)
40012 {
40013 enum machine_mode vsimode;
40014 rtx new_target;
40015 rtx x, tmp;
40016 bool use_vector_set = false;
40017
40018 switch (mode)
40019 {
40020 case V2DImode:
40021 /* For SSE4.1, we normally use vector set. But if the second
40022 element is zero and inter-unit moves are OK, we use movq
40023 instead. */
40024 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
40025 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
40026 && one_var == 0));
40027 break;
40028 case V16QImode:
40029 case V4SImode:
40030 case V4SFmode:
40031 use_vector_set = TARGET_SSE4_1;
40032 break;
40033 case V8HImode:
40034 use_vector_set = TARGET_SSE2;
40035 break;
40036 case V4HImode:
40037 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
40038 break;
40039 case V32QImode:
40040 case V16HImode:
40041 case V8SImode:
40042 case V8SFmode:
40043 case V4DFmode:
40044 use_vector_set = TARGET_AVX;
40045 break;
40046 case V4DImode:
40047 /* Use ix86_expand_vector_set in 64bit mode only. */
40048 use_vector_set = TARGET_AVX && TARGET_64BIT;
40049 break;
40050 default:
40051 break;
40052 }
40053
40054 if (use_vector_set)
40055 {
40056 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
40057 var = force_reg (GET_MODE_INNER (mode), var);
40058 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40059 return true;
40060 }
40061
40062 switch (mode)
40063 {
40064 case V2SFmode:
40065 case V2SImode:
40066 if (!mmx_ok)
40067 return false;
40068 /* FALLTHRU */
40069
40070 case V2DFmode:
40071 case V2DImode:
40072 if (one_var != 0)
40073 return false;
40074 var = force_reg (GET_MODE_INNER (mode), var);
40075 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
40076 emit_insn (gen_rtx_SET (VOIDmode, target, x));
40077 return true;
40078
40079 case V4SFmode:
40080 case V4SImode:
40081 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
40082 new_target = gen_reg_rtx (mode);
40083 else
40084 new_target = target;
40085 var = force_reg (GET_MODE_INNER (mode), var);
40086 x = gen_rtx_VEC_DUPLICATE (mode, var);
40087 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
40088 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
40089 if (one_var != 0)
40090 {
40091 /* We need to shuffle the value to the correct position, so
40092 create a new pseudo to store the intermediate result. */
40093
40094 /* With SSE2, we can use the integer shuffle insns. */
40095 if (mode != V4SFmode && TARGET_SSE2)
40096 {
40097 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
40098 const1_rtx,
40099 GEN_INT (one_var == 1 ? 0 : 1),
40100 GEN_INT (one_var == 2 ? 0 : 1),
40101 GEN_INT (one_var == 3 ? 0 : 1)));
40102 if (target != new_target)
40103 emit_move_insn (target, new_target);
40104 return true;
40105 }
40106
40107 /* Otherwise convert the intermediate result to V4SFmode and
40108 use the SSE1 shuffle instructions. */
40109 if (mode != V4SFmode)
40110 {
40111 tmp = gen_reg_rtx (V4SFmode);
40112 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
40113 }
40114 else
40115 tmp = new_target;
40116
40117 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
40118 const1_rtx,
40119 GEN_INT (one_var == 1 ? 0 : 1),
40120 GEN_INT (one_var == 2 ? 0+4 : 1+4),
40121 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
40122
40123 if (mode != V4SFmode)
40124 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
40125 else if (tmp != target)
40126 emit_move_insn (target, tmp);
40127 }
40128 else if (target != new_target)
40129 emit_move_insn (target, new_target);
40130 return true;
40131
40132 case V8HImode:
40133 case V16QImode:
40134 vsimode = V4SImode;
40135 goto widen;
40136 case V4HImode:
40137 case V8QImode:
40138 if (!mmx_ok)
40139 return false;
40140 vsimode = V2SImode;
40141 goto widen;
40142 widen:
40143 if (one_var != 0)
40144 return false;
40145
40146 /* Zero extend the variable element to SImode and recurse. */
40147 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
40148
40149 x = gen_reg_rtx (vsimode);
40150 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
40151 var, one_var))
40152 gcc_unreachable ();
40153
40154 emit_move_insn (target, gen_lowpart (mode, x));
40155 return true;
40156
40157 default:
40158 return false;
40159 }
40160 }
40161
40162 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40163 consisting of the values in VALS. It is known that all elements
40164 except ONE_VAR are constants. Return true if successful. */
40165
40166 static bool
40167 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
40168 rtx target, rtx vals, int one_var)
40169 {
40170 rtx var = XVECEXP (vals, 0, one_var);
40171 enum machine_mode wmode;
40172 rtx const_vec, x;
40173
40174 const_vec = copy_rtx (vals);
40175 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
40176 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
40177
40178 switch (mode)
40179 {
40180 case V2DFmode:
40181 case V2DImode:
40182 case V2SFmode:
40183 case V2SImode:
40184 /* For the two element vectors, it's just as easy to use
40185 the general case. */
40186 return false;
40187
40188 case V4DImode:
40189 /* Use ix86_expand_vector_set in 64bit mode only. */
40190 if (!TARGET_64BIT)
40191 return false;
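/* FALLTHRU */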
40192 case V4DFmode:
40193 case V8SFmode:
40194 case V8SImode:
40195 case V16HImode:
40196 case V32QImode:
40197 case V4SFmode:
40198 case V4SImode:
40199 case V8HImode:
40200 case V4HImode:
40201 break;
40202
40203 case V16QImode:
40204 if (TARGET_SSE4_1)
40205 break;
40206 wmode = V8HImode;
40207 goto widen;
40208 case V8QImode:
40209 wmode = V4HImode;
40210 goto widen;
40211 widen:
40212 /* There's no way to set one QImode entry easily. Combine
40213 the variable value with its adjacent constant value, and
40214 promote to an HImode set. */
40215 x = XVECEXP (vals, 0, one_var ^ 1);
40216 if (one_var & 1)
40217 {
40218 var = convert_modes (HImode, QImode, var, true);
40219 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
40220 NULL_RTX, 1, OPTAB_LIB_WIDEN);
40221 x = GEN_INT (INTVAL (x) & 0xff);
40222 }
40223 else
40224 {
40225 var = convert_modes (HImode, QImode, var, true);
40226 x = gen_int_mode (INTVAL (x) << 8, HImode);
40227 }
40228 if (x != const0_rtx)
40229 var = expand_simple_binop (HImode, IOR, var, x, var,
40230 1, OPTAB_LIB_WIDEN);
40231
40232 x = gen_reg_rtx (wmode);
40233 emit_move_insn (x, gen_lowpart (wmode, const_vec));
40234 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
40235
40236 emit_move_insn (target, gen_lowpart (mode, x));
40237 return true;
40238
40239 default:
40240 return false;
40241 }
40242
40243 emit_move_insn (target, const_vec);
40244 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40245 return true;
40246 }
40247
40248 /* A subroutine of ix86_expand_vector_init_general. Use vector
40249 concatenate to handle the most general case: all values variable,
40250 and none identical. */
40251
40252 static void
40253 ix86_expand_vector_init_concat (enum machine_mode mode,
40254 rtx target, rtx *ops, int n)
40255 {
40256 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
40257 rtx first[16], second[8], third[4];
40258 rtvec v;
40259 int i, j;
40260
40261 switch (n)
40262 {
40263 case 2:
40264 switch (mode)
40265 {
40266 case V16SImode:
40267 cmode = V8SImode;
40268 break;
40269 case V16SFmode:
40270 cmode = V8SFmode;
40271 break;
40272 case V8DImode:
40273 cmode = V4DImode;
40274 break;
40275 case V8DFmode:
40276 cmode = V4DFmode;
40277 break;
40278 case V8SImode:
40279 cmode = V4SImode;
40280 break;
40281 case V8SFmode:
40282 cmode = V4SFmode;
40283 break;
40284 case V4DImode:
40285 cmode = V2DImode;
40286 break;
40287 case V4DFmode:
40288 cmode = V2DFmode;
40289 break;
40290 case V4SImode:
40291 cmode = V2SImode;
40292 break;
40293 case V4SFmode:
40294 cmode = V2SFmode;
40295 break;
40296 case V2DImode:
40297 cmode = DImode;
40298 break;
40299 case V2SImode:
40300 cmode = SImode;
40301 break;
40302 case V2DFmode:
40303 cmode = DFmode;
40304 break;
40305 case V2SFmode:
40306 cmode = SFmode;
40307 break;
40308 default:
40309 gcc_unreachable ();
40310 }
40311
40312 if (!register_operand (ops[1], cmode))
40313 ops[1] = force_reg (cmode, ops[1]);
40314 if (!register_operand (ops[0], cmode))
40315 ops[0] = force_reg (cmode, ops[0]);
40316 emit_insn (gen_rtx_SET (VOIDmode, target,
40317 gen_rtx_VEC_CONCAT (mode, ops[0],
40318 ops[1])));
40319 break;
40320
40321 case 4:
40322 switch (mode)
40323 {
40324 case V4DImode:
40325 cmode = V2DImode;
40326 break;
40327 case V4DFmode:
40328 cmode = V2DFmode;
40329 break;
40330 case V4SImode:
40331 cmode = V2SImode;
40332 break;
40333 case V4SFmode:
40334 cmode = V2SFmode;
40335 break;
40336 default:
40337 gcc_unreachable ();
40338 }
40339 goto half;
40340
40341 case 8:
40342 switch (mode)
40343 {
40344 case V8DImode:
40345 cmode = V2DImode;
40346 hmode = V4DImode;
40347 break;
40348 case V8DFmode:
40349 cmode = V2DFmode;
40350 hmode = V4DFmode;
40351 break;
40352 case V8SImode:
40353 cmode = V2SImode;
40354 hmode = V4SImode;
40355 break;
40356 case V8SFmode:
40357 cmode = V2SFmode;
40358 hmode = V4SFmode;
40359 break;
40360 default:
40361 gcc_unreachable ();
40362 }
40363 goto half;
40364
40365 case 16:
40366 switch (mode)
40367 {
40368 case V16SImode:
40369 cmode = V2SImode;
40370 hmode = V4SImode;
40371 gmode = V8SImode;
40372 break;
40373 case V16SFmode:
40374 cmode = V2SFmode;
40375 hmode = V4SFmode;
40376 gmode = V8SFmode;
40377 break;
40378 default:
40379 gcc_unreachable ();
40380 }
40381 goto half;
40382
40383 half:
40384 /* FIXME: We process inputs backward to help RA. PR 36222. */
40385 i = n - 1;
40386 j = (n >> 1) - 1;
40387 for (; i > 0; i -= 2, j--)
40388 {
40389 first[j] = gen_reg_rtx (cmode);
40390 v = gen_rtvec (2, ops[i - 1], ops[i]);
40391 ix86_expand_vector_init (false, first[j],
40392 gen_rtx_PARALLEL (cmode, v));
40393 }
40394
40395 n >>= 1;
40396 if (n > 4)
40397 {
40398 gcc_assert (hmode != VOIDmode);
40399 gcc_assert (gmode != VOIDmode);
40400 for (i = j = 0; i < n; i += 2, j++)
40401 {
40402 second[j] = gen_reg_rtx (hmode);
40403 ix86_expand_vector_init_concat (hmode, second [j],
40404 &first [i], 2);
40405 }
40406 n >>= 1;
40407 for (i = j = 0; i < n; i += 2, j++)
40408 {
40409 third[j] = gen_reg_rtx (gmode);
40410 ix86_expand_vector_init_concat (gmode, third[j],
40411 &second[i], 2);
40412 }
40413 n >>= 1;
40414 ix86_expand_vector_init_concat (mode, target, third, n);
40415 }
40416 else if (n > 2)
40417 {
40418 gcc_assert (hmode != VOIDmode);
40419 for (i = j = 0; i < n; i += 2, j++)
40420 {
40421 second[j] = gen_reg_rtx (hmode);
40422 ix86_expand_vector_init_concat (hmode, second [j],
40423 &first [i], 2);
40424 }
40425 n >>= 1;
40426 ix86_expand_vector_init_concat (mode, target, second, n);
40427 }
40428 else
40429 ix86_expand_vector_init_concat (mode, target, first, n);
40430 break;
40431
40432 default:
40433 gcc_unreachable ();
40434 }
40435 }
40436
40437 /* A subroutine of ix86_expand_vector_init_general. Use vector
40438 interleave to handle the most general case: all values variable,
40439 and none identical. */
40440
40441 static void
40442 ix86_expand_vector_init_interleave (enum machine_mode mode,
40443 rtx target, rtx *ops, int n)
40444 {
40445 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40446 int i, j;
40447 rtx op0, op1;
40448 rtx (*gen_load_even) (rtx, rtx, rtx);
40449 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40450 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40451
40452 switch (mode)
40453 {
40454 case V8HImode:
40455 gen_load_even = gen_vec_setv8hi;
40456 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40457 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40458 inner_mode = HImode;
40459 first_imode = V4SImode;
40460 second_imode = V2DImode;
40461 third_imode = VOIDmode;
40462 break;
40463 case V16QImode:
40464 gen_load_even = gen_vec_setv16qi;
40465 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40466 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40467 inner_mode = QImode;
40468 first_imode = V8HImode;
40469 second_imode = V4SImode;
40470 third_imode = V2DImode;
40471 break;
40472 default:
40473 gcc_unreachable ();
40474 }
40475
40476 for (i = 0; i < n; i++)
40477 {
40478 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40479 op0 = gen_reg_rtx (SImode);
40480 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40481
40482 /* Insert the SImode value as low element of V4SImode vector. */
40483 op1 = gen_reg_rtx (V4SImode);
40484 op0 = gen_rtx_VEC_MERGE (V4SImode,
40485 gen_rtx_VEC_DUPLICATE (V4SImode,
40486 op0),
40487 CONST0_RTX (V4SImode),
40488 const1_rtx);
40489 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40490
40491 /* Cast the V4SImode vector back to a vector in the original mode. */
40492 op0 = gen_reg_rtx (mode);
40493 emit_move_insn (op0, gen_lowpart (mode, op1));
40494
40495 /* Load even elements into the second position. */
40496 emit_insn (gen_load_even (op0,
40497 force_reg (inner_mode,
40498 ops [i + i + 1]),
40499 const1_rtx));
40500
40501 /* Cast vector to FIRST_IMODE vector. */
40502 ops[i] = gen_reg_rtx (first_imode);
40503 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40504 }
40505
40506 /* Interleave low FIRST_IMODE vectors. */
40507 for (i = j = 0; i < n; i += 2, j++)
40508 {
40509 op0 = gen_reg_rtx (first_imode);
40510 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40511
40512 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40513 ops[j] = gen_reg_rtx (second_imode);
40514 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40515 }
40516
40517 /* Interleave low SECOND_IMODE vectors. */
40518 switch (second_imode)
40519 {
40520 case V4SImode:
40521 for (i = j = 0; i < n / 2; i += 2, j++)
40522 {
40523 op0 = gen_reg_rtx (second_imode);
40524 emit_insn (gen_interleave_second_low (op0, ops[i],
40525 ops[i + 1]));
40526
40527 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40528 vector. */
40529 ops[j] = gen_reg_rtx (third_imode);
40530 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40531 }
40532 second_imode = V2DImode;
40533 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40534 /* FALLTHRU */
40535
40536 case V2DImode:
40537 op0 = gen_reg_rtx (second_imode);
40538 emit_insn (gen_interleave_second_low (op0, ops[0],
40539 ops[1]));
40540
40541 /* Cast the SECOND_IMODE vector back to a vector in the original
40542 mode. */
40543 emit_insn (gen_rtx_SET (VOIDmode, target,
40544 gen_lowpart (mode, op0)));
40545 break;
40546
40547 default:
40548 gcc_unreachable ();
40549 }
40550 }
40551
40552 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40553 all values variable, and none identical. */
40554
40555 static void
40556 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40557 rtx target, rtx vals)
40558 {
40559 rtx ops[64], op0, op1, op2, op3, op4, op5;
40560 enum machine_mode half_mode = VOIDmode;
40561 enum machine_mode quarter_mode = VOIDmode;
40562 int n, i;
40563
40564 switch (mode)
40565 {
40566 case V2SFmode:
40567 case V2SImode:
40568 if (!mmx_ok && !TARGET_SSE)
40569 break;
40570 /* FALLTHRU */
40571
40572 case V16SImode:
40573 case V16SFmode:
40574 case V8DFmode:
40575 case V8DImode:
40576 case V8SFmode:
40577 case V8SImode:
40578 case V4DFmode:
40579 case V4DImode:
40580 case V4SFmode:
40581 case V4SImode:
40582 case V2DFmode:
40583 case V2DImode:
40584 n = GET_MODE_NUNITS (mode);
40585 for (i = 0; i < n; i++)
40586 ops[i] = XVECEXP (vals, 0, i);
40587 ix86_expand_vector_init_concat (mode, target, ops, n);
40588 return;
40589
40590 case V32QImode:
40591 half_mode = V16QImode;
40592 goto half;
40593
40594 case V16HImode:
40595 half_mode = V8HImode;
40596 goto half;
40597
40598 half:
40599 n = GET_MODE_NUNITS (mode);
40600 for (i = 0; i < n; i++)
40601 ops[i] = XVECEXP (vals, 0, i);
40602 op0 = gen_reg_rtx (half_mode);
40603 op1 = gen_reg_rtx (half_mode);
40604 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40605 n >> 2);
40606 ix86_expand_vector_init_interleave (half_mode, op1,
40607 &ops [n >> 1], n >> 2);
40608 emit_insn (gen_rtx_SET (VOIDmode, target,
40609 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40610 return;
40611
40612 case V64QImode:
40613 quarter_mode = V16QImode;
40614 half_mode = V32QImode;
40615 goto quarter;
40616
40617 case V32HImode:
40618 quarter_mode = V8HImode;
40619 half_mode = V16HImode;
40620 goto quarter;
40621
40622 quarter:
40623 n = GET_MODE_NUNITS (mode);
40624 for (i = 0; i < n; i++)
40625 ops[i] = XVECEXP (vals, 0, i);
40626 op0 = gen_reg_rtx (quarter_mode);
40627 op1 = gen_reg_rtx (quarter_mode);
40628 op2 = gen_reg_rtx (quarter_mode);
40629 op3 = gen_reg_rtx (quarter_mode);
40630 op4 = gen_reg_rtx (half_mode);
40631 op5 = gen_reg_rtx (half_mode);
40632 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
40633 n >> 3);
40634 ix86_expand_vector_init_interleave (quarter_mode, op1,
40635 &ops [n >> 2], n >> 3);
40636 ix86_expand_vector_init_interleave (quarter_mode, op2,
40637 &ops [n >> 1], n >> 3);
40638 ix86_expand_vector_init_interleave (quarter_mode, op3,
40639 &ops [(n >> 1) | (n >> 2)], n >> 3);
40640 emit_insn (gen_rtx_SET (VOIDmode, op4,
40641 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
40642 emit_insn (gen_rtx_SET (VOIDmode, op5,
40643 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
40644 emit_insn (gen_rtx_SET (VOIDmode, target,
40645 gen_rtx_VEC_CONCAT (mode, op4, op5)));
40646 return;
40647
40648 case V16QImode:
40649 if (!TARGET_SSE4_1)
40650 break;
40651 /* FALLTHRU */
40652
40653 case V8HImode:
40654 if (!TARGET_SSE2)
40655 break;
40656
40657 /* Don't use ix86_expand_vector_init_interleave if we can't
40658 move from GPR to SSE register directly. */
40659 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40660 break;
40661
40662 n = GET_MODE_NUNITS (mode);
40663 for (i = 0; i < n; i++)
40664 ops[i] = XVECEXP (vals, 0, i);
40665 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40666 return;
40667
40668 case V4HImode:
40669 case V8QImode:
40670 break;
40671
40672 default:
40673 gcc_unreachable ();
40674 }
40675
40676 {
40677 int i, j, n_elts, n_words, n_elt_per_word;
40678 enum machine_mode inner_mode;
40679 rtx words[4], shift;
40680
40681 inner_mode = GET_MODE_INNER (mode);
40682 n_elts = GET_MODE_NUNITS (mode);
40683 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40684 n_elt_per_word = n_elts / n_words;
40685 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40686
40687 for (i = 0; i < n_words; ++i)
40688 {
40689 rtx word = NULL_RTX;
40690
40691 for (j = 0; j < n_elt_per_word; ++j)
40692 {
40693 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40694 elt = convert_modes (word_mode, inner_mode, elt, true);
40695
40696 if (j == 0)
40697 word = elt;
40698 else
40699 {
40700 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40701 word, 1, OPTAB_LIB_WIDEN);
40702 word = expand_simple_binop (word_mode, IOR, word, elt,
40703 word, 1, OPTAB_LIB_WIDEN);
40704 }
40705 }
40706
40707 words[i] = word;
40708 }
40709
40710 if (n_words == 1)
40711 emit_move_insn (target, gen_lowpart (mode, words[0]));
40712 else if (n_words == 2)
40713 {
40714 rtx tmp = gen_reg_rtx (mode);
40715 emit_clobber (tmp);
40716 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40717 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40718 emit_move_insn (target, tmp);
40719 }
40720 else if (n_words == 4)
40721 {
40722 rtx tmp = gen_reg_rtx (V4SImode);
40723 gcc_assert (word_mode == SImode);
40724 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40725 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40726 emit_move_insn (target, gen_lowpart (mode, tmp));
40727 }
40728 else
40729 gcc_unreachable ();
40730 }
40731 }
40732
40733 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40734 instructions unless MMX_OK is true. */
40735
40736 void
40737 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40738 {
40739 enum machine_mode mode = GET_MODE (target);
40740 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40741 int n_elts = GET_MODE_NUNITS (mode);
40742 int n_var = 0, one_var = -1;
40743 bool all_same = true, all_const_zero = true;
40744 int i;
40745 rtx x;
40746
40747 for (i = 0; i < n_elts; ++i)
40748 {
40749 x = XVECEXP (vals, 0, i);
40750 if (!(CONST_INT_P (x)
40751 || GET_CODE (x) == CONST_DOUBLE
40752 || GET_CODE (x) == CONST_FIXED))
40753 n_var++, one_var = i;
40754 else if (x != CONST0_RTX (inner_mode))
40755 all_const_zero = false;
40756 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40757 all_same = false;
40758 }
40759
40760 /* Constants are best loaded from the constant pool. */
40761 if (n_var == 0)
40762 {
40763 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40764 return;
40765 }
40766
40767 /* If all values are identical, broadcast the value. */
40768 if (all_same
40769 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40770 XVECEXP (vals, 0, 0)))
40771 return;
40772
40773 /* Values where only one field is non-constant are best loaded from
40774 the pool and overwritten via move later. */
40775 if (n_var == 1)
40776 {
40777 if (all_const_zero
40778 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40779 XVECEXP (vals, 0, one_var),
40780 one_var))
40781 return;
40782
40783 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40784 return;
40785 }
40786
40787 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40788 }
40789
40790 void
40791 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40792 {
40793 enum machine_mode mode = GET_MODE (target);
40794 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40795 enum machine_mode half_mode;
40796 bool use_vec_merge = false;
40797 rtx tmp;
40798 static rtx (*gen_extract[6][2]) (rtx, rtx)
40799 = {
40800 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40801 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40802 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40803 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40804 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40805 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40806 };
40807 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40808 = {
40809 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40810 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40811 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40812 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40813 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40814 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40815 };
40816 int i, j, n;
40817
40818 switch (mode)
40819 {
40820 case V2SFmode:
40821 case V2SImode:
40822 if (mmx_ok)
40823 {
40824 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40825 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40826 if (elt == 0)
40827 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40828 else
40829 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40830 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40831 return;
40832 }
40833 break;
40834
40835 case V2DImode:
40836 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40837 if (use_vec_merge)
40838 break;
40839
40840 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40841 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40842 if (elt == 0)
40843 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40844 else
40845 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40846 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40847 return;
40848
40849 case V2DFmode:
40850 {
40851 rtx op0, op1;
40852
40853 /* For the two element vectors, we implement a VEC_CONCAT with
40854 the extraction of the other element. */
40855
40856 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40857 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40858
40859 if (elt == 0)
40860 op0 = val, op1 = tmp;
40861 else
40862 op0 = tmp, op1 = val;
40863
40864 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40865 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40866 }
40867 return;
40868
40869 case V4SFmode:
40870 use_vec_merge = TARGET_SSE4_1;
40871 if (use_vec_merge)
40872 break;
40873
40874 switch (elt)
40875 {
40876 case 0:
40877 use_vec_merge = true;
40878 break;
40879
40880 case 1:
40881 /* tmp = target = A B C D */
40882 tmp = copy_to_reg (target);
40883 /* target = A A B B */
40884 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40885 /* target = X A B B */
40886 ix86_expand_vector_set (false, target, val, 0);
40887 /* target = A X C D */
40888 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40889 const1_rtx, const0_rtx,
40890 GEN_INT (2+4), GEN_INT (3+4)));
40891 return;
40892
40893 case 2:
40894 /* tmp = target = A B C D */
40895 tmp = copy_to_reg (target);
40896 /* tmp = X B C D */
40897 ix86_expand_vector_set (false, tmp, val, 0);
40898 /* target = A B X D */
40899 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40900 const0_rtx, const1_rtx,
40901 GEN_INT (0+4), GEN_INT (3+4)));
40902 return;
40903
40904 case 3:
40905 /* tmp = target = A B C D */
40906 tmp = copy_to_reg (target);
40907 /* tmp = X B C D */
40908 ix86_expand_vector_set (false, tmp, val, 0);
40909 /* target = A B C X */
40910 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40911 const0_rtx, const1_rtx,
40912 GEN_INT (2+4), GEN_INT (0+4)));
40913 return;
40914
40915 default:
40916 gcc_unreachable ();
40917 }
40918 break;
40919
40920 case V4SImode:
40921 use_vec_merge = TARGET_SSE4_1;
40922 if (use_vec_merge)
40923 break;
40924
40925 /* Element 0 handled by vec_merge below. */
40926 if (elt == 0)
40927 {
40928 use_vec_merge = true;
40929 break;
40930 }
40931
40932 if (TARGET_SSE2)
40933 {
40934 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40935 store into element 0, then shuffle them back. */
40936
40937 rtx order[4];
40938
40939 order[0] = GEN_INT (elt);
40940 order[1] = const1_rtx;
40941 order[2] = const2_rtx;
40942 order[3] = GEN_INT (3);
40943 order[elt] = const0_rtx;
40944
40945 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40946 order[1], order[2], order[3]));
40947
40948 ix86_expand_vector_set (false, target, val, 0);
40949
40950 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40951 order[1], order[2], order[3]));
40952 }
40953 else
40954 {
40955 /* For SSE1, we have to reuse the V4SF code. */
40956 rtx t = gen_reg_rtx (V4SFmode);
40957 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40958 emit_move_insn (target, gen_lowpart (mode, t));
40959 }
40960 return;
40961
40962 case V8HImode:
40963 use_vec_merge = TARGET_SSE2;
40964 break;
40965 case V4HImode:
40966 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40967 break;
40968
40969 case V16QImode:
40970 use_vec_merge = TARGET_SSE4_1;
40971 break;
40972
40973 case V8QImode:
40974 break;
40975
40976 case V32QImode:
40977 half_mode = V16QImode;
40978 j = 0;
40979 n = 16;
40980 goto half;
40981
40982 case V16HImode:
40983 half_mode = V8HImode;
40984 j = 1;
40985 n = 8;
40986 goto half;
40987
40988 case V8SImode:
40989 half_mode = V4SImode;
40990 j = 2;
40991 n = 4;
40992 goto half;
40993
40994 case V4DImode:
40995 half_mode = V2DImode;
40996 j = 3;
40997 n = 2;
40998 goto half;
40999
41000 case V8SFmode:
41001 half_mode = V4SFmode;
41002 j = 4;
41003 n = 4;
41004 goto half;
41005
41006 case V4DFmode:
41007 half_mode = V2DFmode;
41008 j = 5;
41009 n = 2;
41010 goto half;
41011
41012 half:
41013 /* Compute offset. */
41014 i = elt / n;
41015 elt %= n;
41016
41017 gcc_assert (i <= 1);
41018
41019 /* Extract the half. */
41020 tmp = gen_reg_rtx (half_mode);
41021 emit_insn (gen_extract[j][i] (tmp, target));
41022
41023 /* Put val in tmp at elt. */
41024 ix86_expand_vector_set (false, tmp, val, elt);
41025
41026 /* Put it back. */
41027 emit_insn (gen_insert[j][i] (target, target, tmp));
41028 return;
41029
41030 case V8DFmode:
41031 if (TARGET_AVX512F)
41032 {
41033 tmp = gen_reg_rtx (mode);
41034 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41035 gen_rtx_VEC_DUPLICATE (mode, val)));
41036 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
41037 force_reg (QImode, GEN_INT (1 << elt))));
41038 return;
41039 }
41040 else
41041 break;
41042 case V8DImode:
41043 if (TARGET_AVX512F)
41044 {
41045 tmp = gen_reg_rtx (mode);
41046 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41047 gen_rtx_VEC_DUPLICATE (mode, val)));
41048 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
41049 force_reg (QImode, GEN_INT (1 << elt))));
41050 return;
41051 }
41052 else
41053 break;
41054 case V16SFmode:
41055 if (TARGET_AVX512F)
41056 {
41057 tmp = gen_reg_rtx (mode);
41058 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41059 gen_rtx_VEC_DUPLICATE (mode, val)));
41060 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
41061 force_reg (HImode, GEN_INT (1 << elt))));
41062 return;
41063 }
41064 else
41065 break;
41066 case V16SImode:
41067 if (TARGET_AVX512F)
41068 {
41069 tmp = gen_reg_rtx (mode);
41070 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41071 gen_rtx_VEC_DUPLICATE (mode, val)));
41072 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
41073 force_reg (HImode, GEN_INT (1 << elt))));
41074 return;
41075 }
41076 else
41077 break;
41078 case V32HImode:
41079 if (TARGET_AVX512F && TARGET_AVX512BW)
41080 {
41081 tmp = gen_reg_rtx (mode);
41082 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41083 gen_rtx_VEC_DUPLICATE (mode, val)));
41084 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
41085 force_reg (SImode, GEN_INT (1 << elt))));
41086 return;
41087 }
41088 else
41089 break;
41090 case V64QImode:
41091 if (TARGET_AVX512F && TARGET_AVX512BW)
41092 {
41093 tmp = gen_reg_rtx (mode);
41094 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41095 gen_rtx_VEC_DUPLICATE (mode, val)));
41096 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
41097 force_reg (DImode, GEN_INT (1 << elt))));
41098 return;
41099 }
41100 else
41101 break;
41102
41103 default:
41104 break;
41105 }
41106
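/* If no mode-specific expansion applied above, either synthesize the insert
   as a vec_merge of a vec_duplicate with mask 1 << ELT, or go through
   memory: store the whole vector to a stack temporary, write VAL into the
   selected element, and load the vector back.  */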
41107 if (use_vec_merge)
41108 {
41109 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
41110 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
41111 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41112 }
41113 else
41114 {
41115 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41116
41117 emit_move_insn (mem, target);
41118
41119 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41120 emit_move_insn (tmp, val);
41121
41122 emit_move_insn (target, mem);
41123 }
41124 }
41125
41126 void
41127 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
41128 {
41129 enum machine_mode mode = GET_MODE (vec);
41130 enum machine_mode inner_mode = GET_MODE_INNER (mode);
41131 bool use_vec_extr = false;
41132 rtx tmp;
41133
41134 switch (mode)
41135 {
41136 case V2SImode:
41137 case V2SFmode:
41138 if (!mmx_ok)
41139 break;
41140 /* FALLTHRU */
41141
41142 case V2DFmode:
41143 case V2DImode:
41144 use_vec_extr = true;
41145 break;
41146
41147 case V4SFmode:
41148 use_vec_extr = TARGET_SSE4_1;
41149 if (use_vec_extr)
41150 break;
41151
41152 switch (elt)
41153 {
41154 case 0:
41155 tmp = vec;
41156 break;
41157
41158 case 1:
41159 case 3:
41160 tmp = gen_reg_rtx (mode);
41161 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
41162 GEN_INT (elt), GEN_INT (elt),
41163 GEN_INT (elt+4), GEN_INT (elt+4)));
41164 break;
41165
41166 case 2:
41167 tmp = gen_reg_rtx (mode);
41168 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
41169 break;
41170
41171 default:
41172 gcc_unreachable ();
41173 }
41174 vec = tmp;
41175 use_vec_extr = true;
41176 elt = 0;
41177 break;
41178
41179 case V4SImode:
41180 use_vec_extr = TARGET_SSE4_1;
41181 if (use_vec_extr)
41182 break;
41183
41184 if (TARGET_SSE2)
41185 {
41186 switch (elt)
41187 {
41188 case 0:
41189 tmp = vec;
41190 break;
41191
41192 case 1:
41193 case 3:
41194 tmp = gen_reg_rtx (mode);
41195 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
41196 GEN_INT (elt), GEN_INT (elt),
41197 GEN_INT (elt), GEN_INT (elt)));
41198 break;
41199
41200 case 2:
41201 tmp = gen_reg_rtx (mode);
41202 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
41203 break;
41204
41205 default:
41206 gcc_unreachable ();
41207 }
41208 vec = tmp;
41209 use_vec_extr = true;
41210 elt = 0;
41211 }
41212 else
41213 {
41214 /* For SSE1, we have to reuse the V4SF code. */
41215 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
41216 gen_lowpart (V4SFmode, vec), elt);
41217 return;
41218 }
41219 break;
41220
41221 case V8HImode:
41222 use_vec_extr = TARGET_SSE2;
41223 break;
41224 case V4HImode:
41225 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
41226 break;
41227
41228 case V16QImode:
41229 use_vec_extr = TARGET_SSE4_1;
41230 break;
41231
41232 case V8SFmode:
41233 if (TARGET_AVX)
41234 {
41235 tmp = gen_reg_rtx (V4SFmode);
41236 if (elt < 4)
41237 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
41238 else
41239 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
41240 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41241 return;
41242 }
41243 break;
41244
41245 case V4DFmode:
41246 if (TARGET_AVX)
41247 {
41248 tmp = gen_reg_rtx (V2DFmode);
41249 if (elt < 2)
41250 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
41251 else
41252 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
41253 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41254 return;
41255 }
41256 break;
41257
41258 case V32QImode:
41259 if (TARGET_AVX)
41260 {
41261 tmp = gen_reg_rtx (V16QImode);
41262 if (elt < 16)
41263 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
41264 else
41265 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
41266 ix86_expand_vector_extract (false, target, tmp, elt & 15);
41267 return;
41268 }
41269 break;
41270
41271 case V16HImode:
41272 if (TARGET_AVX)
41273 {
41274 tmp = gen_reg_rtx (V8HImode);
41275 if (elt < 8)
41276 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
41277 else
41278 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
41279 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41280 return;
41281 }
41282 break;
41283
41284 case V8SImode:
41285 if (TARGET_AVX)
41286 {
41287 tmp = gen_reg_rtx (V4SImode);
41288 if (elt < 4)
41289 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
41290 else
41291 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
41292 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41293 return;
41294 }
41295 break;
41296
41297 case V4DImode:
41298 if (TARGET_AVX)
41299 {
41300 tmp = gen_reg_rtx (V2DImode);
41301 if (elt < 2)
41302 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
41303 else
41304 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
41305 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41306 return;
41307 }
41308 break;
41309
41310 case V32HImode:
41311 if (TARGET_AVX512BW)
41312 {
41313 tmp = gen_reg_rtx (V16HImode);
41314 if (elt < 16)
41315 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
41316 else
41317 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
41318 ix86_expand_vector_extract (false, target, tmp, elt & 15);
41319 return;
41320 }
41321 break;
41322
41323 case V64QImode:
41324 if (TARGET_AVX512BW)
41325 {
41326 tmp = gen_reg_rtx (V32QImode);
41327 if (elt < 32)
41328 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
41329 else
41330 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
41331 ix86_expand_vector_extract (false, target, tmp, elt & 31);
41332 return;
41333 }
41334 break;
41335
41336 case V16SFmode:
41337 tmp = gen_reg_rtx (V8SFmode);
41338 if (elt < 8)
41339 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
41340 else
41341 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
41342 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41343 return;
41344
41345 case V8DFmode:
41346 tmp = gen_reg_rtx (V4DFmode);
41347 if (elt < 4)
41348 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
41349 else
41350 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
41351 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41352 return;
41353
41354 case V16SImode:
41355 tmp = gen_reg_rtx (V8SImode);
41356 if (elt < 8)
41357 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
41358 else
41359 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
41360 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41361 return;
41362
41363 case V8DImode:
41364 tmp = gen_reg_rtx (V4DImode);
41365 if (elt < 4)
41366 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
41367 else
41368 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
41369 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41370 return;
41371
41372 case V8QImode:
41373 /* ??? Could extract the appropriate HImode element and shift. */
41374 default:
41375 break;
41376 }
41377
41378 if (use_vec_extr)
41379 {
41380 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
41381 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
41382
41383 /* Let the rtl optimizers know about the zero extension performed. */
41384 if (inner_mode == QImode || inner_mode == HImode)
41385 {
41386 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
41387 target = gen_lowpart (SImode, target);
41388 }
41389
41390 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41391 }
41392 else
41393 {
41394 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41395
41396 emit_move_insn (mem, vec);
41397
41398 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41399 emit_move_insn (target, tmp);
41400 }
41401 }
41402
41403 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
41404 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
41405 The upper bits of DEST are undefined, though they shouldn't cause
41406 exceptions (some bits from src or all zeros are ok). */
41407
41408 static void
41409 emit_reduc_half (rtx dest, rtx src, int i)
41410 {
41411 rtx tem, d = dest;
41412 switch (GET_MODE (src))
41413 {
41414 case V4SFmode:
41415 if (i == 128)
41416 tem = gen_sse_movhlps (dest, src, src);
41417 else
41418 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
41419 GEN_INT (1 + 4), GEN_INT (1 + 4));
41420 break;
41421 case V2DFmode:
41422 tem = gen_vec_interleave_highv2df (dest, src, src);
41423 break;
41424 case V16QImode:
41425 case V8HImode:
41426 case V4SImode:
41427 case V2DImode:
41428 d = gen_reg_rtx (V1TImode);
41429 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41430 GEN_INT (i / 2));
41431 break;
41432 case V8SFmode:
41433 if (i == 256)
41434 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41435 else
41436 tem = gen_avx_shufps256 (dest, src, src,
41437 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41438 break;
41439 case V4DFmode:
41440 if (i == 256)
41441 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41442 else
41443 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41444 break;
41445 case V32QImode:
41446 case V16HImode:
41447 case V8SImode:
41448 case V4DImode:
41449 if (i == 256)
41450 {
41451 if (GET_MODE (dest) != V4DImode)
41452 d = gen_reg_rtx (V4DImode);
41453 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41454 gen_lowpart (V4DImode, src),
41455 const1_rtx);
41456 }
41457 else
41458 {
41459 d = gen_reg_rtx (V2TImode);
41460 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41461 GEN_INT (i / 2));
41462 }
41463 break;
41464 case V64QImode:
41465 case V32HImode:
41466 case V16SImode:
41467 case V16SFmode:
41468 case V8DImode:
41469 case V8DFmode:
41470 if (i > 128)
41471 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41472 gen_lowpart (V16SImode, src),
41473 gen_lowpart (V16SImode, src),
41474 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41475 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41476 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41477 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41478 GEN_INT (0xC), GEN_INT (0xD),
41479 GEN_INT (0xE), GEN_INT (0xF),
41480 GEN_INT (0x10), GEN_INT (0x11),
41481 GEN_INT (0x12), GEN_INT (0x13),
41482 GEN_INT (0x14), GEN_INT (0x15),
41483 GEN_INT (0x16), GEN_INT (0x17));
41484 else
41485 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41486 gen_lowpart (V16SImode, src),
41487 GEN_INT (i == 128 ? 0x2 : 0x1),
41488 GEN_INT (0x3),
41489 GEN_INT (0x3),
41490 GEN_INT (0x3),
41491 GEN_INT (i == 128 ? 0x6 : 0x5),
41492 GEN_INT (0x7),
41493 GEN_INT (0x7),
41494 GEN_INT (0x7),
41495 GEN_INT (i == 128 ? 0xA : 0x9),
41496 GEN_INT (0xB),
41497 GEN_INT (0xB),
41498 GEN_INT (0xB),
41499 GEN_INT (i == 128 ? 0xE : 0xD),
41500 GEN_INT (0xF),
41501 GEN_INT (0xF),
41502 GEN_INT (0xF));
41503 break;
41504 default:
41505 gcc_unreachable ();
41506 }
41507 emit_insn (tem);
41508 if (d != dest)
41509 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41510 }
41511
41512 /* Expand a vector reduction. FN is the binary pattern to reduce;
41513 DEST is the destination; IN is the input vector. */
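/* The reduction works by repeated halving: each emit_reduc_half step moves
   the upper half of the current vector into the lower half of a temporary,
   and FN combines the two.  After log2(nelts) steps the reduced value is in
   the low element of DEST.  */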
41514
41515 void
41516 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41517 {
41518 rtx half, dst, vec = in;
41519 enum machine_mode mode = GET_MODE (in);
41520 int i;
41521
41522 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41523 if (TARGET_SSE4_1
41524 && mode == V8HImode
41525 && fn == gen_uminv8hi3)
41526 {
41527 emit_insn (gen_sse4_1_phminposuw (dest, in));
41528 return;
41529 }
41530
41531 for (i = GET_MODE_BITSIZE (mode);
41532 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41533 i >>= 1)
41534 {
41535 half = gen_reg_rtx (mode);
41536 emit_reduc_half (half, vec, i);
41537 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41538 dst = dest;
41539 else
41540 dst = gen_reg_rtx (mode);
41541 emit_insn (fn (dst, half, vec));
41542 vec = dst;
41543 }
41544 }
41545 \f
41546 /* Target hook for scalar_mode_supported_p. */
41547 static bool
41548 ix86_scalar_mode_supported_p (enum machine_mode mode)
41549 {
41550 if (DECIMAL_FLOAT_MODE_P (mode))
41551 return default_decimal_float_supported_p ();
41552 else if (mode == TFmode)
41553 return true;
41554 else
41555 return default_scalar_mode_supported_p (mode);
41556 }
41557
41558 /* Implements target hook vector_mode_supported_p. */
41559 static bool
41560 ix86_vector_mode_supported_p (enum machine_mode mode)
41561 {
41562 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41563 return true;
41564 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41565 return true;
41566 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41567 return true;
41568 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41569 return true;
41570 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41571 return true;
41572 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41573 return true;
41574 return false;
41575 }
41576
41577 /* Implement target hook libgcc_floating_mode_supported_p. */
41578 static bool
41579 ix86_libgcc_floating_mode_supported_p (enum machine_mode mode)
41580 {
41581 switch (mode)
41582 {
41583 case SFmode:
41584 case DFmode:
41585 case XFmode:
41586 return true;
41587
41588 case TFmode:
41589 #ifdef IX86_NO_LIBGCC_TFMODE
41590 return false;
41591 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
41592 return TARGET_LONG_DOUBLE_128;
41593 #else
41594 return true;
41595 #endif
41596
41597 default:
41598 return false;
41599 }
41600 }
41601
41602 /* Target hook for c_mode_for_suffix. */
41603 static enum machine_mode
41604 ix86_c_mode_for_suffix (char suffix)
41605 {
41606 if (suffix == 'q')
41607 return TFmode;
41608 if (suffix == 'w')
41609 return XFmode;
41610
41611 return VOIDmode;
41612 }
41613
41614 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41615
41616 We do this in the new i386 backend to maintain source compatibility
41617 with the old cc0-based compiler. */
41618
41619 static tree
41620 ix86_md_asm_clobbers (tree, tree, tree clobbers)
41621 {
41622 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41623 clobbers);
41624 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41625 clobbers);
41626 return clobbers;
41627 }
41628
41629 /* Implements target vector targetm.asm.encode_section_info. */
41630
41631 static void ATTRIBUTE_UNUSED
41632 ix86_encode_section_info (tree decl, rtx rtl, int first)
41633 {
41634 default_encode_section_info (decl, rtl, first);
41635
41636 if (TREE_CODE (decl) == VAR_DECL
41637 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41638 && ix86_in_large_data_p (decl))
41639 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41640 }
41641
41642 /* Worker function for REVERSE_CONDITION. */
41643
41644 enum rtx_code
41645 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41646 {
41647 return (mode != CCFPmode && mode != CCFPUmode
41648 ? reverse_condition (code)
41649 : reverse_condition_maybe_unordered (code));
41650 }
41651
41652 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41653 to OPERANDS[0]. */
41654
41655 const char *
41656 output_387_reg_move (rtx insn, rtx *operands)
41657 {
41658 if (REG_P (operands[0]))
41659 {
41660 if (REG_P (operands[1])
41661 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41662 {
41663 if (REGNO (operands[0]) == FIRST_STACK_REG)
41664 return output_387_ffreep (operands, 0);
41665 return "fstp\t%y0";
41666 }
41667 if (STACK_TOP_P (operands[0]))
41668 return "fld%Z1\t%y1";
41669 return "fst\t%y0";
41670 }
41671 else if (MEM_P (operands[0]))
41672 {
41673 gcc_assert (REG_P (operands[1]));
41674 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41675 return "fstp%Z0\t%y0";
41676 else
41677 {
41678 /* There is no non-popping store to memory for XFmode.
41679 So if we need one, follow the store with a load. */
41680 if (GET_MODE (operands[0]) == XFmode)
41681 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41682 else
41683 return "fst%Z0\t%y0";
41684 }
41685 }
41686 else
41687 gcc_unreachable();
41688 }
41689
41690 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41691 FP status register is set. */
41692
41693 void
41694 ix86_emit_fp_unordered_jump (rtx label)
41695 {
41696 rtx reg = gen_reg_rtx (HImode);
41697 rtx temp;
41698
41699 emit_insn (gen_x86_fnstsw_1 (reg));
41700
41701 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41702 {
41703 emit_insn (gen_x86_sahf_1 (reg));
41704
41705 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41706 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41707 }
41708 else
41709 {
41710 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41711
41712 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41713 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41714 }
41715
41716 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41717 gen_rtx_LABEL_REF (VOIDmode, label),
41718 pc_rtx);
41719 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41720
41721 emit_jump_insn (temp);
41722 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41723 }
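
/* As a rough reference for the constants above: after fnstsw stores the x87
   status word into %ax, the condition flags C0, C2 and C3 sit in bits 8, 10
   and 14 of %ax, i.e. bits 0, 2 and 6 of %ah.  With sahf, %ah is copied into
   EFLAGS so C0 -> CF, C2 -> PF and C3 -> ZF, which is why the UNORDERED test
   can use the flags register directly; without sahf, testing %ah against
   0x04 isolates C2 by hand.  */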
41724
41725 /* Output code to perform a log1p XFmode calculation. */
41726
41727 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41728 {
41729 rtx_code_label *label1 = gen_label_rtx ();
41730 rtx_code_label *label2 = gen_label_rtx ();
41731
41732 rtx tmp = gen_reg_rtx (XFmode);
41733 rtx tmp2 = gen_reg_rtx (XFmode);
41734 rtx test;
41735
41736 emit_insn (gen_absxf2 (tmp, op1));
41737 test = gen_rtx_GE (VOIDmode, tmp,
41738 CONST_DOUBLE_FROM_REAL_VALUE (
41739 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41740 XFmode));
41741 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41742
41743 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41744 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41745 emit_jump (label2);
41746
41747 emit_label (label1);
41748 emit_move_insn (tmp, CONST1_RTX (XFmode));
41749 emit_insn (gen_addxf3 (tmp, op1, tmp));
41750 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41751 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41752
41753 emit_label (label2);
41754 }
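
/* A rough scalar sketch of what is emitted above (illustrative only).
   fyl2xp1 computes y * log2 (x + 1) but is only specified for
   |x| < 1 - sqrt (2) / 2 ~= 0.29289321881..., which appears to be where the
   threshold constant comes from; outside that range the plain fyl2x form is
   used instead:

	if (fabs (x) < 0.29289321881345247561810596348408353)
	  op0 = fyl2xp1 (ln2, x);	   ln2 * log2 (x + 1)
	else
	  op0 = fyl2x (ln2, 1.0 + x);	   ln2 * log2 (1 + x)
   */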
41755
41756 /* Emit x87 code for a round calculation, from OP1 storing into OP0. */
41757 void ix86_emit_i387_round (rtx op0, rtx op1)
41758 {
41759 enum machine_mode inmode = GET_MODE (op1);
41760 enum machine_mode outmode = GET_MODE (op0);
41761 rtx e1, e2, res, tmp, tmp1, half;
41762 rtx scratch = gen_reg_rtx (HImode);
41763 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41764 rtx_code_label *jump_label = gen_label_rtx ();
41765 rtx insn;
41766 rtx (*gen_abs) (rtx, rtx);
41767 rtx (*gen_neg) (rtx, rtx);
41768
41769 switch (inmode)
41770 {
41771 case SFmode:
41772 gen_abs = gen_abssf2;
41773 break;
41774 case DFmode:
41775 gen_abs = gen_absdf2;
41776 break;
41777 case XFmode:
41778 gen_abs = gen_absxf2;
41779 break;
41780 default:
41781 gcc_unreachable ();
41782 }
41783
41784 switch (outmode)
41785 {
41786 case SFmode:
41787 gen_neg = gen_negsf2;
41788 break;
41789 case DFmode:
41790 gen_neg = gen_negdf2;
41791 break;
41792 case XFmode:
41793 gen_neg = gen_negxf2;
41794 break;
41795 case HImode:
41796 gen_neg = gen_neghi2;
41797 break;
41798 case SImode:
41799 gen_neg = gen_negsi2;
41800 break;
41801 case DImode:
41802 gen_neg = gen_negdi2;
41803 break;
41804 default:
41805 gcc_unreachable ();
41806 }
41807
41808 e1 = gen_reg_rtx (inmode);
41809 e2 = gen_reg_rtx (inmode);
41810 res = gen_reg_rtx (outmode);
41811
41812 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41813
41814 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41815
41816 /* scratch = fxam(op1) */
41817 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41818 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41819 UNSPEC_FXAM)));
41820 /* e1 = fabs(op1) */
41821 emit_insn (gen_abs (e1, op1));
41822
41823 /* e2 = e1 + 0.5 */
41824 half = force_reg (inmode, half);
41825 emit_insn (gen_rtx_SET (VOIDmode, e2,
41826 gen_rtx_PLUS (inmode, e1, half)));
41827
41828 /* res = floor(e2) */
41829 if (inmode != XFmode)
41830 {
41831 tmp1 = gen_reg_rtx (XFmode);
41832
41833 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41834 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41835 }
41836 else
41837 tmp1 = e2;
41838
41839 switch (outmode)
41840 {
41841 case SFmode:
41842 case DFmode:
41843 {
41844 rtx tmp0 = gen_reg_rtx (XFmode);
41845
41846 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41847
41848 emit_insn (gen_rtx_SET (VOIDmode, res,
41849 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41850 UNSPEC_TRUNC_NOOP)));
41851 }
41852 break;
41853 case XFmode:
41854 emit_insn (gen_frndintxf2_floor (res, tmp1));
41855 break;
41856 case HImode:
41857 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41858 break;
41859 case SImode:
41860 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41861 break;
41862 case DImode:
41863 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41864 break;
41865 default:
41866 gcc_unreachable ();
41867 }
41868
41869 /* flags = signbit(a) */
41870 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41871
41872 /* if (flags) then res = -res */
41873 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41874 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41875 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41876 pc_rtx);
41877 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41878 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41879 JUMP_LABEL (insn) = jump_label;
41880
41881 emit_insn (gen_neg (res, res));
41882
41883 emit_label (jump_label);
41884 LABEL_NUSES (jump_label) = 1;
41885
41886 emit_move_insn (op0, res);
41887 }
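
/* Illustrative scalar equivalent of the sequence above (not emitted code):

	res = floor (fabs (a) + 0.5);
	if (signbit (a))	   sign taken from the fxam result
	  res = -res;

   so halfway cases round away from zero and the original sign is reapplied
   at the end, e.g. round (-0.3) yields -0.0.  */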
41888
41889 /* Output code to perform a Newton-Raphson approximation of a single precision
41890 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41891
41892 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41893 {
41894 rtx x0, x1, e0, e1;
41895
41896 x0 = gen_reg_rtx (mode);
41897 e0 = gen_reg_rtx (mode);
41898 e1 = gen_reg_rtx (mode);
41899 x1 = gen_reg_rtx (mode);
41900
41901 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
41902
41903 b = force_reg (mode, b);
41904
41905 /* x0 = rcp(b) estimate */
41906 if (mode == V16SFmode || mode == V8DFmode)
41907 emit_insn (gen_rtx_SET (VOIDmode, x0,
41908 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41909 UNSPEC_RCP14)));
41910 else
41911 emit_insn (gen_rtx_SET (VOIDmode, x0,
41912 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41913 UNSPEC_RCP)));
41914
41915 /* e0 = x0 * b */
41916 emit_insn (gen_rtx_SET (VOIDmode, e0,
41917 gen_rtx_MULT (mode, x0, b)));
41918
41919 /* e0 = x0 * e0 */
41920 emit_insn (gen_rtx_SET (VOIDmode, e0,
41921 gen_rtx_MULT (mode, x0, e0)));
41922
41923 /* e1 = x0 + x0 */
41924 emit_insn (gen_rtx_SET (VOIDmode, e1,
41925 gen_rtx_PLUS (mode, x0, x0)));
41926
41927 /* x1 = e1 - e0 */
41928 emit_insn (gen_rtx_SET (VOIDmode, x1,
41929 gen_rtx_MINUS (mode, e1, e0)));
41930
41931 /* res = a * x1 */
41932 emit_insn (gen_rtx_SET (VOIDmode, res,
41933 gen_rtx_MULT (mode, a, x1)));
41934 }
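
/* The sequence above is one Newton-Raphson refinement of the reciprocal,
   just written without an explicit 2.0 constant (a rough derivation, not
   additional emitted code):

	x1 = x0 * (2 - b * x0)		   standard N-R step for 1/b
	   = (x0 + x0) - (b * x0 * x0)	   i.e. e1 - e0 above

   so res = a * x1 roughly doubles the precision of the initial
   rcpps/rcp14 estimate x0.  */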
41935
41936 /* Output code to perform a Newton-Raphson approximation of a
41937 single precision floating point [reciprocal] square root. */
41938
41939 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41940 bool recip)
41941 {
41942 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41943 REAL_VALUE_TYPE r;
41944 int unspec;
41945
41946 x0 = gen_reg_rtx (mode);
41947 e0 = gen_reg_rtx (mode);
41948 e1 = gen_reg_rtx (mode);
41949 e2 = gen_reg_rtx (mode);
41950 e3 = gen_reg_rtx (mode);
41951
41952 real_from_integer (&r, VOIDmode, -3, SIGNED);
41953 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41954
41955 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41956 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41957 unspec = UNSPEC_RSQRT;
41958
41959 if (VECTOR_MODE_P (mode))
41960 {
41961 mthree = ix86_build_const_vector (mode, true, mthree);
41962 mhalf = ix86_build_const_vector (mode, true, mhalf);
41963 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41964 if (GET_MODE_SIZE (mode) == 64)
41965 unspec = UNSPEC_RSQRT14;
41966 }
41967
41968 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41969 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
41970
41971 a = force_reg (mode, a);
41972
41973 /* x0 = rsqrt(a) estimate */
41974 emit_insn (gen_rtx_SET (VOIDmode, x0,
41975 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41976 unspec)));
41977
41978 /* If a == 0.0, filter out the infinite rsqrt estimate so that sqrt (0.0) does not produce a NaN. */
41979 if (!recip)
41980 {
41981 rtx zero, mask;
41982
41983 zero = gen_reg_rtx (mode);
41984 mask = gen_reg_rtx (mode);
41985
41986 zero = force_reg (mode, CONST0_RTX(mode));
41987
41988 /* Handle masked compare. */
41989 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41990 {
41991 mask = gen_reg_rtx (HImode);
41992 /* Imm value 0x4 corresponds to not-equal comparison. */
41993 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41994 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41995 }
41996 else
41997 {
41998 emit_insn (gen_rtx_SET (VOIDmode, mask,
41999 gen_rtx_NE (mode, zero, a)));
42000
42001 emit_insn (gen_rtx_SET (VOIDmode, x0,
42002 gen_rtx_AND (mode, x0, mask)));
42003 }
42004 }
42005
42006 /* e0 = x0 * a */
42007 emit_insn (gen_rtx_SET (VOIDmode, e0,
42008 gen_rtx_MULT (mode, x0, a)));
42009 /* e1 = e0 * x0 */
42010 emit_insn (gen_rtx_SET (VOIDmode, e1,
42011 gen_rtx_MULT (mode, e0, x0)));
42012
42013 /* e2 = e1 - 3. */
42014 mthree = force_reg (mode, mthree);
42015 emit_insn (gen_rtx_SET (VOIDmode, e2,
42016 gen_rtx_PLUS (mode, e1, mthree)));
42017
42018 mhalf = force_reg (mode, mhalf);
42019 if (recip)
42020 /* e3 = -.5 * x0 */
42021 emit_insn (gen_rtx_SET (VOIDmode, e3,
42022 gen_rtx_MULT (mode, x0, mhalf)));
42023 else
42024 /* e3 = -.5 * e0 */
42025 emit_insn (gen_rtx_SET (VOIDmode, e3,
42026 gen_rtx_MULT (mode, e0, mhalf)));
42027 /* ret = e2 * e3 */
42028 emit_insn (gen_rtx_SET (VOIDmode, res,
42029 gen_rtx_MULT (mode, e2, e3)));
42030 }
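
/* Rough derivation of the formulas above (illustrative only): the common
   Newton-Raphson step for 1/sqrt(a) is

	x1 = x0 * (1.5 - 0.5 * a * x0 * x0)
	   = -0.5 * x0 * (a * x0 * x0 - 3.0)

   which is the rsqrt form; the sqrt form additionally uses
   sqrt (a) = a * rsqrt (a), i.e. e0 = a * x0 replaces x0 in the final
   multiply.  The refactoring lets -3.0 and -0.5 be preloaded as (vector)
   constants.  */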
42031
42032 #ifdef TARGET_SOLARIS
42033 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
42034
42035 static void
42036 i386_solaris_elf_named_section (const char *name, unsigned int flags,
42037 tree decl)
42038 {
42039 /* With Binutils 2.15, the "@unwind" marker must be specified on
42040 every occurrence of the ".eh_frame" section, not just the first
42041 one. */
42042 if (TARGET_64BIT
42043 && strcmp (name, ".eh_frame") == 0)
42044 {
42045 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
42046 flags & SECTION_WRITE ? "aw" : "a");
42047 return;
42048 }
42049
42050 #ifndef USE_GAS
42051 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
42052 {
42053 solaris_elf_asm_comdat_section (name, flags, decl);
42054 return;
42055 }
42056 #endif
42057
42058 default_elf_asm_named_section (name, flags, decl);
42059 }
42060 #endif /* TARGET_SOLARIS */
42061
42062 /* Return the mangling of TYPE if it is an extended fundamental type. */
42063
42064 static const char *
42065 ix86_mangle_type (const_tree type)
42066 {
42067 type = TYPE_MAIN_VARIANT (type);
42068
42069 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
42070 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
42071 return NULL;
42072
42073 switch (TYPE_MODE (type))
42074 {
42075 case TFmode:
42076 /* __float128 is "g". */
42077 return "g";
42078 case XFmode:
42079 /* "long double" or __float80 is "e". */
42080 return "e";
42081 default:
42082 return NULL;
42083 }
42084 }
42085
42086 /* For 32-bit code we can save PIC register setup by using
42087 the hidden function __stack_chk_fail_local instead of calling
42088 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
42089 register, so it is better to call __stack_chk_fail directly. */
42090
42091 static tree ATTRIBUTE_UNUSED
42092 ix86_stack_protect_fail (void)
42093 {
42094 return TARGET_64BIT
42095 ? default_external_stack_protect_fail ()
42096 : default_hidden_stack_protect_fail ();
42097 }
42098
42099 /* Select a format to encode pointers in exception handling data. CODE
42100 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
42101 true if the symbol may be affected by dynamic relocations.
42102
42103 ??? All x86 object file formats are capable of representing this.
42104 After all, the relocation needed is the same as for the call insn.
42105 Whether or not a particular assembler allows us to enter such, I
42106 guess we'll have to see. */
42107 int
42108 asm_preferred_eh_data_format (int code, int global)
42109 {
42110 if (flag_pic)
42111 {
42112 int type = DW_EH_PE_sdata8;
42113 if (!TARGET_64BIT
42114 || ix86_cmodel == CM_SMALL_PIC
42115 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
42116 type = DW_EH_PE_sdata4;
42117 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
42118 }
42119 if (ix86_cmodel == CM_SMALL
42120 || (ix86_cmodel == CM_MEDIUM && code))
42121 return DW_EH_PE_udata4;
42122 return DW_EH_PE_absptr;
42123 }
42124 \f
42125 /* Expand copysign from SIGN to the positive value ABS_VALUE
42126 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
42127 the sign-bit. */
42128 static void
42129 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
42130 {
42131 enum machine_mode mode = GET_MODE (sign);
42132 rtx sgn = gen_reg_rtx (mode);
42133 if (mask == NULL_RTX)
42134 {
42135 enum machine_mode vmode;
42136
42137 if (mode == SFmode)
42138 vmode = V4SFmode;
42139 else if (mode == DFmode)
42140 vmode = V2DFmode;
42141 else
42142 vmode = mode;
42143
42144 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
42145 if (!VECTOR_MODE_P (mode))
42146 {
42147 /* We need to generate a scalar mode mask in this case. */
42148 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
42149 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
42150 mask = gen_reg_rtx (mode);
42151 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
42152 }
42153 }
42154 else
42155 mask = gen_rtx_NOT (mode, mask);
42156 emit_insn (gen_rtx_SET (VOIDmode, sgn,
42157 gen_rtx_AND (mode, mask, sign)));
42158 emit_insn (gen_rtx_SET (VOIDmode, result,
42159 gen_rtx_IOR (mode, abs_value, sgn)));
42160 }
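
/* In scalar terms the sequence above is the usual copysign bit trick
   (illustrative example, not emitted code):

	sgn    = sign & SIGN_MASK;	   isolate the sign bit of SIGN
	result = abs_value | sgn;

   e.g. for SFmode, copysign (2.0f, -1.0f) is 0x40000000 | 0x80000000
   == 0xc0000000 == -2.0f.  ABS_VALUE is assumed to already be
   non-negative, so nothing needs to be cleared first.  */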
42161
42162 /* Expand fabs (OP0) and return a new rtx that holds the result. The
42163 mask for masking out the sign-bit is stored in *SMASK, if that is
42164 non-null. */
42165 static rtx
42166 ix86_expand_sse_fabs (rtx op0, rtx *smask)
42167 {
42168 enum machine_mode vmode, mode = GET_MODE (op0);
42169 rtx xa, mask;
42170
42171 xa = gen_reg_rtx (mode);
42172 if (mode == SFmode)
42173 vmode = V4SFmode;
42174 else if (mode == DFmode)
42175 vmode = V2DFmode;
42176 else
42177 vmode = mode;
42178 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
42179 if (!VECTOR_MODE_P (mode))
42180 {
42181 /* We need to generate a scalar mode mask in this case. */
42182 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
42183 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
42184 mask = gen_reg_rtx (mode);
42185 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
42186 }
42187 emit_insn (gen_rtx_SET (VOIDmode, xa,
42188 gen_rtx_AND (mode, op0, mask)));
42189
42190 if (smask)
42191 *smask = mask;
42192
42193 return xa;
42194 }
42195
42196 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
42197 swapping the operands if SWAP_OPERANDS is true. The expanded
42198 code is a forward jump to a newly created label in case the
42199 comparison is true. The generated label rtx is returned. */
42200 static rtx_code_label *
42201 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
42202 bool swap_operands)
42203 {
42204 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
42205 rtx_code_label *label;
42206 rtx tmp;
42207
42208 if (swap_operands)
42209 {
42210 tmp = op0;
42211 op0 = op1;
42212 op1 = tmp;
42213 }
42214
42215 label = gen_label_rtx ();
42216 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
42217 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42218 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
42219 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
42220 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
42221 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
42222 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
42223 JUMP_LABEL (tmp) = label;
42224
42225 return label;
42226 }
42227
42228 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
42229 using comparison code CODE. Operands are swapped for the comparison if
42230 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
42231 static rtx
42232 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
42233 bool swap_operands)
42234 {
42235 rtx (*insn)(rtx, rtx, rtx, rtx);
42236 enum machine_mode mode = GET_MODE (op0);
42237 rtx mask = gen_reg_rtx (mode);
42238
42239 if (swap_operands)
42240 {
42241 rtx tmp = op0;
42242 op0 = op1;
42243 op1 = tmp;
42244 }
42245
42246 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
42247
42248 emit_insn (insn (mask, op0, op1,
42249 gen_rtx_fmt_ee (code, mode, op0, op1)));
42250 return mask;
42251 }
42252
42253 /* Generate and return a rtx of mode MODE for 2**n where n is the number
42254 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
42255 static rtx
42256 ix86_gen_TWO52 (enum machine_mode mode)
42257 {
42258 REAL_VALUE_TYPE TWO52r;
42259 rtx TWO52;
42260
42261 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
42262 TWO52 = const_double_from_real_value (TWO52r, mode);
42263 TWO52 = force_reg (mode, TWO52);
42264
42265 return TWO52;
42266 }
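
/* The SSE rounding sequences below rely on the classic 2**52 trick: for
   0 <= xa < 2**52 (DFmode; 2**23 for SFmode) the sum xa + 2**52 has an ulp
   of exactly 1.0, so the addition itself rounds xa to an integer in the
   current rounding mode, and subtracting 2**52 again leaves that integer.
   A small worked example, assuming round-to-nearest-even:

	xa = 2.7;
	xa += 0x1p52;	   4503599627370498.7 rounds to ...499.0
	xa -= 0x1p52;	   xa == 3.0
   */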
42267
42268 /* Expand SSE sequence for computing lround from OP1 storing
42269 into OP0. */
42270 void
42271 ix86_expand_lround (rtx op0, rtx op1)
42272 {
42273 /* C code for the stuff we're doing below:
42274 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
42275 return (long)tmp;
42276 */
42277 enum machine_mode mode = GET_MODE (op1);
42278 const struct real_format *fmt;
42279 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42280 rtx adj;
42281
42282 /* load nextafter (0.5, 0.0) */
42283 fmt = REAL_MODE_FORMAT (mode);
42284 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42285 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42286
42287 /* adj = copysign (0.5, op1) */
42288 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
42289 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
42290
42291 /* adj = op1 + adj */
42292 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
42293
42294 /* op0 = (imode)adj */
42295 expand_fix (op0, adj, 0);
42296 }
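
/* The nextafter (0.5, 0.0) adjustment above is deliberate: adding an exact
   0.5 can round up an input that is itself just below 0.5.  Illustrative
   DFmode example: for x == 0.49999999999999994 (the largest double below
   0.5), x + 0.5 rounds up to 1.0, while x + nextafter (0.5, 0.0) stays
   below 1.0, so the final (long) conversion gives 0 as expected.  */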
42297
42298 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
42299 into OPERAND0. */
42300 void
42301 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
42302 {
42303 /* C code for the stuff we're doing below (for do_floor):
42304 xi = (long)op1;
42305 xi -= (double)xi > op1 ? 1 : 0;
42306 return xi;
42307 */
42308 enum machine_mode fmode = GET_MODE (op1);
42309 enum machine_mode imode = GET_MODE (op0);
42310 rtx ireg, freg, tmp;
42311 rtx_code_label *label;
42312
42313 /* reg = (long)op1 */
42314 ireg = gen_reg_rtx (imode);
42315 expand_fix (ireg, op1, 0);
42316
42317 /* freg = (double)reg */
42318 freg = gen_reg_rtx (fmode);
42319 expand_float (freg, ireg, 0);
42320
42321 /* ireg = (freg > op1) ? ireg - 1 : ireg */
42322 label = ix86_expand_sse_compare_and_jump (UNLE,
42323 freg, op1, !do_floor);
42324 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
42325 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
42326 emit_move_insn (ireg, tmp);
42327
42328 emit_label (label);
42329 LABEL_NUSES (label) = 1;
42330
42331 emit_move_insn (op0, ireg);
42332 }
42333
42334 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
42335 result in OPERAND0. */
42336 void
42337 ix86_expand_rint (rtx operand0, rtx operand1)
42338 {
42339 /* C code for the stuff we're doing below:
42340 xa = fabs (operand1);
42341 if (!isless (xa, 2**52))
42342 return operand1;
42343 xa = xa + 2**52 - 2**52;
42344 return copysign (xa, operand1);
42345 */
42346 enum machine_mode mode = GET_MODE (operand0);
42347 rtx res, xa, TWO52, mask;
42348 rtx_code_label *label;
42349
42350 res = gen_reg_rtx (mode);
42351 emit_move_insn (res, operand1);
42352
42353 /* xa = abs (operand1) */
42354 xa = ix86_expand_sse_fabs (res, &mask);
42355
42356 /* if (!isless (xa, TWO52)) goto label; */
42357 TWO52 = ix86_gen_TWO52 (mode);
42358 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42359
42360 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42361 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42362
42363 ix86_sse_copysign_to_positive (res, xa, res, mask);
42364
42365 emit_label (label);
42366 LABEL_NUSES (label) = 1;
42367
42368 emit_move_insn (operand0, res);
42369 }
42370
42371 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42372 into OPERAND0. */
42373 void
42374 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
42375 {
42376 /* C code for the stuff we expand below.
42377 double xa = fabs (x), x2;
42378 if (!isless (xa, TWO52))
42379 return x;
42380 xa = xa + TWO52 - TWO52;
42381 x2 = copysign (xa, x);
42382 Compensate. Floor:
42383 if (x2 > x)
42384 x2 -= 1;
42385 Compensate. Ceil:
42386 if (x2 < x)
42387 x2 -= -1;
42388 return x2;
42389 */
42390 enum machine_mode mode = GET_MODE (operand0);
42391 rtx xa, TWO52, tmp, one, res, mask;
42392 rtx_code_label *label;
42393
42394 TWO52 = ix86_gen_TWO52 (mode);
42395
42396 /* Temporary for holding the result, initialized to the input
42397 operand to ease control flow. */
42398 res = gen_reg_rtx (mode);
42399 emit_move_insn (res, operand1);
42400
42401 /* xa = abs (operand1) */
42402 xa = ix86_expand_sse_fabs (res, &mask);
42403
42404 /* if (!isless (xa, TWO52)) goto label; */
42405 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42406
42407 /* xa = xa + TWO52 - TWO52; */
42408 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42409 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42410
42411 /* xa = copysign (xa, operand1) */
42412 ix86_sse_copysign_to_positive (xa, xa, res, mask);
42413
42414 /* generate 1.0 or -1.0 */
42415 one = force_reg (mode,
42416 const_double_from_real_value (do_floor
42417 ? dconst1 : dconstm1, mode));
42418
42419 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42420 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42421 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42422 gen_rtx_AND (mode, one, tmp)));
42423 /* We always need to subtract here to preserve signed zero. */
42424 tmp = expand_simple_binop (mode, MINUS,
42425 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42426 emit_move_insn (res, tmp);
42427
42428 emit_label (label);
42429 LABEL_NUSES (label) = 1;
42430
42431 emit_move_insn (operand0, res);
42432 }
42433
42434 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42435 into OPERAND0. */
42436 void
42437 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
42438 {
42439 /* C code for the stuff we expand below.
42440 double xa = fabs (x), x2;
42441 if (!isless (xa, TWO52))
42442 return x;
42443 x2 = (double)(long)x;
42444 Compensate. Floor:
42445 if (x2 > x)
42446 x2 -= 1;
42447 Compensate. Ceil:
42448 if (x2 < x)
42449 x2 += 1;
42450 if (HONOR_SIGNED_ZEROS (mode))
42451 return copysign (x2, x);
42452 return x2;
42453 */
42454 enum machine_mode mode = GET_MODE (operand0);
42455 rtx xa, xi, TWO52, tmp, one, res, mask;
42456 rtx_code_label *label;
42457
42458 TWO52 = ix86_gen_TWO52 (mode);
42459
42460 /* Temporary for holding the result, initialized to the input
42461 operand to ease control flow. */
42462 res = gen_reg_rtx (mode);
42463 emit_move_insn (res, operand1);
42464
42465 /* xa = abs (operand1) */
42466 xa = ix86_expand_sse_fabs (res, &mask);
42467
42468 /* if (!isless (xa, TWO52)) goto label; */
42469 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42470
42471 /* xa = (double)(long)x */
42472 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42473 expand_fix (xi, res, 0);
42474 expand_float (xa, xi, 0);
42475
42476 /* generate 1.0 */
42477 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42478
42479 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42480 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42481 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42482 gen_rtx_AND (mode, one, tmp)));
42483 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42484 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42485 emit_move_insn (res, tmp);
42486
42487 if (HONOR_SIGNED_ZEROS (mode))
42488 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42489
42490 emit_label (label);
42491 LABEL_NUSES (label) = 1;
42492
42493 emit_move_insn (operand0, res);
42494 }
42495
42496 /* Expand SSE sequence for computing round from OPERAND1 storing
42497 into OPERAND0. Sequence that works without relying on DImode truncation
42498 via cvttsd2siq that is only available on 64bit targets. */
42499 void
42500 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42501 {
42502 /* C code for the stuff we expand below.
42503 double xa = fabs (x), xa2, x2;
42504 if (!isless (xa, TWO52))
42505 return x;
42506 Using the absolute value and copying back sign makes
42507 -0.0 -> -0.0 correct.
42508 xa2 = xa + TWO52 - TWO52;
42509 Compensate.
42510 dxa = xa2 - xa;
42511 if (dxa <= -0.5)
42512 xa2 += 1;
42513 else if (dxa > 0.5)
42514 xa2 -= 1;
42515 x2 = copysign (xa2, x);
42516 return x2;
42517 */
42518 enum machine_mode mode = GET_MODE (operand0);
42519 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
42520 rtx_code_label *label;
42521
42522 TWO52 = ix86_gen_TWO52 (mode);
42523
42524 /* Temporary for holding the result, initialized to the input
42525 operand to ease control flow. */
42526 res = gen_reg_rtx (mode);
42527 emit_move_insn (res, operand1);
42528
42529 /* xa = abs (operand1) */
42530 xa = ix86_expand_sse_fabs (res, &mask);
42531
42532 /* if (!isless (xa, TWO52)) goto label; */
42533 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42534
42535 /* xa2 = xa + TWO52 - TWO52; */
42536 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42537 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42538
42539 /* dxa = xa2 - xa; */
42540 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42541
42542 /* generate 0.5, 1.0 and -0.5 */
42543 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42544 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42545 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42546 0, OPTAB_DIRECT);
42547
42548 /* Compensate. */
42549 tmp = gen_reg_rtx (mode);
42550 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42551 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42552 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42553 gen_rtx_AND (mode, one, tmp)));
42554 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42555 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42556 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42557 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42558 gen_rtx_AND (mode, one, tmp)));
42559 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42560
42561 /* res = copysign (xa2, operand1) */
42562 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42563
42564 emit_label (label);
42565 LABEL_NUSES (label) = 1;
42566
42567 emit_move_insn (operand0, res);
42568 }
42569
42570 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42571 into OPERAND0. */
42572 void
42573 ix86_expand_trunc (rtx operand0, rtx operand1)
42574 {
42575 /* C code for SSE variant we expand below.
42576 double xa = fabs (x), x2;
42577 if (!isless (xa, TWO52))
42578 return x;
42579 x2 = (double)(long)x;
42580 if (HONOR_SIGNED_ZEROS (mode))
42581 return copysign (x2, x);
42582 return x2;
42583 */
42584 enum machine_mode mode = GET_MODE (operand0);
42585 rtx xa, xi, TWO52, res, mask;
42586 rtx_code_label *label;
42587
42588 TWO52 = ix86_gen_TWO52 (mode);
42589
42590 /* Temporary for holding the result, initialized to the input
42591 operand to ease control flow. */
42592 res = gen_reg_rtx (mode);
42593 emit_move_insn (res, operand1);
42594
42595 /* xa = abs (operand1) */
42596 xa = ix86_expand_sse_fabs (res, &mask);
42597
42598 /* if (!isless (xa, TWO52)) goto label; */
42599 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42600
42601 /* x = (double)(long)x */
42602 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42603 expand_fix (xi, res, 0);
42604 expand_float (res, xi, 0);
42605
42606 if (HONOR_SIGNED_ZEROS (mode))
42607 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42608
42609 emit_label (label);
42610 LABEL_NUSES (label) = 1;
42611
42612 emit_move_insn (operand0, res);
42613 }
42614
42615 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42616 into OPERAND0. */
42617 void
42618 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42619 {
42620 enum machine_mode mode = GET_MODE (operand0);
42621 rtx xa, mask, TWO52, one, res, smask, tmp;
42622 rtx_code_label *label;
42623
42624 /* C code for SSE variant we expand below.
42625 double xa = fabs (x), xa2, x2;
42626 if (!isless (xa, TWO52))
42627 return x;
42628 xa2 = xa + TWO52 - TWO52;
42629 Compensate:
42630 if (xa2 > xa)
42631 xa2 -= 1.0;
42632 x2 = copysign (xa2, x);
42633 return x2;
42634 */
42635
42636 TWO52 = ix86_gen_TWO52 (mode);
42637
42638 /* Temporary for holding the result, initialized to the input
42639 operand to ease control flow. */
42640 res = gen_reg_rtx (mode);
42641 emit_move_insn (res, operand1);
42642
42643 /* xa = abs (operand1) */
42644 xa = ix86_expand_sse_fabs (res, &smask);
42645
42646 /* if (!isless (xa, TWO52)) goto label; */
42647 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42648
42649 /* res = xa + TWO52 - TWO52; */
42650 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42651 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42652 emit_move_insn (res, tmp);
42653
42654 /* generate 1.0 */
42655 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42656
42657 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42658 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42659 emit_insn (gen_rtx_SET (VOIDmode, mask,
42660 gen_rtx_AND (mode, mask, one)));
42661 tmp = expand_simple_binop (mode, MINUS,
42662 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42663 emit_move_insn (res, tmp);
42664
42665 /* res = copysign (res, operand1) */
42666 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42667
42668 emit_label (label);
42669 LABEL_NUSES (label) = 1;
42670
42671 emit_move_insn (operand0, res);
42672 }
42673
42674 /* Expand SSE sequence for computing round from OPERAND1 storing
42675 into OPERAND0. */
42676 void
42677 ix86_expand_round (rtx operand0, rtx operand1)
42678 {
42679 /* C code for the stuff we're doing below:
42680 double xa = fabs (x);
42681 if (!isless (xa, TWO52))
42682 return x;
42683 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42684 return copysign (xa, x);
42685 */
42686 enum machine_mode mode = GET_MODE (operand0);
42687 rtx res, TWO52, xa, xi, half, mask;
42688 rtx_code_label *label;
42689 const struct real_format *fmt;
42690 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42691
42692 /* Temporary for holding the result, initialized to the input
42693 operand to ease control flow. */
42694 res = gen_reg_rtx (mode);
42695 emit_move_insn (res, operand1);
42696
42697 TWO52 = ix86_gen_TWO52 (mode);
42698 xa = ix86_expand_sse_fabs (res, &mask);
42699 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42700
42701 /* load nextafter (0.5, 0.0) */
42702 fmt = REAL_MODE_FORMAT (mode);
42703 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42704 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42705
42706 /* xa = xa + 0.5 */
42707 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42708 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42709
42710 /* xa = (double)(int64_t)xa */
42711 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42712 expand_fix (xi, xa, 0);
42713 expand_float (xa, xi, 0);
42714
42715 /* res = copysign (xa, operand1) */
42716 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42717
42718 emit_label (label);
42719 LABEL_NUSES (label) = 1;
42720
42721 emit_move_insn (operand0, res);
42722 }
42723
42724 /* Expand SSE sequence for computing round
42725 from OP1 storing into OP0 using sse4 round insn. */
42726 void
42727 ix86_expand_round_sse4 (rtx op0, rtx op1)
42728 {
42729 enum machine_mode mode = GET_MODE (op0);
42730 rtx e1, e2, res, half;
42731 const struct real_format *fmt;
42732 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42733 rtx (*gen_copysign) (rtx, rtx, rtx);
42734 rtx (*gen_round) (rtx, rtx, rtx);
42735
42736 switch (mode)
42737 {
42738 case SFmode:
42739 gen_copysign = gen_copysignsf3;
42740 gen_round = gen_sse4_1_roundsf2;
42741 break;
42742 case DFmode:
42743 gen_copysign = gen_copysigndf3;
42744 gen_round = gen_sse4_1_rounddf2;
42745 break;
42746 default:
42747 gcc_unreachable ();
42748 }
42749
42750 /* round (a) = trunc (a + copysign (0.5, a)) */
42751
42752 /* load nextafter (0.5, 0.0) */
42753 fmt = REAL_MODE_FORMAT (mode);
42754 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42755 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42756 half = const_double_from_real_value (pred_half, mode);
42757
42758 /* e1 = copysign (0.5, op1) */
42759 e1 = gen_reg_rtx (mode);
42760 emit_insn (gen_copysign (e1, half, op1));
42761
42762 /* e2 = op1 + e1 */
42763 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42764
42765 /* res = trunc (e2) */
42766 res = gen_reg_rtx (mode);
42767 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42768
42769 emit_move_insn (op0, res);
42770 }
42771 \f
42772
42773 /* Table of valid machine attributes. */
42774 static const struct attribute_spec ix86_attribute_table[] =
42775 {
42776 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42777 affects_type_identity } */
42778 /* Stdcall attribute says callee is responsible for popping arguments
42779 if they are not variable. */
42780 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42781 true },
42782 /* Fastcall attribute says callee is responsible for popping arguments
42783 if they are not variable. */
42784 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42785 true },
42786 /* Thiscall attribute says callee is responsible for popping arguments
42787 if they are not variable. */
42788 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42789 true },
42790 /* Cdecl attribute says the callee is a normal C declaration */
42791 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42792 true },
42793 /* Regparm attribute specifies how many integer arguments are to be
42794 passed in registers. */
42795 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42796 true },
42797 /* Sseregparm attribute says we are using x86_64 calling conventions
42798 for FP arguments. */
42799 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42800 true },
42801 /* The transactional memory builtins are implicitly regparm or fastcall
42802 depending on the ABI. Override the generic do-nothing attribute that
42803 these builtins were declared with. */
42804 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42805 true },
42806 /* force_align_arg_pointer says this function realigns the stack at entry. */
42807 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42808 false, true, true, ix86_handle_cconv_attribute, false },
42809 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42810 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42811 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42812 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42813 false },
42814 #endif
42815 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42816 false },
42817 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42818 false },
42819 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42820 SUBTARGET_ATTRIBUTE_TABLE,
42821 #endif
42822 /* ms_abi and sysv_abi calling convention function attributes. */
42823 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42824 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42825 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42826 false },
42827 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42828 ix86_handle_callee_pop_aggregate_return, true },
42829 /* End element. */
42830 { NULL, 0, 0, false, false, false, NULL, false }
42831 };
42832
42833 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42834 static int
42835 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42836 tree vectype, int)
42837 {
42838 unsigned elements;
42839
42840 switch (type_of_cost)
42841 {
42842 case scalar_stmt:
42843 return ix86_cost->scalar_stmt_cost;
42844
42845 case scalar_load:
42846 return ix86_cost->scalar_load_cost;
42847
42848 case scalar_store:
42849 return ix86_cost->scalar_store_cost;
42850
42851 case vector_stmt:
42852 return ix86_cost->vec_stmt_cost;
42853
42854 case vector_load:
42855 return ix86_cost->vec_align_load_cost;
42856
42857 case vector_store:
42858 return ix86_cost->vec_store_cost;
42859
42860 case vec_to_scalar:
42861 return ix86_cost->vec_to_scalar_cost;
42862
42863 case scalar_to_vec:
42864 return ix86_cost->scalar_to_vec_cost;
42865
42866 case unaligned_load:
42867 case unaligned_store:
42868 return ix86_cost->vec_unalign_load_cost;
42869
42870 case cond_branch_taken:
42871 return ix86_cost->cond_taken_branch_cost;
42872
42873 case cond_branch_not_taken:
42874 return ix86_cost->cond_not_taken_branch_cost;
42875
42876 case vec_perm:
42877 case vec_promote_demote:
42878 return ix86_cost->vec_stmt_cost;
42879
42880 case vec_construct:
42881 elements = TYPE_VECTOR_SUBPARTS (vectype);
42882 return elements / 2 + 1;
42883
42884 default:
42885 gcc_unreachable ();
42886 }
42887 }
42888
42889 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42890 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42891 insn every time. */
42892
42893 static GTY(()) rtx_insn *vselect_insn;
42894
42895 /* Initialize vselect_insn. */
42896
42897 static void
42898 init_vselect_insn (void)
42899 {
42900 unsigned i;
42901 rtx x;
42902
42903 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42904 for (i = 0; i < MAX_VECT_LEN; ++i)
42905 XVECEXP (x, 0, i) = const0_rtx;
42906 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42907 const0_rtx), x);
42908 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42909 start_sequence ();
42910 vselect_insn = emit_insn (x);
42911 end_sequence ();
42912 }
42913
42914 /* Construct (set target (vec_select op0 (parallel perm))) and
42915 return true if that's a valid instruction in the active ISA. */
42916
42917 static bool
42918 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42919 unsigned nelt, bool testing_p)
42920 {
42921 unsigned int i;
42922 rtx x, save_vconcat;
42923 int icode;
42924
42925 if (vselect_insn == NULL_RTX)
42926 init_vselect_insn ();
42927
42928 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42929 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42930 for (i = 0; i < nelt; ++i)
42931 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42932 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42933 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42934 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42935 SET_DEST (PATTERN (vselect_insn)) = target;
42936 icode = recog_memoized (vselect_insn);
42937
42938 if (icode >= 0 && !testing_p)
42939 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42940
42941 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42942 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42943 INSN_CODE (vselect_insn) = -1;
42944
42945 return icode >= 0;
42946 }
42947
42948 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42949
42950 static bool
42951 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42952 const unsigned char *perm, unsigned nelt,
42953 bool testing_p)
42954 {
42955 enum machine_mode v2mode;
42956 rtx x;
42957 bool ok;
42958
42959 if (vselect_insn == NULL_RTX)
42960 init_vselect_insn ();
42961
42962 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42963 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42964 PUT_MODE (x, v2mode);
42965 XEXP (x, 0) = op0;
42966 XEXP (x, 1) = op1;
42967 ok = expand_vselect (target, x, perm, nelt, testing_p);
42968 XEXP (x, 0) = const0_rtx;
42969 XEXP (x, 1) = const0_rtx;
42970 return ok;
42971 }
42972
42973 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42974 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42975
42976 static bool
42977 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42978 {
42979 enum machine_mode vmode = d->vmode;
42980 unsigned i, mask, nelt = d->nelt;
42981 rtx target, op0, op1, x;
42982 rtx rperm[32], vperm;
42983
42984 if (d->one_operand_p)
42985 return false;
42986 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
42987 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
42988 ;
42989 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42990 ;
42991 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42992 ;
42993 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42994 ;
42995 else
42996 return false;
42997
42998 /* This is a blend, not a permute. Elements must stay in their
42999 respective lanes. */
43000 for (i = 0; i < nelt; ++i)
43001 {
43002 unsigned e = d->perm[i];
43003 if (!(e == i || e == i + nelt))
43004 return false;
43005 }
43006
43007 if (d->testing_p)
43008 return true;
43009
43010 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
43011 decision should be extracted elsewhere, so that we only try that
43012 sequence once all budget==3 options have been tried. */
43013 target = d->target;
43014 op0 = d->op0;
43015 op1 = d->op1;
43016 mask = 0;
43017
43018 switch (vmode)
43019 {
43020 case V8DFmode:
43021 case V16SFmode:
43022 case V4DFmode:
43023 case V8SFmode:
43024 case V2DFmode:
43025 case V4SFmode:
43026 case V8HImode:
43027 case V8SImode:
43028 case V32HImode:
43029 case V64QImode:
43030 case V16SImode:
43031 case V8DImode:
43032 for (i = 0; i < nelt; ++i)
43033 mask |= (d->perm[i] >= nelt) << i;
43034 break;
43035
43036 case V2DImode:
43037 for (i = 0; i < 2; ++i)
43038 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
43039 vmode = V8HImode;
43040 goto do_subreg;
43041
43042 case V4SImode:
43043 for (i = 0; i < 4; ++i)
43044 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
43045 vmode = V8HImode;
43046 goto do_subreg;
43047
43048 case V16QImode:
43049 /* See if bytes move in pairs so we can use pblendw with
43050 an immediate argument, rather than pblendvb with a vector
43051 argument. */
43052 for (i = 0; i < 16; i += 2)
43053 if (d->perm[i] + 1 != d->perm[i + 1])
43054 {
43055 use_pblendvb:
43056 for (i = 0; i < nelt; ++i)
43057 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
43058
43059 finish_pblendvb:
43060 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
43061 vperm = force_reg (vmode, vperm);
43062
43063 if (GET_MODE_SIZE (vmode) == 16)
43064 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
43065 else
43066 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
43067 if (target != d->target)
43068 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43069 return true;
43070 }
43071
43072 for (i = 0; i < 8; ++i)
43073 mask |= (d->perm[i * 2] >= 16) << i;
43074 vmode = V8HImode;
43075 /* FALLTHRU */
43076
43077 do_subreg:
43078 target = gen_reg_rtx (vmode);
43079 op0 = gen_lowpart (vmode, op0);
43080 op1 = gen_lowpart (vmode, op1);
43081 break;
43082
43083 case V32QImode:
43084 /* See if bytes move in pairs. If not, vpblendvb must be used. */
43085 for (i = 0; i < 32; i += 2)
43086 if (d->perm[i] + 1 != d->perm[i + 1])
43087 goto use_pblendvb;
43088 /* See if bytes move in quadruplets. If yes, vpblendd
43089 with immediate can be used. */
43090 for (i = 0; i < 32; i += 4)
43091 if (d->perm[i] + 2 != d->perm[i + 2])
43092 break;
43093 if (i < 32)
43094 {
43095 /* See if bytes move the same in both lanes. If yes,
43096 vpblendw with immediate can be used. */
43097 for (i = 0; i < 16; i += 2)
43098 if (d->perm[i] + 16 != d->perm[i + 16])
43099 goto use_pblendvb;
43100
43101 /* Use vpblendw. */
43102 for (i = 0; i < 16; ++i)
43103 mask |= (d->perm[i * 2] >= 32) << i;
43104 vmode = V16HImode;
43105 goto do_subreg;
43106 }
43107
43108 /* Use vpblendd. */
43109 for (i = 0; i < 8; ++i)
43110 mask |= (d->perm[i * 4] >= 32) << i;
43111 vmode = V8SImode;
43112 goto do_subreg;
43113
43114 case V16HImode:
43115 /* See if words move in pairs. If yes, vpblendd can be used. */
43116 for (i = 0; i < 16; i += 2)
43117 if (d->perm[i] + 1 != d->perm[i + 1])
43118 break;
43119 if (i < 16)
43120 {
43121 /* See if words move the same in both lanes. If not,
43122 vpblendvb must be used. */
43123 for (i = 0; i < 8; i++)
43124 if (d->perm[i] + 8 != d->perm[i + 8])
43125 {
43126 /* Use vpblendvb. */
43127 for (i = 0; i < 32; ++i)
43128 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
43129
43130 vmode = V32QImode;
43131 nelt = 32;
43132 target = gen_reg_rtx (vmode);
43133 op0 = gen_lowpart (vmode, op0);
43134 op1 = gen_lowpart (vmode, op1);
43135 goto finish_pblendvb;
43136 }
43137
43138 /* Use vpblendw. */
43139 for (i = 0; i < 16; ++i)
43140 mask |= (d->perm[i] >= 16) << i;
43141 break;
43142 }
43143
43144 /* Use vpblendd. */
43145 for (i = 0; i < 8; ++i)
43146 mask |= (d->perm[i * 2] >= 16) << i;
43147 vmode = V8SImode;
43148 goto do_subreg;
43149
43150 case V4DImode:
43151 /* Use vpblendd. */
43152 for (i = 0; i < 4; ++i)
43153 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
43154 vmode = V8SImode;
43155 goto do_subreg;
43156
43157 default:
43158 gcc_unreachable ();
43159 }
43160
43161 /* This matches five different patterns with the different modes. */
43162 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
43163 x = gen_rtx_SET (VOIDmode, target, x);
43164 emit_insn (x);
43165 if (target != d->target)
43166 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43167
43168 return true;
43169 }
43170
43171 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43172 in terms of the variable form of vpermilps.
43173
43174 Note that we will have already failed the immediate input vpermilps,
43175 which requires that the high and low part shuffle be identical; the
43176 variable form doesn't require that. */
43177
43178 static bool
43179 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
43180 {
43181 rtx rperm[8], vperm;
43182 unsigned i;
43183
43184 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
43185 return false;
43186
43187 /* We can only permute within the 128-bit lane. */
43188 for (i = 0; i < 8; ++i)
43189 {
43190 unsigned e = d->perm[i];
43191 if (i < 4 ? e >= 4 : e < 4)
43192 return false;
43193 }
43194
43195 if (d->testing_p)
43196 return true;
43197
43198 for (i = 0; i < 8; ++i)
43199 {
43200 unsigned e = d->perm[i];
43201
43202 /* Within each 128-bit lane, the elements of op0 are numbered
43203 from 0 and the elements of op1 are numbered from 4. */
43204 if (e >= 8 + 4)
43205 e -= 8;
43206 else if (e >= 4)
43207 e -= 4;
43208
43209 rperm[i] = GEN_INT (e);
43210 }
43211
43212 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
43213 vperm = force_reg (V8SImode, vperm);
43214 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
43215
43216 return true;
43217 }
43218
43219 /* Return true if permutation D can be performed as VMODE permutation
43220 instead. */
43221
43222 static bool
43223 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
43224 {
43225 unsigned int i, j, chunk;
43226
43227 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
43228 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
43229 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
43230 return false;
43231
43232 if (GET_MODE_NUNITS (vmode) >= d->nelt)
43233 return true;
43234
43235 chunk = d->nelt / GET_MODE_NUNITS (vmode);
43236 for (i = 0; i < d->nelt; i += chunk)
43237 if (d->perm[i] & (chunk - 1))
43238 return false;
43239 else
43240 for (j = 1; j < chunk; ++j)
43241 if (d->perm[i] + j != d->perm[i + j])
43242 return false;
43243
43244 return true;
43245 }
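
/* Illustrative example (not from the sources): with d->vmode == V16QImode
   and vmode == V4SImode, chunk == 4.  The byte permutation
   { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 } only moves aligned groups of
   four bytes, so it is accepted and corresponds to the V4SImode permutation
   { 1, 0, 3, 2 }; something like { 1,2,3,4, ... } is rejected because its
   first element is not chunk-aligned.  */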
43246
43247 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43248 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
43249
43250 static bool
43251 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
43252 {
43253 unsigned i, nelt, eltsz, mask;
43254 unsigned char perm[64];
43255 enum machine_mode vmode = V16QImode;
43256 rtx rperm[64], vperm, target, op0, op1;
43257
43258 nelt = d->nelt;
43259
43260 if (!d->one_operand_p)
43261 {
43262 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
43263 {
43264 if (TARGET_AVX2
43265 && valid_perm_using_mode_p (V2TImode, d))
43266 {
43267 if (d->testing_p)
43268 return true;
43269
43270 /* Use vperm2i128 insn. The pattern uses
43271 V4DImode instead of V2TImode. */
43272 target = d->target;
43273 if (d->vmode != V4DImode)
43274 target = gen_reg_rtx (V4DImode);
43275 op0 = gen_lowpart (V4DImode, d->op0);
43276 op1 = gen_lowpart (V4DImode, d->op1);
43277 rperm[0]
43278 = GEN_INT ((d->perm[0] / (nelt / 2))
43279 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
43280 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
43281 if (target != d->target)
43282 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43283 return true;
43284 }
43285 return false;
43286 }
43287 }
43288 else
43289 {
43290 if (GET_MODE_SIZE (d->vmode) == 16)
43291 {
43292 if (!TARGET_SSSE3)
43293 return false;
43294 }
43295 else if (GET_MODE_SIZE (d->vmode) == 32)
43296 {
43297 if (!TARGET_AVX2)
43298 return false;
43299
43300 /* V4DImode should be already handled through
43301 expand_vselect by vpermq instruction. */
43302 gcc_assert (d->vmode != V4DImode);
43303
43304 vmode = V32QImode;
43305 if (d->vmode == V8SImode
43306 || d->vmode == V16HImode
43307 || d->vmode == V32QImode)
43308 {
43309 /* First see if vpermq can be used for
43310 V8SImode/V16HImode/V32QImode. */
43311 if (valid_perm_using_mode_p (V4DImode, d))
43312 {
43313 for (i = 0; i < 4; i++)
43314 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
43315 if (d->testing_p)
43316 return true;
43317 target = gen_reg_rtx (V4DImode);
43318 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
43319 perm, 4, false))
43320 {
43321 emit_move_insn (d->target,
43322 gen_lowpart (d->vmode, target));
43323 return true;
43324 }
43325 return false;
43326 }
43327
43328 /* Next see if vpermd can be used. */
43329 if (valid_perm_using_mode_p (V8SImode, d))
43330 vmode = V8SImode;
43331 }
43332 /* Or if vpermps can be used. */
43333 else if (d->vmode == V8SFmode)
43334 vmode = V8SImode;
43335
43336 if (vmode == V32QImode)
43337 {
43338 /* vpshufb only works intra lanes; it is not
43339 possible to shuffle bytes between the lanes. */
43340 for (i = 0; i < nelt; ++i)
43341 if ((d->perm[i] ^ i) & (nelt / 2))
43342 return false;
43343 }
43344 }
43345 else if (GET_MODE_SIZE (d->vmode) == 64)
43346 {
43347 if (!TARGET_AVX512BW)
43348 return false;
43349 if (vmode == V64QImode)
43350 {
43351 /* vpshufb only works intra lanes; it is not
43352 possible to shuffle bytes between the lanes. */
43353 for (i = 0; i < nelt; ++i)
43354 if ((d->perm[i] ^ i) & (nelt / 4))
43355 return false;
43356 }
43357 }
43358 else
43359 return false;
43360 }
43361
43362 if (d->testing_p)
43363 return true;
43364
43365 if (vmode == V8SImode)
43366 for (i = 0; i < 8; ++i)
43367 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
43368 else
43369 {
43370 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43371 if (!d->one_operand_p)
43372 mask = 2 * nelt - 1;
43373 else if (vmode == V16QImode)
43374 mask = nelt - 1;
43375 else if (vmode == V64QImode)
43376 mask = nelt / 4 - 1;
43377 else
43378 mask = nelt / 2 - 1;
43379
43380 for (i = 0; i < nelt; ++i)
43381 {
43382 unsigned j, e = d->perm[i] & mask;
43383 for (j = 0; j < eltsz; ++j)
43384 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
43385 }
43386 }
43387
43388 vperm = gen_rtx_CONST_VECTOR (vmode,
43389 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
43390 vperm = force_reg (vmode, vperm);
43391
43392 target = d->target;
43393 if (d->vmode != vmode)
43394 target = gen_reg_rtx (vmode);
43395 op0 = gen_lowpart (vmode, d->op0);
43396 if (d->one_operand_p)
43397 {
43398 if (vmode == V16QImode)
43399 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
43400 else if (vmode == V32QImode)
43401 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
43402 else if (vmode == V64QImode)
43403 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
43404 else if (vmode == V8SFmode)
43405 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
43406 else
43407 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
43408 }
43409 else
43410 {
43411 op1 = gen_lowpart (vmode, d->op1);
43412 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
43413 }
43414 if (target != d->target)
43415 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43416
43417 return true;
43418 }
43419
43420 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
43421 in a single instruction. */
43422
43423 static bool
43424 expand_vec_perm_1 (struct expand_vec_perm_d *d)
43425 {
43426 unsigned i, nelt = d->nelt;
43427 unsigned char perm2[MAX_VECT_LEN];
43428
43429 /* Check plain VEC_SELECT first, because AVX has instructions that could
43430 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
43431 input where SEL+CONCAT may not. */
43432 if (d->one_operand_p)
43433 {
43434 int mask = nelt - 1;
43435 bool identity_perm = true;
43436 bool broadcast_perm = true;
43437
43438 for (i = 0; i < nelt; i++)
43439 {
43440 perm2[i] = d->perm[i] & mask;
43441 if (perm2[i] != i)
43442 identity_perm = false;
43443 if (perm2[i])
43444 broadcast_perm = false;
43445 }
43446
43447 if (identity_perm)
43448 {
43449 if (!d->testing_p)
43450 emit_move_insn (d->target, d->op0);
43451 return true;
43452 }
43453 else if (broadcast_perm && TARGET_AVX2)
43454 {
43455 /* Use vpbroadcast{b,w,d}. */
43456 rtx (*gen) (rtx, rtx) = NULL;
43457 switch (d->vmode)
43458 {
43459 case V64QImode:
43460 if (TARGET_AVX512BW)
43461 gen = gen_avx512bw_vec_dupv64qi;
43462 break;
43463 case V32QImode:
43464 gen = gen_avx2_pbroadcastv32qi_1;
43465 break;
43466 case V32HImode:
43467 if (TARGET_AVX512BW)
43468 gen = gen_avx512bw_vec_dupv32hi;
43469 break;
43470 case V16HImode:
43471 gen = gen_avx2_pbroadcastv16hi_1;
43472 break;
43473 case V16SImode:
43474 if (TARGET_AVX512F)
43475 gen = gen_avx512f_vec_dupv16si;
43476 break;
43477 case V8SImode:
43478 gen = gen_avx2_pbroadcastv8si_1;
43479 break;
43480 case V16QImode:
43481 gen = gen_avx2_pbroadcastv16qi;
43482 break;
43483 case V8HImode:
43484 gen = gen_avx2_pbroadcastv8hi;
43485 break;
43486 case V16SFmode:
43487 if (TARGET_AVX512F)
43488 gen = gen_avx512f_vec_dupv16sf;
43489 break;
43490 case V8SFmode:
43491 gen = gen_avx2_vec_dupv8sf_1;
43492 break;
43493 case V8DFmode:
43494 if (TARGET_AVX512F)
43495 gen = gen_avx512f_vec_dupv8df;
43496 break;
43497 case V8DImode:
43498 if (TARGET_AVX512F)
43499 gen = gen_avx512f_vec_dupv8di;
43500 break;
43501 /* For other modes prefer other shuffles this function creates. */
43502 default: break;
43503 }
43504 if (gen != NULL)
43505 {
43506 if (!d->testing_p)
43507 emit_insn (gen (d->target, d->op0));
43508 return true;
43509 }
43510 }
43511
43512 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43513 return true;
43514
43515 /* There are plenty of patterns in sse.md that are written for
43516 SEL+CONCAT and are not replicated for a single op. Perhaps
43517 that should be changed, to avoid the nastiness here. */
43518
43519 /* Recognize interleave style patterns, which means incrementing
43520 every other permutation operand. */
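/* For instance, a one-operand V4SImode d->perm of { 0 0 1 1 } becomes
   perm2 = { 0 4 1 5 }, which matches punpckldq of the operand with
   itself.  */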
43521 for (i = 0; i < nelt; i += 2)
43522 {
43523 perm2[i] = d->perm[i] & mask;
43524 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43525 }
43526 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43527 d->testing_p))
43528 return true;
43529
43530 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43531 if (nelt >= 4)
43532 {
43533 for (i = 0; i < nelt; i += 4)
43534 {
43535 perm2[i + 0] = d->perm[i + 0] & mask;
43536 perm2[i + 1] = d->perm[i + 1] & mask;
43537 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43538 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43539 }
43540
43541 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43542 d->testing_p))
43543 return true;
43544 }
43545 }
43546
43547 /* Finally, try the fully general two operand permute. */
43548 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43549 d->testing_p))
43550 return true;
43551
43552 /* Recognize interleave style patterns with reversed operands. */
43553 if (!d->one_operand_p)
43554 {
43555 for (i = 0; i < nelt; ++i)
43556 {
43557 unsigned e = d->perm[i];
43558 if (e >= nelt)
43559 e -= nelt;
43560 else
43561 e += nelt;
43562 perm2[i] = e;
43563 }
43564
43565 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43566 d->testing_p))
43567 return true;
43568 }
43569
43570 /* Try the SSE4.1 blend variable merge instructions. */
43571 if (expand_vec_perm_blend (d))
43572 return true;
43573
43574 /* Try one of the AVX vpermil variable permutations. */
43575 if (expand_vec_perm_vpermil (d))
43576 return true;
43577
43578 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43579 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43580 if (expand_vec_perm_pshufb (d))
43581 return true;
43582
43583 /* Try the AVX2 vpalignr instruction. */
43584 if (expand_vec_perm_palignr (d, true))
43585 return true;
43586
43587 /* Try the AVX512F vpermi2 instructions. */
43588 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
43589 return true;
43590
43591 return false;
43592 }
43593
43594 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43595 in terms of a pair of pshuflw + pshufhw instructions. */
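/* For instance, for V8HImode with d->perm = { 2 0 3 1 5 7 4 6 },
   elements 0-3 only reference words 0-3 and elements 4-7 only reference
   words 4-7, so the routine emits the vselect { 2 0 3 1 4 5 6 7 }
   (pshuflw) followed by { 0 1 2 3 5 7 4 6 } (pshufhw) on its result.  */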
43596
43597 static bool
43598 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43599 {
43600 unsigned char perm2[MAX_VECT_LEN];
43601 unsigned i;
43602 bool ok;
43603
43604 if (d->vmode != V8HImode || !d->one_operand_p)
43605 return false;
43606
43607 /* The two permutations only operate in 64-bit lanes. */
43608 for (i = 0; i < 4; ++i)
43609 if (d->perm[i] >= 4)
43610 return false;
43611 for (i = 4; i < 8; ++i)
43612 if (d->perm[i] < 4)
43613 return false;
43614
43615 if (d->testing_p)
43616 return true;
43617
43618 /* Emit the pshuflw. */
43619 memcpy (perm2, d->perm, 4);
43620 for (i = 4; i < 8; ++i)
43621 perm2[i] = i;
43622 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43623 gcc_assert (ok);
43624
43625 /* Emit the pshufhw. */
43626 memcpy (perm2 + 4, d->perm + 4, 4);
43627 for (i = 0; i < 4; ++i)
43628 perm2[i] = i;
43629 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43630 gcc_assert (ok);
43631
43632 return true;
43633 }
43634
43635 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43636 the permutation using the SSSE3 palignr instruction. This succeeds
43637 when all of the elements in PERM fit within one vector and we merely
43638 need to shift them down so that a single vector permutation has a
43639 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
43640 the vpalignr instruction itself can perform the requested permutation. */
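/* For instance, for V16QImode with d->perm = { 2 3 ... 17 } the minimum
   element is 2, so a single palignr of op1:op0 by 2 bytes already yields
   the result (the in-order case); if the selected window is additionally
   permuted, the palignr is followed by a one-operand shuffle such as
   pshufb.  */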
43641
43642 static bool
43643 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
43644 {
43645 unsigned i, nelt = d->nelt;
43646 unsigned min, max, minswap, maxswap;
43647 bool in_order, ok, swap = false;
43648 rtx shift, target;
43649 struct expand_vec_perm_d dcopy;
43650
43651 /* Even with AVX, palignr only operates on 128-bit vectors;
43652 with AVX2, palignr operates on both 128-bit lanes. */
43653 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43654 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
43655 return false;
43656
43657 min = 2 * nelt;
43658 max = 0;
43659 minswap = 2 * nelt;
43660 maxswap = 0;
43661 for (i = 0; i < nelt; ++i)
43662 {
43663 unsigned e = d->perm[i];
43664 unsigned eswap = d->perm[i] ^ nelt;
43665 if (GET_MODE_SIZE (d->vmode) == 32)
43666 {
43667 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
43668 eswap = e ^ (nelt / 2);
43669 }
43670 if (e < min)
43671 min = e;
43672 if (e > max)
43673 max = e;
43674 if (eswap < minswap)
43675 minswap = eswap;
43676 if (eswap > maxswap)
43677 maxswap = eswap;
43678 }
43679 if (min == 0
43680 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
43681 {
43682 if (d->one_operand_p
43683 || minswap == 0
43684 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
43685 ? nelt / 2 : nelt))
43686 return false;
43687 swap = true;
43688 min = minswap;
43689 max = maxswap;
43690 }
43691
43692 /* Given that we have SSSE3, we know we'll be able to implement the
43693 single operand permutation after the palignr with pshufb for
43694 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
43695 first. */
43696 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
43697 return true;
43698
43699 dcopy = *d;
43700 if (swap)
43701 {
43702 dcopy.op0 = d->op1;
43703 dcopy.op1 = d->op0;
43704 for (i = 0; i < nelt; ++i)
43705 dcopy.perm[i] ^= nelt;
43706 }
43707
43708 in_order = true;
43709 for (i = 0; i < nelt; ++i)
43710 {
43711 unsigned e = dcopy.perm[i];
43712 if (GET_MODE_SIZE (d->vmode) == 32
43713 && e >= nelt
43714 && (e & (nelt / 2 - 1)) < min)
43715 e = e - min - (nelt / 2);
43716 else
43717 e = e - min;
43718 if (e != i)
43719 in_order = false;
43720 dcopy.perm[i] = e;
43721 }
43722 dcopy.one_operand_p = true;
43723
43724 if (single_insn_only_p && !in_order)
43725 return false;
43726
43727 /* For AVX2, test whether we can permute the result in one instruction. */
43728 if (d->testing_p)
43729 {
43730 if (in_order)
43731 return true;
43732 dcopy.op1 = dcopy.op0;
43733 return expand_vec_perm_1 (&dcopy);
43734 }
43735
43736 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43737 if (GET_MODE_SIZE (d->vmode) == 16)
43738 {
43739 target = gen_reg_rtx (TImode);
43740 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
43741 gen_lowpart (TImode, dcopy.op0), shift));
43742 }
43743 else
43744 {
43745 target = gen_reg_rtx (V2TImode);
43746 emit_insn (gen_avx2_palignrv2ti (target,
43747 gen_lowpart (V2TImode, dcopy.op1),
43748 gen_lowpart (V2TImode, dcopy.op0),
43749 shift));
43750 }
43751
43752 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43753
43754 /* Test for the degenerate case where the alignment by itself
43755 produces the desired permutation. */
43756 if (in_order)
43757 {
43758 emit_move_insn (d->target, dcopy.op0);
43759 return true;
43760 }
43761
43762 ok = expand_vec_perm_1 (&dcopy);
43763 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
43764
43765 return ok;
43766 }
43767
43768 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43769 the permutation using the SSE4_1 pblendv instruction. Potentially
43770 reduces the permutation from 2 pshufb insns and an ior to 1 pshufb and a pblendv. */
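/* For instance, for V8HImode with d->perm = { 0 1 8 3 4 5 9 7 } (see the
   example below), the out-of-place elements 8 and 9 both come from the
   second operand, so a one-operand shuffle of op1 with perm
   { 0 1 0 3 4 5 1 7 } first puts op1[0] and op1[1] into positions 2 and 6,
   and a blend with perm { 0 1 10 3 4 5 14 7 } then merges those two
   positions with op0.  */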
43771
43772 static bool
43773 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43774 {
43775 unsigned i, which, nelt = d->nelt;
43776 struct expand_vec_perm_d dcopy, dcopy1;
43777 enum machine_mode vmode = d->vmode;
43778 bool ok;
43779
43780 /* Use the same checks as in expand_vec_perm_blend. */
43781 if (d->one_operand_p)
43782 return false;
43783 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
43784 ;
43785 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
43786 ;
43787 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43788 ;
43789 else
43790 return false;
43791
43792 /* Figure out which permutation elements do not stay in their
43793 original positions. */
43794 for (i = 0, which = 0; i < nelt; ++i)
43795 {
43796 unsigned e = d->perm[i];
43797 if (e != i)
43798 which |= (e < nelt ? 1 : 2);
43799 }
43800 /* We can pblend the part where elements do not stay in their
43801 original positions only when these elements all come from the
43802 same half of the permutation, i.e. from the same operand.
43803 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their original
43804 positions, but both are >= 8.
43805 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their original
43806 positions, and 8 >= 8 but 2 is not. */
43807 if (which != 1 && which != 2)
43808 return false;
43809 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
43810 return true;
43811
43812 /* First we apply a one-operand permutation to the part whose
43813 elements do not stay in their original positions. */
43814 dcopy = *d;
43815 if (which == 2)
43816 dcopy.op0 = dcopy.op1 = d->op1;
43817 else
43818 dcopy.op0 = dcopy.op1 = d->op0;
43819 dcopy.one_operand_p = true;
43820
43821 for (i = 0; i < nelt; ++i)
43822 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43823
43824 ok = expand_vec_perm_1 (&dcopy);
43825 if (GET_MODE_SIZE (vmode) != 16 && !ok)
43826 return false;
43827 else
43828 gcc_assert (ok);
43829 if (d->testing_p)
43830 return true;
43831
43832 /* Next we put permuted elements into their positions. */
43833 dcopy1 = *d;
43834 if (which == 2)
43835 dcopy1.op1 = dcopy.target;
43836 else
43837 dcopy1.op0 = dcopy.target;
43838
43839 for (i = 0; i < nelt; ++i)
43840 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43841
43842 ok = expand_vec_perm_blend (&dcopy1);
43843 gcc_assert (ok);
43844
43845 return true;
43846 }
43847
43848 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43849
43850 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43851 a two vector permutation into a single vector permutation by using
43852 an interleave operation to merge the vectors. */
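/* For instance, for V4SImode with d->perm = { 1 4 0 5 } all elements come
   from the low halves of the two inputs, so a punpckldq first builds
   { 0 4 1 5 } and a single pshufd with selector { 2 1 0 3 } then produces
   the requested order.  */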
43853
43854 static bool
43855 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43856 {
43857 struct expand_vec_perm_d dremap, dfinal;
43858 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43859 unsigned HOST_WIDE_INT contents;
43860 unsigned char remap[2 * MAX_VECT_LEN];
43861 rtx_insn *seq;
43862 bool ok, same_halves = false;
43863
43864 if (GET_MODE_SIZE (d->vmode) == 16)
43865 {
43866 if (d->one_operand_p)
43867 return false;
43868 }
43869 else if (GET_MODE_SIZE (d->vmode) == 32)
43870 {
43871 if (!TARGET_AVX)
43872 return false;
43873 /* For 32-byte modes allow even d->one_operand_p.
43874 The lack of cross-lane shuffling in some instructions
43875 might prevent a single insn shuffle. */
43876 dfinal = *d;
43877 dfinal.testing_p = true;
43878 /* If expand_vec_perm_interleave3 can expand this into
43879 a 3 insn sequence, give up and let it be expanded as
43880 a 3 insn sequence. While that is one insn longer,
43881 it doesn't need a memory operand, and in the common
43882 case that both the interleave low and high permutations
43883 with the same operands are adjacent, the pair needs only
43884 4 insns after CSE. */
43885 if (expand_vec_perm_interleave3 (&dfinal))
43886 return false;
43887 }
43888 else
43889 return false;
43890
43891 /* Examine from whence the elements come. */
43892 contents = 0;
43893 for (i = 0; i < nelt; ++i)
43894 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43895
43896 memset (remap, 0xff, sizeof (remap));
43897 dremap = *d;
43898
43899 if (GET_MODE_SIZE (d->vmode) == 16)
43900 {
43901 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43902
43903 /* Split the two input vectors into 4 halves. */
43904 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43905 h2 = h1 << nelt2;
43906 h3 = h2 << nelt2;
43907 h4 = h3 << nelt2;
43908
43909 /* If the elements are from the low halves, use interleave low; similarly
43910 for interleave high. If the elements are from mis-matched halves, we
43911 can use shufps for V4SF/V4SI or do a DImode shuffle. */
43912 if ((contents & (h1 | h3)) == contents)
43913 {
43914 /* punpckl* */
43915 for (i = 0; i < nelt2; ++i)
43916 {
43917 remap[i] = i * 2;
43918 remap[i + nelt] = i * 2 + 1;
43919 dremap.perm[i * 2] = i;
43920 dremap.perm[i * 2 + 1] = i + nelt;
43921 }
43922 if (!TARGET_SSE2 && d->vmode == V4SImode)
43923 dremap.vmode = V4SFmode;
43924 }
43925 else if ((contents & (h2 | h4)) == contents)
43926 {
43927 /* punpckh* */
43928 for (i = 0; i < nelt2; ++i)
43929 {
43930 remap[i + nelt2] = i * 2;
43931 remap[i + nelt + nelt2] = i * 2 + 1;
43932 dremap.perm[i * 2] = i + nelt2;
43933 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43934 }
43935 if (!TARGET_SSE2 && d->vmode == V4SImode)
43936 dremap.vmode = V4SFmode;
43937 }
43938 else if ((contents & (h1 | h4)) == contents)
43939 {
43940 /* shufps */
43941 for (i = 0; i < nelt2; ++i)
43942 {
43943 remap[i] = i;
43944 remap[i + nelt + nelt2] = i + nelt2;
43945 dremap.perm[i] = i;
43946 dremap.perm[i + nelt2] = i + nelt + nelt2;
43947 }
43948 if (nelt != 4)
43949 {
43950 /* shufpd */
43951 dremap.vmode = V2DImode;
43952 dremap.nelt = 2;
43953 dremap.perm[0] = 0;
43954 dremap.perm[1] = 3;
43955 }
43956 }
43957 else if ((contents & (h2 | h3)) == contents)
43958 {
43959 /* shufps */
43960 for (i = 0; i < nelt2; ++i)
43961 {
43962 remap[i + nelt2] = i;
43963 remap[i + nelt] = i + nelt2;
43964 dremap.perm[i] = i + nelt2;
43965 dremap.perm[i + nelt2] = i + nelt;
43966 }
43967 if (nelt != 4)
43968 {
43969 /* shufpd */
43970 dremap.vmode = V2DImode;
43971 dremap.nelt = 2;
43972 dremap.perm[0] = 1;
43973 dremap.perm[1] = 2;
43974 }
43975 }
43976 else
43977 return false;
43978 }
43979 else
43980 {
43981 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43982 unsigned HOST_WIDE_INT q[8];
43983 unsigned int nonzero_halves[4];
43984
43985 /* Split the two input vectors into 8 quarters. */
43986 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43987 for (i = 1; i < 8; ++i)
43988 q[i] = q[0] << (nelt4 * i);
43989 for (i = 0; i < 4; ++i)
43990 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43991 {
43992 nonzero_halves[nzcnt] = i;
43993 ++nzcnt;
43994 }
43995
43996 if (nzcnt == 1)
43997 {
43998 gcc_assert (d->one_operand_p);
43999 nonzero_halves[1] = nonzero_halves[0];
44000 same_halves = true;
44001 }
44002 else if (d->one_operand_p)
44003 {
44004 gcc_assert (nonzero_halves[0] == 0);
44005 gcc_assert (nonzero_halves[1] == 1);
44006 }
44007
44008 if (nzcnt <= 2)
44009 {
44010 if (d->perm[0] / nelt2 == nonzero_halves[1])
44011 {
44012 /* Attempt to increase the likelihood that the dfinal
44013 shuffle will be intra-lane. */
44014 char tmph = nonzero_halves[0];
44015 nonzero_halves[0] = nonzero_halves[1];
44016 nonzero_halves[1] = tmph;
44017 }
44018
44019 /* vperm2f128 or vperm2i128. */
44020 for (i = 0; i < nelt2; ++i)
44021 {
44022 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
44023 remap[i + nonzero_halves[0] * nelt2] = i;
44024 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
44025 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
44026 }
44027
44028 if (d->vmode != V8SFmode
44029 && d->vmode != V4DFmode
44030 && d->vmode != V8SImode)
44031 {
44032 dremap.vmode = V8SImode;
44033 dremap.nelt = 8;
44034 for (i = 0; i < 4; ++i)
44035 {
44036 dremap.perm[i] = i + nonzero_halves[0] * 4;
44037 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
44038 }
44039 }
44040 }
44041 else if (d->one_operand_p)
44042 return false;
44043 else if (TARGET_AVX2
44044 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
44045 {
44046 /* vpunpckl* */
44047 for (i = 0; i < nelt4; ++i)
44048 {
44049 remap[i] = i * 2;
44050 remap[i + nelt] = i * 2 + 1;
44051 remap[i + nelt2] = i * 2 + nelt2;
44052 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
44053 dremap.perm[i * 2] = i;
44054 dremap.perm[i * 2 + 1] = i + nelt;
44055 dremap.perm[i * 2 + nelt2] = i + nelt2;
44056 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
44057 }
44058 }
44059 else if (TARGET_AVX2
44060 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
44061 {
44062 /* vpunpckh* */
44063 for (i = 0; i < nelt4; ++i)
44064 {
44065 remap[i + nelt4] = i * 2;
44066 remap[i + nelt + nelt4] = i * 2 + 1;
44067 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
44068 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
44069 dremap.perm[i * 2] = i + nelt4;
44070 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
44071 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
44072 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
44073 }
44074 }
44075 else
44076 return false;
44077 }
44078
44079 /* Use the remapping array set up above to move the elements from their
44080 swizzled locations into their final destinations. */
44081 dfinal = *d;
44082 for (i = 0; i < nelt; ++i)
44083 {
44084 unsigned e = remap[d->perm[i]];
44085 gcc_assert (e < nelt);
44086 /* If same_halves is true, both halves of the remapped vector are the
44087 same. Avoid cross-lane accesses if possible. */
44088 if (same_halves && i >= nelt2)
44089 {
44090 gcc_assert (e < nelt2);
44091 dfinal.perm[i] = e + nelt2;
44092 }
44093 else
44094 dfinal.perm[i] = e;
44095 }
44096 if (!d->testing_p)
44097 {
44098 dremap.target = gen_reg_rtx (dremap.vmode);
44099 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
44100 }
44101 dfinal.op1 = dfinal.op0;
44102 dfinal.one_operand_p = true;
44103
44104 /* Test if the final remap can be done with a single insn. For V4SFmode or
44105 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
44106 start_sequence ();
44107 ok = expand_vec_perm_1 (&dfinal);
44108 seq = get_insns ();
44109 end_sequence ();
44110
44111 if (!ok)
44112 return false;
44113
44114 if (d->testing_p)
44115 return true;
44116
44117 if (dremap.vmode != dfinal.vmode)
44118 {
44119 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
44120 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
44121 }
44122
44123 ok = expand_vec_perm_1 (&dremap);
44124 gcc_assert (ok);
44125
44126 emit_insn (seq);
44127 return true;
44128 }
44129
44130 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
44131 a single vector cross-lane permutation into vpermq followed
44132 by any of the single insn permutations. */
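/* For instance, a V16HImode selector whose low half reads only the 64-bit
   quarters 0 and 2 of the input and whose high half reads only quarters 1
   and 3 is expanded as a vpermq with selector { 0 2 1 3 } followed by a
   single in-lane permutation of that result.  */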
44133
44134 static bool
44135 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
44136 {
44137 struct expand_vec_perm_d dremap, dfinal;
44138 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
44139 unsigned contents[2];
44140 bool ok;
44141
44142 if (!(TARGET_AVX2
44143 && (d->vmode == V32QImode || d->vmode == V16HImode)
44144 && d->one_operand_p))
44145 return false;
44146
44147 contents[0] = 0;
44148 contents[1] = 0;
44149 for (i = 0; i < nelt2; ++i)
44150 {
44151 contents[0] |= 1u << (d->perm[i] / nelt4);
44152 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
44153 }
44154
44155 for (i = 0; i < 2; ++i)
44156 {
44157 unsigned int cnt = 0;
44158 for (j = 0; j < 4; ++j)
44159 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
44160 return false;
44161 }
44162
44163 if (d->testing_p)
44164 return true;
44165
44166 dremap = *d;
44167 dremap.vmode = V4DImode;
44168 dremap.nelt = 4;
44169 dremap.target = gen_reg_rtx (V4DImode);
44170 dremap.op0 = gen_lowpart (V4DImode, d->op0);
44171 dremap.op1 = dremap.op0;
44172 dremap.one_operand_p = true;
44173 for (i = 0; i < 2; ++i)
44174 {
44175 unsigned int cnt = 0;
44176 for (j = 0; j < 4; ++j)
44177 if ((contents[i] & (1u << j)) != 0)
44178 dremap.perm[2 * i + cnt++] = j;
44179 for (; cnt < 2; ++cnt)
44180 dremap.perm[2 * i + cnt] = 0;
44181 }
44182
44183 dfinal = *d;
44184 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
44185 dfinal.op1 = dfinal.op0;
44186 dfinal.one_operand_p = true;
44187 for (i = 0, j = 0; i < nelt; ++i)
44188 {
44189 if (i == nelt2)
44190 j = 2;
44191 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
44192 if ((d->perm[i] / nelt4) == dremap.perm[j])
44193 ;
44194 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
44195 dfinal.perm[i] |= nelt4;
44196 else
44197 gcc_unreachable ();
44198 }
44199
44200 ok = expand_vec_perm_1 (&dremap);
44201 gcc_assert (ok);
44202
44203 ok = expand_vec_perm_1 (&dfinal);
44204 gcc_assert (ok);
44205
44206 return true;
44207 }
44208
44209 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
44210 a vector permutation using two instructions, vperm2f128 resp.
44211 vperm2i128 followed by any single in-lane permutation. */
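/* For instance, for V4DFmode operands { 0 1 2 3 } and { 4 5 6 7 } the
   selector { 1 7 2 4 } can be expanded here as a vperm2f128 building
   { 6 7 4 5 } (the second operand with its lanes swapped) followed by a
   vshufpd combining that result with the first operand.  */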
44212
44213 static bool
44214 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
44215 {
44216 struct expand_vec_perm_d dfirst, dsecond;
44217 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
44218 bool ok;
44219
44220 if (!TARGET_AVX
44221 || GET_MODE_SIZE (d->vmode) != 32
44222 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
44223 return false;
44224
44225 dsecond = *d;
44226 dsecond.one_operand_p = false;
44227 dsecond.testing_p = true;
44228
44229 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
44230 immediate. For perm < 16 the second permutation uses
44231 d->op0 as first operand, for perm >= 16 it uses d->op1
44232 as first operand. The second operand is the result of
44233 vperm2[fi]128. */
44234 for (perm = 0; perm < 32; perm++)
44235 {
44236 /* Ignore permutations which do not move anything cross-lane. */
44237 if (perm < 16)
44238 {
44239 /* The second shuffle for e.g. V4DFmode has
44240 0123 and ABCD operands.
44241 Ignore AB23, as 23 is already in the second lane
44242 of the first operand. */
44243 if ((perm & 0xc) == (1 << 2)) continue;
44244 /* And 01CD, as 01 is in the first lane of the first
44245 operand. */
44246 if ((perm & 3) == 0) continue;
44247 /* And 4567, as then the vperm2[fi]128 doesn't change
44248 anything on the original 4567 second operand. */
44249 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
44250 }
44251 else
44252 {
44253 /* The second shuffle for e.g. V4DFmode has
44254 4567 and ABCD operands.
44255 Ignore AB67, as 67 is already in the second lane
44256 of the first operand. */
44257 if ((perm & 0xc) == (3 << 2)) continue;
44258 /* And 45CD, as 45 is in the first lane of the first
44259 operand. */
44260 if ((perm & 3) == 2) continue;
44261 /* And 0123, as then the vperm2[fi]128 doesn't change
44262 anything on the original 0123 first operand. */
44263 if ((perm & 0xf) == (1 << 2)) continue;
44264 }
44265
44266 for (i = 0; i < nelt; i++)
44267 {
44268 j = d->perm[i] / nelt2;
44269 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
44270 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
44271 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
44272 dsecond.perm[i] = d->perm[i] & (nelt - 1);
44273 else
44274 break;
44275 }
44276
44277 if (i == nelt)
44278 {
44279 start_sequence ();
44280 ok = expand_vec_perm_1 (&dsecond);
44281 end_sequence ();
44282 }
44283 else
44284 ok = false;
44285
44286 if (ok)
44287 {
44288 if (d->testing_p)
44289 return true;
44290
44291 /* Found a usable second shuffle. dfirst will be
44292 vperm2f128 on d->op0 and d->op1. */
44293 dsecond.testing_p = false;
44294 dfirst = *d;
44295 dfirst.target = gen_reg_rtx (d->vmode);
44296 for (i = 0; i < nelt; i++)
44297 dfirst.perm[i] = (i & (nelt2 - 1))
44298 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
44299
44300 canonicalize_perm (&dfirst);
44301 ok = expand_vec_perm_1 (&dfirst);
44302 gcc_assert (ok);
44303
44304 /* And dsecond is some single insn shuffle, taking
44305 d->op0 and result of vperm2f128 (if perm < 16) or
44306 d->op1 and result of vperm2f128 (otherwise). */
44307 if (perm >= 16)
44308 dsecond.op0 = dsecond.op1;
44309 dsecond.op1 = dfirst.target;
44310
44311 ok = expand_vec_perm_1 (&dsecond);
44312 gcc_assert (ok);
44313
44314 return true;
44315 }
44316
44317 /* For one operand, the only useful vperm2f128 permutation is 0x01
44318 aka lanes swap. */
44319 if (d->one_operand_p)
44320 return false;
44321 }
44322
44323 return false;
44324 }
44325
44326 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
44327 a two vector permutation using 2 intra-lane interleave insns
44328 and cross-lane shuffle for 32-byte vectors. */
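/* Only true interleave patterns are accepted here, e.g. for V8SImode
   d->perm = { 0 8 1 9 2 10 3 11 } (interleave low) or
   { 4 12 5 13 6 14 7 15 } (interleave high); the vec_interleave_{low,high}
   expanders then realize them with the 2 intra-lane interleave insns plus
   cross-lane shuffle mentioned above.  */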
44329
44330 static bool
44331 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
44332 {
44333 unsigned i, nelt;
44334 rtx (*gen) (rtx, rtx, rtx);
44335
44336 if (d->one_operand_p)
44337 return false;
44338 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
44339 ;
44340 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
44341 ;
44342 else
44343 return false;
44344
44345 nelt = d->nelt;
44346 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
44347 return false;
44348 for (i = 0; i < nelt; i += 2)
44349 if (d->perm[i] != d->perm[0] + i / 2
44350 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
44351 return false;
44352
44353 if (d->testing_p)
44354 return true;
44355
44356 switch (d->vmode)
44357 {
44358 case V32QImode:
44359 if (d->perm[0])
44360 gen = gen_vec_interleave_highv32qi;
44361 else
44362 gen = gen_vec_interleave_lowv32qi;
44363 break;
44364 case V16HImode:
44365 if (d->perm[0])
44366 gen = gen_vec_interleave_highv16hi;
44367 else
44368 gen = gen_vec_interleave_lowv16hi;
44369 break;
44370 case V8SImode:
44371 if (d->perm[0])
44372 gen = gen_vec_interleave_highv8si;
44373 else
44374 gen = gen_vec_interleave_lowv8si;
44375 break;
44376 case V4DImode:
44377 if (d->perm[0])
44378 gen = gen_vec_interleave_highv4di;
44379 else
44380 gen = gen_vec_interleave_lowv4di;
44381 break;
44382 case V8SFmode:
44383 if (d->perm[0])
44384 gen = gen_vec_interleave_highv8sf;
44385 else
44386 gen = gen_vec_interleave_lowv8sf;
44387 break;
44388 case V4DFmode:
44389 if (d->perm[0])
44390 gen = gen_vec_interleave_highv4df;
44391 else
44392 gen = gen_vec_interleave_lowv4df;
44393 break;
44394 default:
44395 gcc_unreachable ();
44396 }
44397
44398 emit_insn (gen (d->target, d->op0, d->op1));
44399 return true;
44400 }
44401
44402 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
44403 a single vector permutation using a single intra-lane vector
44404 permutation, vperm2f128 swapping the lanes and vblend* insn blending
44405 the non-swapped and swapped vectors together. */
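/* For instance (on AVX without AVX2), for V8SFmode with
   d->perm = { 1 4 2 7 5 0 6 3 } the in-lane vpermilps { 1 0 2 3 5 4 6 7 }
   places every needed element at its target offset within its own lane,
   a vperm2f128 then swaps the lanes of that result, and a vblendps with
   mask 0xaa picks the cross-lane elements from the swapped copy.  */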
44406
44407 static bool
44408 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
44409 {
44410 struct expand_vec_perm_d dfirst, dsecond;
44411 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
44412 rtx_insn *seq;
44413 bool ok;
44414 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
44415
44416 if (!TARGET_AVX
44417 || TARGET_AVX2
44418 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
44419 || !d->one_operand_p)
44420 return false;
44421
44422 dfirst = *d;
44423 for (i = 0; i < nelt; i++)
44424 dfirst.perm[i] = 0xff;
44425 for (i = 0, msk = 0; i < nelt; i++)
44426 {
44427 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
44428 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
44429 return false;
44430 dfirst.perm[j] = d->perm[i];
44431 if (j != i)
44432 msk |= (1 << i);
44433 }
44434 for (i = 0; i < nelt; i++)
44435 if (dfirst.perm[i] == 0xff)
44436 dfirst.perm[i] = i;
44437
44438 if (!d->testing_p)
44439 dfirst.target = gen_reg_rtx (dfirst.vmode);
44440
44441 start_sequence ();
44442 ok = expand_vec_perm_1 (&dfirst);
44443 seq = get_insns ();
44444 end_sequence ();
44445
44446 if (!ok)
44447 return false;
44448
44449 if (d->testing_p)
44450 return true;
44451
44452 emit_insn (seq);
44453
44454 dsecond = *d;
44455 dsecond.op0 = dfirst.target;
44456 dsecond.op1 = dfirst.target;
44457 dsecond.one_operand_p = true;
44458 dsecond.target = gen_reg_rtx (dsecond.vmode);
44459 for (i = 0; i < nelt; i++)
44460 dsecond.perm[i] = i ^ nelt2;
44461
44462 ok = expand_vec_perm_1 (&dsecond);
44463 gcc_assert (ok);
44464
44465 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
44466 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
44467 return true;
44468 }
44469
44470 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
44471 permutation using two vperm2f128, followed by a vshufpd insn blending
44472 the two vectors together. */
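/* For instance (on AVX without AVX2), for V4DFmode operands { 0 1 2 3 }
   and { 4 5 6 7 } the selector { 0 7 5 2 } is expanded as one vperm2f128
   building { 0 1 4 5 }, a second vperm2f128 building { 6 7 2 3 }, and a
   final vshufpd with selector { 0 5 3 6 } combining the two into
   { 0 7 5 2 }.  */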
44473
44474 static bool
44475 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
44476 {
44477 struct expand_vec_perm_d dfirst, dsecond, dthird;
44478 bool ok;
44479
44480 if (!TARGET_AVX || (d->vmode != V4DFmode))
44481 return false;
44482
44483 if (d->testing_p)
44484 return true;
44485
44486 dfirst = *d;
44487 dsecond = *d;
44488 dthird = *d;
44489
44490 dfirst.perm[0] = (d->perm[0] & ~1);
44491 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
44492 dfirst.perm[2] = (d->perm[2] & ~1);
44493 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
44494 dsecond.perm[0] = (d->perm[1] & ~1);
44495 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
44496 dsecond.perm[2] = (d->perm[3] & ~1);
44497 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
44498 dthird.perm[0] = (d->perm[0] % 2);
44499 dthird.perm[1] = (d->perm[1] % 2) + 4;
44500 dthird.perm[2] = (d->perm[2] % 2) + 2;
44501 dthird.perm[3] = (d->perm[3] % 2) + 6;
44502
44503 dfirst.target = gen_reg_rtx (dfirst.vmode);
44504 dsecond.target = gen_reg_rtx (dsecond.vmode);
44505 dthird.op0 = dfirst.target;
44506 dthird.op1 = dsecond.target;
44507 dthird.one_operand_p = false;
44508
44509 canonicalize_perm (&dfirst);
44510 canonicalize_perm (&dsecond);
44511
44512 ok = expand_vec_perm_1 (&dfirst)
44513 && expand_vec_perm_1 (&dsecond)
44514 && expand_vec_perm_1 (&dthird);
44515
44516 gcc_assert (ok);
44517
44518 return true;
44519 }
44520
44521 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
44522 permutation with two pshufb insns and an ior. We should have already
44523 failed all two instruction sequences. */
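/* For instance, an extract-even V16QImode permutation { 0 2 4 ... 30 }
   uses the mask { 0 2 4 ... 14, -128 x 8 } for the pshufb on op0, the
   mask { -128 x 8, 0 2 4 ... 14 } for the pshufb on op1, and an ior to
   combine the two half-results.  */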
44524
44525 static bool
44526 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
44527 {
44528 rtx rperm[2][16], vperm, l, h, op, m128;
44529 unsigned int i, nelt, eltsz;
44530
44531 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
44532 return false;
44533 gcc_assert (!d->one_operand_p);
44534
44535 if (d->testing_p)
44536 return true;
44537
44538 nelt = d->nelt;
44539 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44540
44541 /* Generate two permutation masks. If the required element is within
44542 the given vector it is shuffled into the proper lane. If the required
44543 element is in the other vector, force a zero into the lane by setting
44544 bit 7 in the permutation mask. */
44545 m128 = GEN_INT (-128);
44546 for (i = 0; i < nelt; ++i)
44547 {
44548 unsigned j, e = d->perm[i];
44549 unsigned which = (e >= nelt);
44550 if (e >= nelt)
44551 e -= nelt;
44552
44553 for (j = 0; j < eltsz; ++j)
44554 {
44555 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
44556 rperm[1-which][i*eltsz + j] = m128;
44557 }
44558 }
44559
44560 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
44561 vperm = force_reg (V16QImode, vperm);
44562
44563 l = gen_reg_rtx (V16QImode);
44564 op = gen_lowpart (V16QImode, d->op0);
44565 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
44566
44567 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
44568 vperm = force_reg (V16QImode, vperm);
44569
44570 h = gen_reg_rtx (V16QImode);
44571 op = gen_lowpart (V16QImode, d->op1);
44572 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
44573
44574 op = d->target;
44575 if (d->vmode != V16QImode)
44576 op = gen_reg_rtx (V16QImode);
44577 emit_insn (gen_iorv16qi3 (op, l, h));
44578 if (op != d->target)
44579 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44580
44581 return true;
44582 }
44583
44584 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
44585 with two vpshufb insns, vpermq and vpor. We should have already failed
44586 all two or three instruction sequences. */
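/* Each result byte whose source lies in its own 128-bit lane is produced
   by one vpshufb; each byte whose source lies in the other lane is
   produced by the second vpshufb at the mirrored position and then moved
   into place by the vpermq lane swap; vpor finally merges the two
   partial results.  */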
44587
44588 static bool
44589 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44590 {
44591 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44592 unsigned int i, nelt, eltsz;
44593
44594 if (!TARGET_AVX2
44595 || !d->one_operand_p
44596 || (d->vmode != V32QImode && d->vmode != V16HImode))
44597 return false;
44598
44599 if (d->testing_p)
44600 return true;
44601
44602 nelt = d->nelt;
44603 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44604
44605 /* Generate two permutation masks. If the required element is within
44606 the same lane, it is shuffled in. If the required element is from
44607 the other lane, force a zero by setting bit 7 in the permutation
44608 mask. The other mask has a non-negative element wherever an element
44609 is requested from the other lane, but places it in the other lane,
44610 so that the result of vpshufb can have its two V2TImode halves
44611 swapped. */
44612 m128 = GEN_INT (-128);
44613 for (i = 0; i < nelt; ++i)
44614 {
44615 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44616 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44617
44618 for (j = 0; j < eltsz; ++j)
44619 {
44620 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44621 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44622 }
44623 }
44624
44625 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44626 vperm = force_reg (V32QImode, vperm);
44627
44628 h = gen_reg_rtx (V32QImode);
44629 op = gen_lowpart (V32QImode, d->op0);
44630 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44631
44632 /* Swap the 128-bit lanes of h into hp. */
44633 hp = gen_reg_rtx (V4DImode);
44634 op = gen_lowpart (V4DImode, h);
44635 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44636 const1_rtx));
44637
44638 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44639 vperm = force_reg (V32QImode, vperm);
44640
44641 l = gen_reg_rtx (V32QImode);
44642 op = gen_lowpart (V32QImode, d->op0);
44643 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44644
44645 op = d->target;
44646 if (d->vmode != V32QImode)
44647 op = gen_reg_rtx (V32QImode);
44648 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44649 if (op != d->target)
44650 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44651
44652 return true;
44653 }
44654
44655 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44656 and extract-odd permutations of two V32QImode and V16QImode operands
44657 with two vpshufb insns, vpor and vpermq. We should have already
44658 failed all two or three instruction sequences. */
44659
44660 static bool
44661 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44662 {
44663 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44664 unsigned int i, nelt, eltsz;
44665
44666 if (!TARGET_AVX2
44667 || d->one_operand_p
44668 || (d->vmode != V32QImode && d->vmode != V16HImode))
44669 return false;
44670
44671 for (i = 0; i < d->nelt; ++i)
44672 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44673 return false;
44674
44675 if (d->testing_p)
44676 return true;
44677
44678 nelt = d->nelt;
44679 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44680
44681 /* Generate two permutation masks. In the first permutation mask
44682 the first quarter will contain indexes for the first half
44683 of op0, the second quarter will have bit 7 set, the third quarter
44684 will contain indexes for the second half of op0 and the
44685 last quarter will have bit 7 set. In the second permutation mask
44686 the first quarter will have bit 7 set, the second quarter
44687 indexes for the first half of op1, the third quarter bit 7 set
44688 and the last quarter indexes for the second half of op1.
44689 I.e. the first mask e.g. for V32QImode extract even will be:
44690 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44691 (all values masked with 0xf except for -128) and second mask
44692 for extract even will be
44693 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44694 m128 = GEN_INT (-128);
44695 for (i = 0; i < nelt; ++i)
44696 {
44697 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44698 unsigned which = d->perm[i] >= nelt;
44699 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44700
44701 for (j = 0; j < eltsz; ++j)
44702 {
44703 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44704 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44705 }
44706 }
44707
44708 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44709 vperm = force_reg (V32QImode, vperm);
44710
44711 l = gen_reg_rtx (V32QImode);
44712 op = gen_lowpart (V32QImode, d->op0);
44713 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44714
44715 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44716 vperm = force_reg (V32QImode, vperm);
44717
44718 h = gen_reg_rtx (V32QImode);
44719 op = gen_lowpart (V32QImode, d->op1);
44720 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44721
44722 ior = gen_reg_rtx (V32QImode);
44723 emit_insn (gen_iorv32qi3 (ior, l, h));
44724
44725 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44726 op = gen_reg_rtx (V4DImode);
44727 ior = gen_lowpart (V4DImode, ior);
44728 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44729 const1_rtx, GEN_INT (3)));
44730 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44731
44732 return true;
44733 }
44734
44735 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44736 and extract-odd permutations. */
44737
44738 static bool
44739 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44740 {
44741 rtx t1, t2, t3, t4, t5;
44742
44743 switch (d->vmode)
44744 {
44745 case V4DFmode:
44746 if (d->testing_p)
44747 break;
44748 t1 = gen_reg_rtx (V4DFmode);
44749 t2 = gen_reg_rtx (V4DFmode);
44750
44751 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44752 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44753 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44754
44755 /* Now an unpck[lh]pd will produce the result required. */
44756 if (odd)
44757 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44758 else
44759 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44760 emit_insn (t3);
44761 break;
44762
44763 case V8SFmode:
44764 {
44765 int mask = odd ? 0xdd : 0x88;
44766
44767 if (d->testing_p)
44768 break;
44769 t1 = gen_reg_rtx (V8SFmode);
44770 t2 = gen_reg_rtx (V8SFmode);
44771 t3 = gen_reg_rtx (V8SFmode);
44772
44773 /* Shuffle within the 128-bit lanes to produce:
44774 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44775 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44776 GEN_INT (mask)));
44777
44778 /* Shuffle the lanes around to produce:
44779 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44780 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44781 GEN_INT (0x3)));
44782
44783 /* Shuffle within the 128-bit lanes to produce:
44784 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44785 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44786
44787 /* Shuffle within the 128-bit lanes to produce:
44788 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44789 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44790
44791 /* Shuffle the lanes around to produce:
44792 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44793 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44794 GEN_INT (0x20)));
44795 }
44796 break;
44797
44798 case V2DFmode:
44799 case V4SFmode:
44800 case V2DImode:
44801 case V4SImode:
44802 /* These are always directly implementable by expand_vec_perm_1. */
44803 gcc_unreachable ();
44804
44805 case V8HImode:
44806 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44807 return expand_vec_perm_pshufb2 (d);
44808 else
44809 {
44810 if (d->testing_p)
44811 break;
44812 /* We need 2*log2(N)-1 operations to achieve odd/even
44813 with interleave. */
44814 t1 = gen_reg_rtx (V8HImode);
44815 t2 = gen_reg_rtx (V8HImode);
44816 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44817 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44818 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44819 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44820 if (odd)
44821 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44822 else
44823 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44824 emit_insn (t3);
44825 }
44826 break;
44827
44828 case V16QImode:
44829 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44830 return expand_vec_perm_pshufb2 (d);
44831 else
44832 {
44833 if (d->testing_p)
44834 break;
44835 t1 = gen_reg_rtx (V16QImode);
44836 t2 = gen_reg_rtx (V16QImode);
44837 t3 = gen_reg_rtx (V16QImode);
44838 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44839 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44840 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44841 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44842 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44843 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44844 if (odd)
44845 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44846 else
44847 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44848 emit_insn (t3);
44849 }
44850 break;
44851
44852 case V16HImode:
44853 case V32QImode:
44854 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44855
44856 case V4DImode:
44857 if (!TARGET_AVX2)
44858 {
44859 struct expand_vec_perm_d d_copy = *d;
44860 d_copy.vmode = V4DFmode;
44861 if (d->testing_p)
44862 d_copy.target = gen_lowpart (V4DFmode, d->target);
44863 else
44864 d_copy.target = gen_reg_rtx (V4DFmode);
44865 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44866 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44867 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44868 {
44869 if (!d->testing_p)
44870 emit_move_insn (d->target,
44871 gen_lowpart (V4DImode, d_copy.target));
44872 return true;
44873 }
44874 return false;
44875 }
44876
44877 if (d->testing_p)
44878 break;
44879
44880 t1 = gen_reg_rtx (V4DImode);
44881 t2 = gen_reg_rtx (V4DImode);
44882
44883 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44884 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44885 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44886
44887 /* Now a vpunpck[lh]qdq will produce the result required. */
44888 if (odd)
44889 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44890 else
44891 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44892 emit_insn (t3);
44893 break;
44894
44895 case V8SImode:
44896 if (!TARGET_AVX2)
44897 {
44898 struct expand_vec_perm_d d_copy = *d;
44899 d_copy.vmode = V8SFmode;
44900 if (d->testing_p)
44901 d_copy.target = gen_lowpart (V8SFmode, d->target);
44902 else
44903 d_copy.target = gen_reg_rtx (V8SFmode);
44904 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44905 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44906 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44907 {
44908 if (!d->testing_p)
44909 emit_move_insn (d->target,
44910 gen_lowpart (V8SImode, d_copy.target));
44911 return true;
44912 }
44913 return false;
44914 }
44915
44916 if (d->testing_p)
44917 break;
44918
44919 t1 = gen_reg_rtx (V8SImode);
44920 t2 = gen_reg_rtx (V8SImode);
44921 t3 = gen_reg_rtx (V4DImode);
44922 t4 = gen_reg_rtx (V4DImode);
44923 t5 = gen_reg_rtx (V4DImode);
44924
44925 /* Shuffle the lanes around into
44926 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44927 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44928 gen_lowpart (V4DImode, d->op1),
44929 GEN_INT (0x20)));
44930 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44931 gen_lowpart (V4DImode, d->op1),
44932 GEN_INT (0x31)));
44933
44934 /* Swap the 2nd and 3rd position in each lane into
44935 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44936 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44937 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44938 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44939 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44940
44941 /* Now a vpunpck[lh]qdq will produce
44942 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44943 if (odd)
44944 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44945 gen_lowpart (V4DImode, t2));
44946 else
44947 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44948 gen_lowpart (V4DImode, t2));
44949 emit_insn (t3);
44950 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44951 break;
44952
44953 default:
44954 gcc_unreachable ();
44955 }
44956
44957 return true;
44958 }
44959
44960 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44961 extract-even and extract-odd permutations. */
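/* E.g. for V8HImode this matches d->perm = { 0 2 4 6 8 10 12 14 }
   (odd == 0) and { 1 3 5 7 9 11 13 15 } (odd == 1).  */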
44962
44963 static bool
44964 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44965 {
44966 unsigned i, odd, nelt = d->nelt;
44967
44968 odd = d->perm[0];
44969 if (odd != 0 && odd != 1)
44970 return false;
44971
44972 for (i = 1; i < nelt; ++i)
44973 if (d->perm[i] != 2 * i + odd)
44974 return false;
44975
44976 return expand_vec_perm_even_odd_1 (d, odd);
44977 }
44978
44979 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44980 permutations. We assume that expand_vec_perm_1 has already failed. */
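/* For instance, with only SSE2 available, broadcasting byte 5 of a
   V16QImode vector uses punpcklbw to duplicate it into word 5, punpckhwd
   to move that pair into dword 1, and a final pshufd replicating dword 1
   across the whole vector.  */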
44981
44982 static bool
44983 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44984 {
44985 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44986 enum machine_mode vmode = d->vmode;
44987 unsigned char perm2[4];
44988 rtx op0 = d->op0, dest;
44989 bool ok;
44990
44991 switch (vmode)
44992 {
44993 case V4DFmode:
44994 case V8SFmode:
44995 /* These are special-cased in sse.md so that we can optionally
44996 use the vbroadcast instruction. They expand to two insns
44997 if the input happens to be in a register. */
44998 gcc_unreachable ();
44999
45000 case V2DFmode:
45001 case V2DImode:
45002 case V4SFmode:
45003 case V4SImode:
45004 /* These are always implementable using standard shuffle patterns. */
45005 gcc_unreachable ();
45006
45007 case V8HImode:
45008 case V16QImode:
45009 /* These can be implemented via interleave. We save one insn by
45010 stopping once we have promoted to V4SImode and then use pshufd. */
45011 if (d->testing_p)
45012 return true;
45013 do
45014 {
45015 rtx dest;
45016 rtx (*gen) (rtx, rtx, rtx)
45017 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
45018 : gen_vec_interleave_lowv8hi;
45019
45020 if (elt >= nelt2)
45021 {
45022 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
45023 : gen_vec_interleave_highv8hi;
45024 elt -= nelt2;
45025 }
45026 nelt2 /= 2;
45027
45028 dest = gen_reg_rtx (vmode);
45029 emit_insn (gen (dest, op0, op0));
45030 vmode = get_mode_wider_vector (vmode);
45031 op0 = gen_lowpart (vmode, dest);
45032 }
45033 while (vmode != V4SImode);
45034
45035 memset (perm2, elt, 4);
45036 dest = gen_reg_rtx (V4SImode);
45037 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
45038 gcc_assert (ok);
45039 if (!d->testing_p)
45040 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
45041 return true;
45042
45043 case V32QImode:
45044 case V16HImode:
45045 case V8SImode:
45046 case V4DImode:
45047 /* For AVX2 broadcasts of the first element vpbroadcast* or
45048 vpermq should be used by expand_vec_perm_1. */
45049 gcc_assert (!TARGET_AVX2 || d->perm[0]);
45050 return false;
45051
45052 default:
45053 gcc_unreachable ();
45054 }
45055 }
45056
45057 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
45058 broadcast permutations. */
45059
45060 static bool
45061 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
45062 {
45063 unsigned i, elt, nelt = d->nelt;
45064
45065 if (!d->one_operand_p)
45066 return false;
45067
45068 elt = d->perm[0];
45069 for (i = 1; i < nelt; ++i)
45070 if (d->perm[i] != elt)
45071 return false;
45072
45073 return expand_vec_perm_broadcast_1 (d);
45074 }
45075
45076 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
45077 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
45078 all the shorter instruction sequences. */
45079
45080 static bool
45081 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
45082 {
45083 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
45084 unsigned int i, nelt, eltsz;
45085 bool used[4];
45086
45087 if (!TARGET_AVX2
45088 || d->one_operand_p
45089 || (d->vmode != V32QImode && d->vmode != V16HImode))
45090 return false;
45091
45092 if (d->testing_p)
45093 return true;
45094
45095 nelt = d->nelt;
45096 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
45097
45098 /* Generate 4 permutation masks. If the required element is within
45099 the same lane, it is shuffled in. If the required element is from
45100 the other lane, force a zero by setting bit 7 in the permutation
45101 mask. The other mask has a non-negative element wherever an element
45102 is requested from the other lane, but places it in the other lane,
45103 so that the result of vpshufb can have its two V2TImode halves
45104 swapped. */
45105 m128 = GEN_INT (-128);
45106 for (i = 0; i < 32; ++i)
45107 {
45108 rperm[0][i] = m128;
45109 rperm[1][i] = m128;
45110 rperm[2][i] = m128;
45111 rperm[3][i] = m128;
45112 }
45113 used[0] = false;
45114 used[1] = false;
45115 used[2] = false;
45116 used[3] = false;
45117 for (i = 0; i < nelt; ++i)
45118 {
45119 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
45120 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
45121 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
45122
45123 for (j = 0; j < eltsz; ++j)
45124 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
45125 used[which] = true;
45126 }
45127
45128 for (i = 0; i < 2; ++i)
45129 {
45130 if (!used[2 * i + 1])
45131 {
45132 h[i] = NULL_RTX;
45133 continue;
45134 }
45135 vperm = gen_rtx_CONST_VECTOR (V32QImode,
45136 gen_rtvec_v (32, rperm[2 * i + 1]));
45137 vperm = force_reg (V32QImode, vperm);
45138 h[i] = gen_reg_rtx (V32QImode);
45139 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
45140 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
45141 }
45142
45143 /* Swap the 128-bit lanes of h[X]. */
45144 for (i = 0; i < 2; ++i)
45145 {
45146 if (h[i] == NULL_RTX)
45147 continue;
45148 op = gen_reg_rtx (V4DImode);
45149 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
45150 const2_rtx, GEN_INT (3), const0_rtx,
45151 const1_rtx));
45152 h[i] = gen_lowpart (V32QImode, op);
45153 }
45154
45155 for (i = 0; i < 2; ++i)
45156 {
45157 if (!used[2 * i])
45158 {
45159 l[i] = NULL_RTX;
45160 continue;
45161 }
45162 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
45163 vperm = force_reg (V32QImode, vperm);
45164 l[i] = gen_reg_rtx (V32QImode);
45165 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
45166 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
45167 }
45168
45169 for (i = 0; i < 2; ++i)
45170 {
45171 if (h[i] && l[i])
45172 {
45173 op = gen_reg_rtx (V32QImode);
45174 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
45175 l[i] = op;
45176 }
45177 else if (h[i])
45178 l[i] = h[i];
45179 }
45180
45181 gcc_assert (l[0] && l[1]);
45182 op = d->target;
45183 if (d->vmode != V32QImode)
45184 op = gen_reg_rtx (V32QImode);
45185 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
45186 if (op != d->target)
45187 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
45188 return true;
45189 }
45190
45191 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
45192 With all of the interface bits taken care of, perform the expansion
45193 in D and return true on success. */
45194
45195 static bool
45196 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
45197 {
45198 /* Try a single instruction expansion. */
45199 if (expand_vec_perm_1 (d))
45200 return true;
45201
45202 /* Try sequences of two instructions. */
45203
45204 if (expand_vec_perm_pshuflw_pshufhw (d))
45205 return true;
45206
45207 if (expand_vec_perm_palignr (d, false))
45208 return true;
45209
45210 if (expand_vec_perm_interleave2 (d))
45211 return true;
45212
45213 if (expand_vec_perm_broadcast (d))
45214 return true;
45215
45216 if (expand_vec_perm_vpermq_perm_1 (d))
45217 return true;
45218
45219 if (expand_vec_perm_vperm2f128 (d))
45220 return true;
45221
45222 if (expand_vec_perm_pblendv (d))
45223 return true;
45224
45225 /* Try sequences of three instructions. */
45226
45227 if (expand_vec_perm_2vperm2f128_vshuf (d))
45228 return true;
45229
45230 if (expand_vec_perm_pshufb2 (d))
45231 return true;
45232
45233 if (expand_vec_perm_interleave3 (d))
45234 return true;
45235
45236 if (expand_vec_perm_vperm2f128_vblend (d))
45237 return true;
45238
45239 /* Try sequences of four instructions. */
45240
45241 if (expand_vec_perm_vpshufb2_vpermq (d))
45242 return true;
45243
45244 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
45245 return true;
45246
45247 /* ??? Look for narrow permutations whose element orderings would
45248 allow the promotion to a wider mode. */
45249
45250 /* ??? Look for sequences of interleave or a wider permute that place
45251 the data into the correct lanes for a half-vector shuffle like
45252 pshuf[lh]w or vpermilps. */
45253
45254 /* ??? Look for sequences of interleave that produce the desired results.
45255 The combinatorics of punpck[lh] get pretty ugly... */
45256
45257 if (expand_vec_perm_even_odd (d))
45258 return true;
45259
45260 /* Even longer sequences. */
45261 if (expand_vec_perm_vpshufb4_vpermq2 (d))
45262 return true;
45263
45264 return false;
45265 }
45266
45267 /* If a permutation only uses one operand, make it clear. Returns true
45268 if the permutation references both operands. */
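/* E.g. for nelt == 4 a perm of { 5 7 4 6 } only references the second
   operand, so it is folded to the one-operand perm { 1 3 0 2 } with
   d->op0 = d->op1, and false is returned.  */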
45269
45270 static bool
45271 canonicalize_perm (struct expand_vec_perm_d *d)
45272 {
45273 int i, which, nelt = d->nelt;
45274
45275 for (i = which = 0; i < nelt; ++i)
45276 which |= (d->perm[i] < nelt ? 1 : 2);
45277
45278 d->one_operand_p = true;
45279 switch (which)
45280 {
45281 default:
45282 gcc_unreachable();
45283
45284 case 3:
45285 if (!rtx_equal_p (d->op0, d->op1))
45286 {
45287 d->one_operand_p = false;
45288 break;
45289 }
45290 /* The elements of PERM do not suggest that only the first operand
45291 is used, but both operands are identical. Allow easier matching
45292 of the permutation by folding the permutation into the single
45293 input vector. */
45294 /* FALLTHRU */
45295
45296 case 2:
45297 for (i = 0; i < nelt; ++i)
45298 d->perm[i] &= nelt - 1;
45299 d->op0 = d->op1;
45300 break;
45301
45302 case 1:
45303 d->op1 = d->op0;
45304 break;
45305 }
45306
45307 return (which == 3);
45308 }
45309
45310 bool
45311 ix86_expand_vec_perm_const (rtx operands[4])
45312 {
45313 struct expand_vec_perm_d d;
45314 unsigned char perm[MAX_VECT_LEN];
45315 int i, nelt;
45316 bool two_args;
45317 rtx sel;
45318
45319 d.target = operands[0];
45320 d.op0 = operands[1];
45321 d.op1 = operands[2];
45322 sel = operands[3];
45323
45324 d.vmode = GET_MODE (d.target);
45325 gcc_assert (VECTOR_MODE_P (d.vmode));
45326 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45327 d.testing_p = false;
45328
45329 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
45330 gcc_assert (XVECLEN (sel, 0) == nelt);
45331 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
45332
45333 for (i = 0; i < nelt; ++i)
45334 {
45335 rtx e = XVECEXP (sel, 0, i);
45336 int ei = INTVAL (e) & (2 * nelt - 1);
45337 d.perm[i] = ei;
45338 perm[i] = ei;
45339 }
45340
45341 two_args = canonicalize_perm (&d);
45342
45343 if (ix86_expand_vec_perm_const_1 (&d))
45344 return true;
45345
45346 /* If the selector says both arguments are needed, but the operands are the
45347 same, the above tried to expand with one_operand_p and a flattened selector.
45348 If that didn't work, retry without one_operand_p; we succeeded with that
45349 during testing. */
45350 if (two_args && d.one_operand_p)
45351 {
45352 d.one_operand_p = false;
45353 memcpy (d.perm, perm, sizeof (perm));
45354 return ix86_expand_vec_perm_const_1 (&d);
45355 }
45356
45357 return false;
45358 }
45359
45360 /* Implement targetm.vectorize.vec_perm_const_ok. */
45361
45362 static bool
45363 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
45364 const unsigned char *sel)
45365 {
45366 struct expand_vec_perm_d d;
45367 unsigned int i, nelt, which;
45368 bool ret;
45369
45370 d.vmode = vmode;
45371 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45372 d.testing_p = true;
45373
45374 /* Given sufficient ISA support we can just return true here
45375 for selected vector modes. */
45376 switch (d.vmode)
45377 {
45378 case V16SFmode:
45379 case V16SImode:
45380 case V8DImode:
45381 case V8DFmode:
45382 if (TARGET_AVX512F)
45383 /* All implementable with a single vpermi2 insn. */
45384 return true;
45385 break;
45386 case V32HImode:
45387 if (TARGET_AVX512BW)
45388 /* All implementable with a single vpermi2 insn. */
45389 return true;
45390 break;
45391 case V8SImode:
45392 case V8SFmode:
45393 case V4DFmode:
45394 case V4DImode:
45395 if (TARGET_AVX512VL)
45396 /* All implementable with a single vpermi2 insn. */
45397 return true;
45398 break;
45399 case V16HImode:
45400 if (TARGET_AVX2)
45401 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
45402 return true;
45403 break;
45404 case V32QImode:
45405 if (TARGET_AVX2)
45406 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
45407 return true;
45408 break;
45409 case V4SImode:
45410 case V4SFmode:
45411 case V8HImode:
45412 case V16QImode:
45413 /* All implementable with a single vpperm insn. */
45414 if (TARGET_XOP)
45415 return true;
45416 /* All implementable with 2 pshufb + 1 ior. */
45417 if (TARGET_SSSE3)
45418 return true;
45419 break;
45420 case V2DImode:
45421 case V2DFmode:
45422 /* All implementable with shufpd or unpck[lh]pd. */
45423 return true;
45424 default:
45425 return false;
45426 }
45427
45428 /* Extract the values from the vector CST into the permutation
45429 array in D. */
45430 memcpy (d.perm, sel, nelt);
45431 for (i = which = 0; i < nelt; ++i)
45432 {
45433 unsigned char e = d.perm[i];
45434 gcc_assert (e < 2 * nelt);
45435 which |= (e < nelt ? 1 : 2);
45436 }
45437
45438 /* For all elements from second vector, fold the elements to first. */
45439 if (which == 2)
45440 for (i = 0; i < nelt; ++i)
45441 d.perm[i] -= nelt;
45442
45443 /* Check whether the mask can be applied to the vector type. */
45444 d.one_operand_p = (which != 3);
45445
45446 /* Implementable with shufps or pshufd. */
45447 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
45448 return true;
45449
45450 /* Otherwise we have to go through the motions and see if we can
45451 figure out how to generate the requested permutation. */
45452 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
45453 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
45454 if (!d.one_operand_p)
45455 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
45456
45457 start_sequence ();
45458 ret = ix86_expand_vec_perm_const_1 (&d);
45459 end_sequence ();
45460
45461 return ret;
45462 }
45463
45464 void
45465 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
45466 {
45467 struct expand_vec_perm_d d;
45468 unsigned i, nelt;
45469
45470 d.target = targ;
45471 d.op0 = op0;
45472 d.op1 = op1;
45473 d.vmode = GET_MODE (targ);
45474 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45475 d.one_operand_p = false;
45476 d.testing_p = false;
45477
45478 for (i = 0; i < nelt; ++i)
45479 d.perm[i] = i * 2 + odd;
45480
45481 /* We'll either be able to implement the permutation directly... */
45482 if (expand_vec_perm_1 (&d))
45483 return;
45484
45485 /* ... or we use the special-case patterns. */
45486 expand_vec_perm_even_odd_1 (&d, odd);
45487 }
45488
45489 static void
45490 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
45491 {
45492 struct expand_vec_perm_d d;
45493 unsigned i, nelt, base;
45494 bool ok;
45495
45496 d.target = targ;
45497 d.op0 = op0;
45498 d.op1 = op1;
45499 d.vmode = GET_MODE (targ);
45500 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45501 d.one_operand_p = false;
45502 d.testing_p = false;
45503
45504 base = high_p ? nelt / 2 : 0;
45505 for (i = 0; i < nelt / 2; ++i)
45506 {
45507 d.perm[i * 2] = i + base;
45508 d.perm[i * 2 + 1] = i + base + nelt;
45509 }
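/* For example, with nelt == 4 this builds perm == { 0, 4, 1, 5 } for the
   low half and { 2, 6, 3, 7 } for the high half.  */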
45510
45511 /* Note that for AVX this isn't one instruction. */
45512 ok = ix86_expand_vec_perm_const_1 (&d);
45513 gcc_assert (ok);
45514 }
45515
45516
45517 /* Expand a vector operation CODE for a V*QImode in terms of the
45518 same operation on V*HImode. */
45519
45520 void
45521 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
45522 {
45523 enum machine_mode qimode = GET_MODE (dest);
45524 enum machine_mode himode;
45525 rtx (*gen_il) (rtx, rtx, rtx);
45526 rtx (*gen_ih) (rtx, rtx, rtx);
45527 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
45528 struct expand_vec_perm_d d;
45529 bool ok, full_interleave;
45530 bool uns_p = false;
45531 int i;
45532
45533 switch (qimode)
45534 {
45535 case V16QImode:
45536 himode = V8HImode;
45537 gen_il = gen_vec_interleave_lowv16qi;
45538 gen_ih = gen_vec_interleave_highv16qi;
45539 break;
45540 case V32QImode:
45541 himode = V16HImode;
45542 gen_il = gen_avx2_interleave_lowv32qi;
45543 gen_ih = gen_avx2_interleave_highv32qi;
45544 break;
45545 case V64QImode:
45546 himode = V32HImode;
45547 gen_il = gen_avx512bw_interleave_lowv64qi;
45548 gen_ih = gen_avx512bw_interleave_highv64qi;
45549 break;
45550 default:
45551 gcc_unreachable ();
45552 }
45553
45554 op2_l = op2_h = op2;
45555 switch (code)
45556 {
45557 case MULT:
45558 /* Unpack data such that we've got a source byte in each low byte of
45559 each word. We don't care what goes into the high byte of each word.
45560 Rather than trying to get zero in there, most convenient is to let
45561 it be a copy of the low byte. */
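/* E.g. interleaving op2 with itself turns { x0, x1, ... } into
   { x0, x0, x1, x1, ... }; read as V*HImode words (little endian), each
   word's low byte is one source byte, with a copy in the high byte.  */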
45562 op2_l = gen_reg_rtx (qimode);
45563 op2_h = gen_reg_rtx (qimode);
45564 emit_insn (gen_il (op2_l, op2, op2));
45565 emit_insn (gen_ih (op2_h, op2, op2));
45566 /* FALLTHRU */
45567
45568 op1_l = gen_reg_rtx (qimode);
45569 op1_h = gen_reg_rtx (qimode);
45570 emit_insn (gen_il (op1_l, op1, op1));
45571 emit_insn (gen_ih (op1_h, op1, op1));
45572 full_interleave = qimode == V16QImode;
45573 break;
45574
45575 case ASHIFT:
45576 case LSHIFTRT:
45577 uns_p = true;
45578 /* FALLTHRU */
45579 case ASHIFTRT:
45580 op1_l = gen_reg_rtx (himode);
45581 op1_h = gen_reg_rtx (himode);
45582 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
45583 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
45584 full_interleave = true;
45585 break;
45586 default:
45587 gcc_unreachable ();
45588 }
45589
45590 /* Perform the operation. */
45591 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
45592 1, OPTAB_DIRECT);
45593 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
45594 1, OPTAB_DIRECT);
45595 gcc_assert (res_l && res_h);
45596
45597 /* Merge the data back into the right place. */
45598 d.target = dest;
45599 d.op0 = gen_lowpart (qimode, res_l);
45600 d.op1 = gen_lowpart (qimode, res_h);
45601 d.vmode = qimode;
45602 d.nelt = GET_MODE_NUNITS (qimode);
45603 d.one_operand_p = false;
45604 d.testing_p = false;
45605
45606 if (full_interleave)
45607 {
45608 /* For SSE2, we used a full interleave, so the desired
45609 results are in the even elements. */
45610 for (i = 0; i < 64; ++i)
45611 d.perm[i] = i * 2;
45612 }
45613 else
45614 {
45615 /* For AVX, the interleave used above was not cross-lane. So the
45616 extraction is evens but with the second and third quarter swapped.
45617 Happily, that is even one insn shorter than even extraction. */
45618 for (i = 0; i < 64; ++i)
45619 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
45620 }
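/* Concretely, for V32QImode this gathers the even bytes of res_l lane 0,
   then res_h lane 0, then res_l lane 1, then res_h lane 1, so no extra
   cross-lane shuffle of the inputs is needed.  */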
45621
45622 ok = ix86_expand_vec_perm_const_1 (&d);
45623 gcc_assert (ok);
45624
45625 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45626 gen_rtx_fmt_ee (code, qimode, op1, op2));
45627 }
45628
45629 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45630 if op is CONST_VECTOR with all odd elements equal to their
45631 preceding element. */
45632
45633 static bool
45634 const_vector_equal_evenodd_p (rtx op)
45635 {
45636 enum machine_mode mode = GET_MODE (op);
45637 int i, nunits = GET_MODE_NUNITS (mode);
45638 if (GET_CODE (op) != CONST_VECTOR
45639 || nunits != CONST_VECTOR_NUNITS (op))
45640 return false;
45641 for (i = 0; i < nunits; i += 2)
45642 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45643 return false;
45644 return true;
45645 }
45646
45647 void
45648 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45649 bool uns_p, bool odd_p)
45650 {
45651 enum machine_mode mode = GET_MODE (op1);
45652 enum machine_mode wmode = GET_MODE (dest);
45653 rtx x;
45654 rtx orig_op1 = op1, orig_op2 = op2;
45655
45656 if (!nonimmediate_operand (op1, mode))
45657 op1 = force_reg (mode, op1);
45658 if (!nonimmediate_operand (op2, mode))
45659 op2 = force_reg (mode, op2);
45660
45661 /* We only play even/odd games with vectors of SImode. */
45662 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45663
45664 /* If we're looking for the odd results, shift those members down to
45665 the even slots. For some cpus this is faster than a PSHUFD. */
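/* E.g. for V4SImode { e0, e1, e2, e3 }, shifting the V2DImode view right
   by 32 bits yields { e1, 0, e3, 0 }, leaving the odd elements in the even
   slots consumed by the even-multiply patterns below.  */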
45666 if (odd_p)
45667 {
45668 /* For XOP use vpmacsdqh, but only for smult, as it is only
45669 signed. */
45670 if (TARGET_XOP && mode == V4SImode && !uns_p)
45671 {
45672 x = force_reg (wmode, CONST0_RTX (wmode));
45673 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45674 return;
45675 }
45676
45677 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45678 if (!const_vector_equal_evenodd_p (orig_op1))
45679 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45680 x, NULL, 1, OPTAB_DIRECT);
45681 if (!const_vector_equal_evenodd_p (orig_op2))
45682 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45683 x, NULL, 1, OPTAB_DIRECT);
45684 op1 = gen_lowpart (mode, op1);
45685 op2 = gen_lowpart (mode, op2);
45686 }
45687
45688 if (mode == V16SImode)
45689 {
45690 if (uns_p)
45691 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45692 else
45693 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45694 }
45695 else if (mode == V8SImode)
45696 {
45697 if (uns_p)
45698 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45699 else
45700 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45701 }
45702 else if (uns_p)
45703 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45704 else if (TARGET_SSE4_1)
45705 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45706 else
45707 {
45708 rtx s1, s2, t0, t1, t2;
45709
45710 /* The easiest way to implement this without PMULDQ is to go through
45711 the motions as if we are performing a full 64-bit multiply. With
45712 the exception that we need to do less shuffling of the elements. */
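/* In effect this uses the identity (mod 2^64)
   a * b == LO(a) * LO(b) - (((a < 0 ? b : 0) + (b < 0 ? a : 0)) << 32);
   the all-ones comparison results S1/S2 below, multiplied by the other
   operand and shifted left by 32 bits, supply those correction terms.  */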
45713
45714 /* Compute the sign-extension, aka highparts, of the two operands. */
45715 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45716 op1, pc_rtx, pc_rtx);
45717 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45718 op2, pc_rtx, pc_rtx);
45719
45720 /* Multiply LO(A) * HI(B), and vice-versa. */
45721 t1 = gen_reg_rtx (wmode);
45722 t2 = gen_reg_rtx (wmode);
45723 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45724 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45725
45726 /* Multiply LO(A) * LO(B). */
45727 t0 = gen_reg_rtx (wmode);
45728 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45729
45730 /* Combine and shift the highparts into place. */
45731 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45732 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45733 1, OPTAB_DIRECT);
45734
45735 /* Combine high and low parts. */
45736 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45737 return;
45738 }
45739 emit_insn (x);
45740 }
45741
45742 void
45743 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45744 bool uns_p, bool high_p)
45745 {
45746 enum machine_mode wmode = GET_MODE (dest);
45747 enum machine_mode mode = GET_MODE (op1);
45748 rtx t1, t2, t3, t4, mask;
45749
45750 switch (mode)
45751 {
45752 case V4SImode:
45753 t1 = gen_reg_rtx (mode);
45754 t2 = gen_reg_rtx (mode);
45755 if (TARGET_XOP && !uns_p)
45756 {
45757 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45758 shuffle the elements once so that all elements are in the right
45759 place for immediate use: { A C B D }. */
45760 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45761 const1_rtx, GEN_INT (3)));
45762 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45763 const1_rtx, GEN_INT (3)));
45764 }
45765 else
45766 {
45767 /* Put the elements into place for the multiply. */
45768 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45769 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45770 high_p = false;
45771 }
45772 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45773 break;
45774
45775 case V8SImode:
45776 /* Shuffle the elements between the lanes. After this we
45777 have { A B E F | C D G H } for each operand. */
45778 t1 = gen_reg_rtx (V4DImode);
45779 t2 = gen_reg_rtx (V4DImode);
45780 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45781 const0_rtx, const2_rtx,
45782 const1_rtx, GEN_INT (3)));
45783 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45784 const0_rtx, const2_rtx,
45785 const1_rtx, GEN_INT (3)));
45786
45787 /* Shuffle the elements within the lanes. After this we
45788 have { A A B B | C C D D } or { E E F F | G G H H }. */
45789 t3 = gen_reg_rtx (V8SImode);
45790 t4 = gen_reg_rtx (V8SImode);
45791 mask = GEN_INT (high_p
45792 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45793 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45794 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45795 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45796
45797 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45798 break;
45799
45800 case V8HImode:
45801 case V16HImode:
45802 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45803 uns_p, OPTAB_DIRECT);
45804 t2 = expand_binop (mode,
45805 uns_p ? umul_highpart_optab : smul_highpart_optab,
45806 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45807 gcc_assert (t1 && t2);
45808
45809 t3 = gen_reg_rtx (mode);
45810 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45811 emit_move_insn (dest, gen_lowpart (wmode, t3));
45812 break;
45813
45814 case V16QImode:
45815 case V32QImode:
45816 case V32HImode:
45817 case V16SImode:
45818 case V64QImode:
45819 t1 = gen_reg_rtx (wmode);
45820 t2 = gen_reg_rtx (wmode);
45821 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45822 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45823
45824 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45825 break;
45826
45827 default:
45828 gcc_unreachable ();
45829 }
45830 }
45831
45832 void
45833 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45834 {
45835 rtx res_1, res_2, res_3, res_4;
45836
45837 res_1 = gen_reg_rtx (V4SImode);
45838 res_2 = gen_reg_rtx (V4SImode);
45839 res_3 = gen_reg_rtx (V2DImode);
45840 res_4 = gen_reg_rtx (V2DImode);
45841 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45842 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45843
45844 /* Move the results in element 2 down to element 1; we don't care
45845 what goes in elements 2 and 3. Then we can merge the parts
45846 back together with an interleave.
45847
45848 Note that two other sequences were tried:
45849 (1) Use interleaves at the start instead of psrldq, which allows
45850 us to use a single shufps to merge things back at the end.
45851 (2) Use shufps here to combine the two vectors, then pshufd to
45852 put the elements in the correct order.
45853 In both cases the cost of the reformatting stall was too high
45854 and the overall sequence slower. */
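/* Concretely, res_3 viewed as V4SImode is { lo(p0), hi(p0), lo(p2), hi(p2) };
   the pshufd below moves lo(p2) down to element 1, and the final interleave
   with the equally shuffled odd products gives
   { lo(p0), lo(p1), lo(p2), lo(p3) }.  */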
45855
45856 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45857 const0_rtx, const2_rtx,
45858 const0_rtx, const0_rtx));
45859 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45860 const0_rtx, const2_rtx,
45861 const0_rtx, const0_rtx));
45862 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45863
45864 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45865 }
45866
45867 void
45868 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45869 {
45870 enum machine_mode mode = GET_MODE (op0);
45871 rtx t1, t2, t3, t4, t5, t6;
45872
45873 if (TARGET_AVX512DQ && mode == V8DImode)
45874 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
45875 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
45876 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
45877 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
45878 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
45879 else if (TARGET_XOP && mode == V2DImode)
45880 {
45881 /* op1: A,B,C,D, op2: E,F,G,H */
45882 op1 = gen_lowpart (V4SImode, op1);
45883 op2 = gen_lowpart (V4SImode, op2);
45884
45885 t1 = gen_reg_rtx (V4SImode);
45886 t2 = gen_reg_rtx (V4SImode);
45887 t3 = gen_reg_rtx (V2DImode);
45888 t4 = gen_reg_rtx (V2DImode);
45889
45890 /* t1: B,A,D,C */
45891 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45892 GEN_INT (1),
45893 GEN_INT (0),
45894 GEN_INT (3),
45895 GEN_INT (2)));
45896
45897 /* t2: (B*E),(A*F),(D*G),(C*H) */
45898 emit_insn (gen_mulv4si3 (t2, t1, op2));
45899
45900 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45901 emit_insn (gen_xop_phadddq (t3, t2));
45902
45903 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45904 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45905
45906 /* Multiply lower parts and add all */
45907 t5 = gen_reg_rtx (V2DImode);
45908 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45909 gen_lowpart (V4SImode, op1),
45910 gen_lowpart (V4SImode, op2)));
45911 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45912
45913 }
45914 else
45915 {
45916 enum machine_mode nmode;
45917 rtx (*umul) (rtx, rtx, rtx);
45918
45919 if (mode == V2DImode)
45920 {
45921 umul = gen_vec_widen_umult_even_v4si;
45922 nmode = V4SImode;
45923 }
45924 else if (mode == V4DImode)
45925 {
45926 umul = gen_vec_widen_umult_even_v8si;
45927 nmode = V8SImode;
45928 }
45929 else if (mode == V8DImode)
45930 {
45931 umul = gen_vec_widen_umult_even_v16si;
45932 nmode = V16SImode;
45933 }
45934 else
45935 gcc_unreachable ();
45936
45937
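/* The sequence below computes each 64-bit product as
   LO(a) * LO(b) + ((HI(a) * LO(b) + HI(b) * LO(a)) << 32)
   using only the even-lane 32x32->64 widening multiply.  */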
45938 /* Multiply low parts. */
45939 t1 = gen_reg_rtx (mode);
45940 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45941
45942 /* Shift input vectors right 32 bits so we can multiply high parts. */
45943 t6 = GEN_INT (32);
45944 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45945 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45946
45947 /* Multiply high parts by low parts. */
45948 t4 = gen_reg_rtx (mode);
45949 t5 = gen_reg_rtx (mode);
45950 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45951 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45952
45953 /* Combine and shift the highparts back. */
45954 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45955 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45956
45957 /* Combine high and low parts. */
45958 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45959 }
45960
45961 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45962 gen_rtx_MULT (mode, op1, op2));
45963 }
45964
45965 /* Calculate integer abs() using only SSE2 instructions. */
45966
45967 void
45968 ix86_expand_sse2_abs (rtx target, rtx input)
45969 {
45970 enum machine_mode mode = GET_MODE (target);
45971 rtx tmp0, tmp1, x;
45972
45973 switch (mode)
45974 {
45975 /* For 32-bit signed integer X, the best way to calculate the absolute
45976 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
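/* E.g. with X == -5: X >> 31 == -1, (-1 ^ -5) == 4, and 4 - (-1) == 5.  */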
45977 case V4SImode:
45978 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45979 GEN_INT (GET_MODE_BITSIZE
45980 (GET_MODE_INNER (mode)) - 1),
45981 NULL, 0, OPTAB_DIRECT);
45982 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45983 NULL, 0, OPTAB_DIRECT);
45984 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45985 target, 0, OPTAB_DIRECT);
45986 break;
45987
45988 /* For 16-bit signed integer X, the best way to calculate the absolute
45989 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45990 case V8HImode:
45991 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45992
45993 x = expand_simple_binop (mode, SMAX, tmp0, input,
45994 target, 0, OPTAB_DIRECT);
45995 break;
45996
45997 /* For 8-bit signed integer X, the best way to calculate the absolute
45998 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45999 as SSE2 provides the PMINUB insn. */
46000 case V16QImode:
46001 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
46002
46003 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
46004 target, 0, OPTAB_DIRECT);
46005 break;
46006
46007 default:
46008 gcc_unreachable ();
46009 }
46010
46011 if (x != target)
46012 emit_move_insn (target, x);
46013 }
46014
46015 /* Expand an insert into a vector register through pinsr insn.
46016 Return true if successful. */
46017
46018 bool
46019 ix86_expand_pinsr (rtx *operands)
46020 {
46021 rtx dst = operands[0];
46022 rtx src = operands[3];
46023
46024 unsigned int size = INTVAL (operands[1]);
46025 unsigned int pos = INTVAL (operands[2]);
46026
46027 if (GET_CODE (dst) == SUBREG)
46028 {
46029 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
46030 dst = SUBREG_REG (dst);
46031 }
46032
46033 if (GET_CODE (src) == SUBREG)
46034 src = SUBREG_REG (src);
46035
46036 switch (GET_MODE (dst))
46037 {
46038 case V16QImode:
46039 case V8HImode:
46040 case V4SImode:
46041 case V2DImode:
46042 {
46043 enum machine_mode srcmode, dstmode;
46044 rtx (*pinsr)(rtx, rtx, rtx, rtx);
46045
46046 srcmode = mode_for_size (size, MODE_INT, 0);
46047
46048 switch (srcmode)
46049 {
46050 case QImode:
46051 if (!TARGET_SSE4_1)
46052 return false;
46053 dstmode = V16QImode;
46054 pinsr = gen_sse4_1_pinsrb;
46055 break;
46056
46057 case HImode:
46058 if (!TARGET_SSE2)
46059 return false;
46060 dstmode = V8HImode;
46061 pinsr = gen_sse2_pinsrw;
46062 break;
46063
46064 case SImode:
46065 if (!TARGET_SSE4_1)
46066 return false;
46067 dstmode = V4SImode;
46068 pinsr = gen_sse4_1_pinsrd;
46069 break;
46070
46071 case DImode:
46072 gcc_assert (TARGET_64BIT);
46073 if (!TARGET_SSE4_1)
46074 return false;
46075 dstmode = V2DImode;
46076 pinsr = gen_sse4_1_pinsrq;
46077 break;
46078
46079 default:
46080 return false;
46081 }
46082
46083 rtx d = dst;
46084 if (GET_MODE (dst) != dstmode)
46085 d = gen_reg_rtx (dstmode);
46086 src = gen_lowpart (srcmode, src);
46087
46088 pos /= size;
46089
46090 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
46091 GEN_INT (1 << pos)));
46092 if (d != dst)
46093 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
46094 return true;
46095 }
46096
46097 default:
46098 return false;
46099 }
46100 }
46101 \f
46102 /* This function returns the calling-ABI-specific va_list type node.
46103 It returns the FNDECL-specific va_list type. */
46104
46105 static tree
46106 ix86_fn_abi_va_list (tree fndecl)
46107 {
46108 if (!TARGET_64BIT)
46109 return va_list_type_node;
46110 gcc_assert (fndecl != NULL_TREE);
46111
46112 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
46113 return ms_va_list_type_node;
46114 else
46115 return sysv_va_list_type_node;
46116 }
46117
46118 /* Returns the canonical va_list type specified by TYPE. If there
46119 is no valid TYPE provided, it returns NULL_TREE. */
46120
46121 static tree
46122 ix86_canonical_va_list_type (tree type)
46123 {
46124 tree wtype, htype;
46125
46126 /* Resolve references and pointers to va_list type. */
46127 if (TREE_CODE (type) == MEM_REF)
46128 type = TREE_TYPE (type);
46129 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
46130 type = TREE_TYPE (type);
46131 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
46132 type = TREE_TYPE (type);
46133
46134 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
46135 {
46136 wtype = va_list_type_node;
46137 gcc_assert (wtype != NULL_TREE);
46138 htype = type;
46139 if (TREE_CODE (wtype) == ARRAY_TYPE)
46140 {
46141 /* If va_list is an array type, the argument may have decayed
46142 to a pointer type, e.g. by being passed to another function.
46143 In that case, unwrap both types so that we can compare the
46144 underlying records. */
46145 if (TREE_CODE (htype) == ARRAY_TYPE
46146 || POINTER_TYPE_P (htype))
46147 {
46148 wtype = TREE_TYPE (wtype);
46149 htype = TREE_TYPE (htype);
46150 }
46151 }
46152 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
46153 return va_list_type_node;
46154 wtype = sysv_va_list_type_node;
46155 gcc_assert (wtype != NULL_TREE);
46156 htype = type;
46157 if (TREE_CODE (wtype) == ARRAY_TYPE)
46158 {
46159 /* If va_list is an array type, the argument may have decayed
46160 to a pointer type, e.g. by being passed to another function.
46161 In that case, unwrap both types so that we can compare the
46162 underlying records. */
46163 if (TREE_CODE (htype) == ARRAY_TYPE
46164 || POINTER_TYPE_P (htype))
46165 {
46166 wtype = TREE_TYPE (wtype);
46167 htype = TREE_TYPE (htype);
46168 }
46169 }
46170 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
46171 return sysv_va_list_type_node;
46172 wtype = ms_va_list_type_node;
46173 gcc_assert (wtype != NULL_TREE);
46174 htype = type;
46175 if (TREE_CODE (wtype) == ARRAY_TYPE)
46176 {
46177 /* If va_list is an array type, the argument may have decayed
46178 to a pointer type, e.g. by being passed to another function.
46179 In that case, unwrap both types so that we can compare the
46180 underlying records. */
46181 if (TREE_CODE (htype) == ARRAY_TYPE
46182 || POINTER_TYPE_P (htype))
46183 {
46184 wtype = TREE_TYPE (wtype);
46185 htype = TREE_TYPE (htype);
46186 }
46187 }
46188 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
46189 return ms_va_list_type_node;
46190 return NULL_TREE;
46191 }
46192 return std_canonical_va_list_type (type);
46193 }
46194
46195 /* Iterate through the target-specific builtin types for va_list.
46196 IDX denotes the iterator, *PTREE is set to the result type of
46197 the va_list builtin, and *PNAME to its internal type.
46198 Returns zero if there is no element for this index, otherwise
46199 IDX should be increased upon the next call.
46200 Note, do not iterate a base builtin's name like __builtin_va_list.
46201 Used from c_common_nodes_and_builtins. */
46202
46203 static int
46204 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
46205 {
46206 if (TARGET_64BIT)
46207 {
46208 switch (idx)
46209 {
46210 default:
46211 break;
46212
46213 case 0:
46214 *ptree = ms_va_list_type_node;
46215 *pname = "__builtin_ms_va_list";
46216 return 1;
46217
46218 case 1:
46219 *ptree = sysv_va_list_type_node;
46220 *pname = "__builtin_sysv_va_list";
46221 return 1;
46222 }
46223 }
46224
46225 return 0;
46226 }
46227
46228 #undef TARGET_SCHED_DISPATCH
46229 #define TARGET_SCHED_DISPATCH has_dispatch
46230 #undef TARGET_SCHED_DISPATCH_DO
46231 #define TARGET_SCHED_DISPATCH_DO do_dispatch
46232 #undef TARGET_SCHED_REASSOCIATION_WIDTH
46233 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
46234 #undef TARGET_SCHED_REORDER
46235 #define TARGET_SCHED_REORDER ix86_sched_reorder
46236 #undef TARGET_SCHED_ADJUST_PRIORITY
46237 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
46238 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
46239 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
46240 ix86_dependencies_evaluation_hook
46241
46242 /* The size of the dispatch window is the total number of bytes of
46243 object code allowed in a window. */
46244 #define DISPATCH_WINDOW_SIZE 16
46245
46246 /* Number of dispatch windows considered for scheduling. */
46247 #define MAX_DISPATCH_WINDOWS 3
46248
46249 /* Maximum number of instructions in a window. */
46250 #define MAX_INSN 4
46251
46252 /* Maximum number of immediate operands in a window. */
46253 #define MAX_IMM 4
46254
46255 /* Maximum number of immediate bits allowed in a window. */
46256 #define MAX_IMM_SIZE 128
46257
46258 /* Maximum number of 32 bit immediates allowed in a window. */
46259 #define MAX_IMM_32 4
46260
46261 /* Maximum number of 64 bit immediates allowed in a window. */
46262 #define MAX_IMM_64 2
46263
46264 /* Maximum total of loads or prefetches allowed in a window. */
46265 #define MAX_LOAD 2
46266
46267 /* Maximum total of stores allowed in a window. */
46268 #define MAX_STORE 1
46269
46270 #undef BIG
46271 #define BIG 100
46272
46273
46274 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
46275 enum dispatch_group {
46276 disp_no_group = 0,
46277 disp_load,
46278 disp_store,
46279 disp_load_store,
46280 disp_prefetch,
46281 disp_imm,
46282 disp_imm_32,
46283 disp_imm_64,
46284 disp_branch,
46285 disp_cmp,
46286 disp_jcc,
46287 disp_last
46288 };
46289
46290 /* Number of allowable groups in a dispatch window. It is an array
46291 indexed by dispatch_group enum. 100 is used as a big number,
46292 because the number of these kinds of operations does not have any
46293 effect in a dispatch window, but we need them for other reasons in
46294 the table. */
46295 static unsigned int num_allowable_groups[disp_last] = {
46296 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
46297 };
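/* These limits appear to mirror MAX_LOAD (2), MAX_STORE (1), MAX_IMM (4),
   MAX_IMM_32 (4) and MAX_IMM_64 (2); disp_cmp and disp_jcc get BIG so they
   never limit a window on their own.  */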
46298
46299 char group_name[disp_last + 1][16] = {
46300 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
46301 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
46302 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
46303 };
46304
46305 /* Instruction path. */
46306 enum insn_path {
46307 no_path = 0,
46308 path_single, /* Single micro op. */
46309 path_double, /* Double micro op. */
46310 path_multi, /* Instructions with more than 2 micro ops. */
46311 last_path
46312 };
46313
46314 /* sched_insn_info defines a window to the instructions scheduled in
46315 the basic block. It contains a pointer to the insn_info table and
46316 the instruction scheduled.
46317
46318 Windows are allocated for each basic block and are linked
46319 together. */
46320 typedef struct sched_insn_info_s {
46321 rtx insn;
46322 enum dispatch_group group;
46323 enum insn_path path;
46324 int byte_len;
46325 int imm_bytes;
46326 } sched_insn_info;
46327
46328 /* Linked list of dispatch windows. This is a two way list of
46329 dispatch windows of a basic block. It contains information about
46330 the number of uops in the window and the total number of
46331 instructions and of bytes in the object code for this dispatch
46332 window. */
46333 typedef struct dispatch_windows_s {
46334 int num_insn; /* Number of insns in the window. */
46335 int num_uops; /* Number of uops in the window. */
46336 int window_size; /* Number of bytes in the window. */
46337 int window_num; /* Window number, either 0 or 1. */
46338 int num_imm; /* Number of immediates in the window. */
46339 int num_imm_32; /* Number of 32 bit immediates in the window. */
46340 int num_imm_64; /* Number of 64 bit immediates in the window. */
46341 int imm_size; /* Total immediates in the window. */
46342 int num_loads; /* Total memory loads in the window. */
46343 int num_stores; /* Total memory stores in the window. */
46344 int violation; /* Violation exists in window. */
46345 sched_insn_info *window; /* Pointer to the window. */
46346 struct dispatch_windows_s *next;
46347 struct dispatch_windows_s *prev;
46348 } dispatch_windows;
46349
46350 /* Immediate values used in an insn. */
46351 typedef struct imm_info_s
46352 {
46353 int imm;
46354 int imm32;
46355 int imm64;
46356 } imm_info;
46357
46358 static dispatch_windows *dispatch_window_list;
46359 static dispatch_windows *dispatch_window_list1;
46360
46361 /* Get dispatch group of insn. */
46362
46363 static enum dispatch_group
46364 get_mem_group (rtx_insn *insn)
46365 {
46366 enum attr_memory memory;
46367
46368 if (INSN_CODE (insn) < 0)
46369 return disp_no_group;
46370 memory = get_attr_memory (insn);
46371 if (memory == MEMORY_STORE)
46372 return disp_store;
46373
46374 if (memory == MEMORY_LOAD)
46375 return disp_load;
46376
46377 if (memory == MEMORY_BOTH)
46378 return disp_load_store;
46379
46380 return disp_no_group;
46381 }
46382
46383 /* Return true if insn is a compare instruction. */
46384
46385 static bool
46386 is_cmp (rtx_insn *insn)
46387 {
46388 enum attr_type type;
46389
46390 type = get_attr_type (insn);
46391 return (type == TYPE_TEST
46392 || type == TYPE_ICMP
46393 || type == TYPE_FCMP
46394 || GET_CODE (PATTERN (insn)) == COMPARE);
46395 }
46396
46397 /* Return true if a dispatch violation was encountered. */
46398
46399 static bool
46400 dispatch_violation (void)
46401 {
46402 if (dispatch_window_list->next)
46403 return dispatch_window_list->next->violation;
46404 return dispatch_window_list->violation;
46405 }
46406
46407 /* Return true if insn is a branch instruction. */
46408
46409 static bool
46410 is_branch (rtx insn)
46411 {
46412 return (CALL_P (insn) || JUMP_P (insn));
46413 }
46414
46415 /* Return true if insn is a prefetch instruction. */
46416
46417 static bool
46418 is_prefetch (rtx insn)
46419 {
46420 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
46421 }
46422
46423 /* This function initializes a dispatch window and the list container holding a
46424 pointer to the window. */
46425
46426 static void
46427 init_window (int window_num)
46428 {
46429 int i;
46430 dispatch_windows *new_list;
46431
46432 if (window_num == 0)
46433 new_list = dispatch_window_list;
46434 else
46435 new_list = dispatch_window_list1;
46436
46437 new_list->num_insn = 0;
46438 new_list->num_uops = 0;
46439 new_list->window_size = 0;
46440 new_list->next = NULL;
46441 new_list->prev = NULL;
46442 new_list->window_num = window_num;
46443 new_list->num_imm = 0;
46444 new_list->num_imm_32 = 0;
46445 new_list->num_imm_64 = 0;
46446 new_list->imm_size = 0;
46447 new_list->num_loads = 0;
46448 new_list->num_stores = 0;
46449 new_list->violation = false;
46450
46451 for (i = 0; i < MAX_INSN; i++)
46452 {
46453 new_list->window[i].insn = NULL;
46454 new_list->window[i].group = disp_no_group;
46455 new_list->window[i].path = no_path;
46456 new_list->window[i].byte_len = 0;
46457 new_list->window[i].imm_bytes = 0;
46458 }
46459 return;
46460 }
46461
46462 /* This function allocates and initializes a dispatch window and the
46463 list container holding a pointer to the window. */
46464
46465 static dispatch_windows *
46466 allocate_window (void)
46467 {
46468 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
46469 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
46470
46471 return new_list;
46472 }
46473
46474 /* This routine initializes the dispatch scheduling information. It
46475 initiates building dispatch scheduler tables and constructs the
46476 first dispatch window. */
46477
46478 static void
46479 init_dispatch_sched (void)
46480 {
46481 /* Allocate a dispatch list and a window. */
46482 dispatch_window_list = allocate_window ();
46483 dispatch_window_list1 = allocate_window ();
46484 init_window (0);
46485 init_window (1);
46486 }
46487
46488 /* This function returns true if a branch is detected. End of a basic block
46489 does not have to be a branch, but here we assume only branches end a
46490 window. */
46491
46492 static bool
46493 is_end_basic_block (enum dispatch_group group)
46494 {
46495 return group == disp_branch;
46496 }
46497
46498 /* This function is called when the end of window processing is reached. */
46499
46500 static void
46501 process_end_window (void)
46502 {
46503 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
46504 if (dispatch_window_list->next)
46505 {
46506 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
46507 gcc_assert (dispatch_window_list->window_size
46508 + dispatch_window_list1->window_size <= 48);
46509 init_window (1);
46510 }
46511 init_window (0);
46512 }
46513
46514 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
46515 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
46516 for 48 bytes of instructions. Note that these windows are not dispatch
46517 windows whose sizes are DISPATCH_WINDOW_SIZE. */
46518
46519 static dispatch_windows *
46520 allocate_next_window (int window_num)
46521 {
46522 if (window_num == 0)
46523 {
46524 if (dispatch_window_list->next)
46525 init_window (1);
46526 init_window (0);
46527 return dispatch_window_list;
46528 }
46529
46530 dispatch_window_list->next = dispatch_window_list1;
46531 dispatch_window_list1->prev = dispatch_window_list;
46532
46533 return dispatch_window_list1;
46534 }
46535
46536 /* Increment the number of immediate operands of an instruction. */
46537
46538 static int
46539 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
46540 {
46541 if (*in_rtx == 0)
46542 return 0;
46543
46544 switch (GET_CODE (*in_rtx))
46545 {
46546 case CONST:
46547 case SYMBOL_REF:
46548 case CONST_INT:
46549 (imm_values->imm)++;
46550 if (x86_64_immediate_operand (*in_rtx, SImode))
46551 (imm_values->imm32)++;
46552 else
46553 (imm_values->imm64)++;
46554 break;
46555
46556 case CONST_DOUBLE:
46557 (imm_values->imm)++;
46558 (imm_values->imm64)++;
46559 break;
46560
46561 case CODE_LABEL:
46562 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
46563 {
46564 (imm_values->imm)++;
46565 (imm_values->imm32)++;
46566 }
46567 break;
46568
46569 default:
46570 break;
46571 }
46572
46573 return 0;
46574 }
46575
46576 /* Compute number of immediate operands of an instruction. */
46577
46578 static void
46579 find_constant (rtx in_rtx, imm_info *imm_values)
46580 {
46581 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
46582 (rtx_function) find_constant_1, (void *) imm_values);
46583 }
46584
46585 /* Return total size of immediate operands of an instruction along with number
46586 of corresponding immediate-operands. It initializes its parameters to zero
46587 before calling FIND_CONSTANT.
46588 INSN is the input instruction. IMM is the total of immediates.
46589 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
46590 bit immediates. */
46591
46592 static int
46593 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
46594 {
46595 imm_info imm_values = {0, 0, 0};
46596
46597 find_constant (insn, &imm_values);
46598 *imm = imm_values.imm;
46599 *imm32 = imm_values.imm32;
46600 *imm64 = imm_values.imm64;
46601 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
46602 }
46603
46604 /* This function returns true if any operand of the instruction is an
46605 immediate. */
46606
46607 static bool
46608 has_immediate (rtx insn)
46609 {
46610 int num_imm_operand;
46611 int num_imm32_operand;
46612 int num_imm64_operand;
46613
46614 if (insn)
46615 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46616 &num_imm64_operand);
46617 return false;
46618 }
46619
46620 /* Return single or double path for instructions. */
46621
46622 static enum insn_path
46623 get_insn_path (rtx_insn *insn)
46624 {
46625 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46626
46627 if ((int)path == 0)
46628 return path_single;
46629
46630 if ((int)path == 1)
46631 return path_double;
46632
46633 return path_multi;
46634 }
46635
46636 /* Return insn dispatch group. */
46637
46638 static enum dispatch_group
46639 get_insn_group (rtx_insn *insn)
46640 {
46641 enum dispatch_group group = get_mem_group (insn);
46642 if (group)
46643 return group;
46644
46645 if (is_branch (insn))
46646 return disp_branch;
46647
46648 if (is_cmp (insn))
46649 return disp_cmp;
46650
46651 if (has_immediate (insn))
46652 return disp_imm;
46653
46654 if (is_prefetch (insn))
46655 return disp_prefetch;
46656
46657 return disp_no_group;
46658 }
46659
46660 /* Count number of GROUP restricted instructions in a dispatch
46661 window WINDOW_LIST. */
46662
46663 static int
46664 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
46665 {
46666 enum dispatch_group group = get_insn_group (insn);
46667 int imm_size;
46668 int num_imm_operand;
46669 int num_imm32_operand;
46670 int num_imm64_operand;
46671
46672 if (group == disp_no_group)
46673 return 0;
46674
46675 if (group == disp_imm)
46676 {
46677 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46678 &num_imm64_operand);
46679 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46680 || num_imm_operand + window_list->num_imm > MAX_IMM
46681 || (num_imm32_operand > 0
46682 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46683 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46684 || (num_imm64_operand > 0
46685 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46686 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46687 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46688 && num_imm64_operand > 0
46689 && ((window_list->num_imm_64 > 0
46690 && window_list->num_insn >= 2)
46691 || window_list->num_insn >= 3)))
46692 return BIG;
46693
46694 return 1;
46695 }
46696
46697 if ((group == disp_load_store
46698 && (window_list->num_loads >= MAX_LOAD
46699 || window_list->num_stores >= MAX_STORE))
46700 || ((group == disp_load
46701 || group == disp_prefetch)
46702 && window_list->num_loads >= MAX_LOAD)
46703 || (group == disp_store
46704 && window_list->num_stores >= MAX_STORE))
46705 return BIG;
46706
46707 return 1;
46708 }
46709
46710 /* This function returns true if insn satisfies dispatch rules on the
46711 last window scheduled. */
46712
46713 static bool
46714 fits_dispatch_window (rtx_insn *insn)
46715 {
46716 dispatch_windows *window_list = dispatch_window_list;
46717 dispatch_windows *window_list_next = dispatch_window_list->next;
46718 unsigned int num_restrict;
46719 enum dispatch_group group = get_insn_group (insn);
46720 enum insn_path path = get_insn_path (insn);
46721 int sum;
46722
46723 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46724 instructions should be given the lowest priority in the
46725 scheduling process in Haifa scheduler to make sure they will be
46726 scheduled in the same dispatch window as the reference to them. */
46727 if (group == disp_jcc || group == disp_cmp)
46728 return false;
46729
46730 /* Check nonrestricted. */
46731 if (group == disp_no_group || group == disp_branch)
46732 return true;
46733
46734 /* Get last dispatch window. */
46735 if (window_list_next)
46736 window_list = window_list_next;
46737
46738 if (window_list->window_num == 1)
46739 {
46740 sum = window_list->prev->window_size + window_list->window_size;
46741
46742 if (sum == 32
46743 || (min_insn_size (insn) + sum) >= 48)
46744 /* Window 1 is full. Go for next window. */
46745 return true;
46746 }
46747
46748 num_restrict = count_num_restricted (insn, window_list);
46749
46750 if (num_restrict > num_allowable_groups[group])
46751 return false;
46752
46753 /* See if it fits in the first window. */
46754 if (window_list->window_num == 0)
46755 {
46756 /* The first window should have only single and double path
46757 uops. */
46758 if (path == path_double
46759 && (window_list->num_uops + 2) > MAX_INSN)
46760 return false;
46761 else if (path != path_single)
46762 return false;
46763 }
46764 return true;
46765 }
46766
46767 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46768 dispatch window WINDOW_LIST. */
46769
46770 static void
46771 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
46772 {
46773 int byte_len = min_insn_size (insn);
46774 int num_insn = window_list->num_insn;
46775 int imm_size;
46776 sched_insn_info *window = window_list->window;
46777 enum dispatch_group group = get_insn_group (insn);
46778 enum insn_path path = get_insn_path (insn);
46779 int num_imm_operand;
46780 int num_imm32_operand;
46781 int num_imm64_operand;
46782
46783 if (!window_list->violation && group != disp_cmp
46784 && !fits_dispatch_window (insn))
46785 window_list->violation = true;
46786
46787 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46788 &num_imm64_operand);
46789
46790 /* Initialize window with new instruction. */
46791 window[num_insn].insn = insn;
46792 window[num_insn].byte_len = byte_len;
46793 window[num_insn].group = group;
46794 window[num_insn].path = path;
46795 window[num_insn].imm_bytes = imm_size;
46796
46797 window_list->window_size += byte_len;
46798 window_list->num_insn = num_insn + 1;
46799 window_list->num_uops = window_list->num_uops + num_uops;
46800 window_list->imm_size += imm_size;
46801 window_list->num_imm += num_imm_operand;
46802 window_list->num_imm_32 += num_imm32_operand;
46803 window_list->num_imm_64 += num_imm64_operand;
46804
46805 if (group == disp_store)
46806 window_list->num_stores += 1;
46807 else if (group == disp_load
46808 || group == disp_prefetch)
46809 window_list->num_loads += 1;
46810 else if (group == disp_load_store)
46811 {
46812 window_list->num_stores += 1;
46813 window_list->num_loads += 1;
46814 }
46815 }
46816
46817 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46818 If the total bytes of instructions or the number of instructions in
46819 the window would exceed what is allowed, a new window is allocated. */
46820
46821 static void
46822 add_to_dispatch_window (rtx_insn *insn)
46823 {
46824 int byte_len;
46825 dispatch_windows *window_list;
46826 dispatch_windows *next_list;
46827 dispatch_windows *window0_list;
46828 enum insn_path path;
46829 enum dispatch_group insn_group;
46830 bool insn_fits;
46831 int num_insn;
46832 int num_uops;
46833 int window_num;
46834 int insn_num_uops;
46835 int sum;
46836
46837 if (INSN_CODE (insn) < 0)
46838 return;
46839
46840 byte_len = min_insn_size (insn);
46841 window_list = dispatch_window_list;
46842 next_list = window_list->next;
46843 path = get_insn_path (insn);
46844 insn_group = get_insn_group (insn);
46845
46846 /* Get the last dispatch window. */
46847 if (next_list)
46848 window_list = dispatch_window_list->next;
46849
46850 if (path == path_single)
46851 insn_num_uops = 1;
46852 else if (path == path_double)
46853 insn_num_uops = 2;
46854 else
46855 insn_num_uops = (int) path;
46856
46857 /* If the current window is full, get a new window.
46858 Window number zero is full if MAX_INSN uops are scheduled in it.
46859 Window number one is full if window zero's bytes plus window
46860 one's bytes equal 32, or if adding the bytes of the new instruction
46861 makes the total greater than 48, or if it already has MAX_INSN
46862 instructions in it. */
46863 num_insn = window_list->num_insn;
46864 num_uops = window_list->num_uops;
46865 window_num = window_list->window_num;
46866 insn_fits = fits_dispatch_window (insn);
46867
46868 if (num_insn >= MAX_INSN
46869 || num_uops + insn_num_uops > MAX_INSN
46870 || !(insn_fits))
46871 {
46872 window_num = ~window_num & 1;
46873 window_list = allocate_next_window (window_num);
46874 }
46875
46876 if (window_num == 0)
46877 {
46878 add_insn_window (insn, window_list, insn_num_uops);
46879 if (window_list->num_insn >= MAX_INSN
46880 && insn_group == disp_branch)
46881 {
46882 process_end_window ();
46883 return;
46884 }
46885 }
46886 else if (window_num == 1)
46887 {
46888 window0_list = window_list->prev;
46889 sum = window0_list->window_size + window_list->window_size;
46890 if (sum == 32
46891 || (byte_len + sum) >= 48)
46892 {
46893 process_end_window ();
46894 window_list = dispatch_window_list;
46895 }
46896
46897 add_insn_window (insn, window_list, insn_num_uops);
46898 }
46899 else
46900 gcc_unreachable ();
46901
46902 if (is_end_basic_block (insn_group))
46903 {
46904 /* End of basic block is reached; do end-basic-block processing. */
46905 process_end_window ();
46906 return;
46907 }
46908 }
46909
46910 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46911
46912 DEBUG_FUNCTION static void
46913 debug_dispatch_window_file (FILE *file, int window_num)
46914 {
46915 dispatch_windows *list;
46916 int i;
46917
46918 if (window_num == 0)
46919 list = dispatch_window_list;
46920 else
46921 list = dispatch_window_list1;
46922
46923 fprintf (file, "Window #%d:\n", list->window_num);
46924 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46925 list->num_insn, list->num_uops, list->window_size);
46926 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46927 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46928
46929 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46930 list->num_stores);
46931 fprintf (file, " insn info:\n");
46932
46933 for (i = 0; i < MAX_INSN; i++)
46934 {
46935 if (!list->window[i].insn)
46936 break;
46937 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46938 i, group_name[list->window[i].group],
46939 i, (void *)list->window[i].insn,
46940 i, list->window[i].path,
46941 i, list->window[i].byte_len,
46942 i, list->window[i].imm_bytes);
46943 }
46944 }
46945
46946 /* Print to stdout a dispatch window. */
46947
46948 DEBUG_FUNCTION void
46949 debug_dispatch_window (int window_num)
46950 {
46951 debug_dispatch_window_file (stdout, window_num);
46952 }
46953
46954 /* Print INSN dispatch information to FILE. */
46955
46956 DEBUG_FUNCTION static void
46957 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
46958 {
46959 int byte_len;
46960 enum insn_path path;
46961 enum dispatch_group group;
46962 int imm_size;
46963 int num_imm_operand;
46964 int num_imm32_operand;
46965 int num_imm64_operand;
46966
46967 if (INSN_CODE (insn) < 0)
46968 return;
46969
46970 byte_len = min_insn_size (insn);
46971 path = get_insn_path (insn);
46972 group = get_insn_group (insn);
46973 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46974 &num_imm64_operand);
46975
46976 fprintf (file, " insn info:\n");
46977 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46978 group_name[group], path, byte_len);
46979 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46980 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46981 }
46982
46983 /* Print to STDOUT the status of the ready list with respect to
46984 dispatch windows. */
46985
46986 DEBUG_FUNCTION void
46987 debug_ready_dispatch (void)
46988 {
46989 int i;
46990 int no_ready = number_in_ready ();
46991
46992 fprintf (stdout, "Number of ready: %d\n", no_ready);
46993
46994 for (i = 0; i < no_ready; i++)
46995 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46996 }
46997
46998 /* This routine is the driver of the dispatch scheduler. */
46999
47000 static void
47001 do_dispatch (rtx_insn *insn, int mode)
47002 {
47003 if (mode == DISPATCH_INIT)
47004 init_dispatch_sched ();
47005 else if (mode == ADD_TO_DISPATCH_WINDOW)
47006 add_to_dispatch_window (insn);
47007 }
47008
47009 /* Return TRUE if Dispatch Scheduling is supported. */
47010
47011 static bool
47012 has_dispatch (rtx_insn *insn, int action)
47013 {
47014 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
47015 && flag_dispatch_scheduler)
47016 switch (action)
47017 {
47018 default:
47019 return false;
47020
47021 case IS_DISPATCH_ON:
47022 return true;
47023 break;
47024
47025 case IS_CMP:
47026 return is_cmp (insn);
47027
47028 case DISPATCH_VIOLATION:
47029 return dispatch_violation ();
47030
47031 case FITS_DISPATCH_WINDOW:
47032 return fits_dispatch_window (insn);
47033 }
47034
47035 return false;
47036 }
47037
47038 /* Implementation of reassociation_width target hook used by
47039 reassoc phase to identify parallelism level in reassociated
47040 tree. The statement's tree_code is passed in OPC. The arguments'
47041 type is passed in MODE.
47042
47043 Currently parallel reassociation is enabled for Atom
47044 processors only and we set reassociation width to be 2
47045 because Atom may issue up to 2 instructions per cycle.
47046
47047 Return value should be fixed if parallel reassociation is
47048 enabled for other processors. */
47049
47050 static int
47051 ix86_reassociation_width (unsigned int, enum machine_mode mode)
47052 {
47053 int res = 1;
47054
47055 /* Vector part. */
47056 if (VECTOR_MODE_P (mode))
47057 {
47058 if (TARGET_VECTOR_PARALLEL_EXECUTION)
47059 return 2;
47060 else
47061 return 1;
47062 }
47063
47064 /* Scalar part. */
47065 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
47066 res = 2;
47067 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
47068 res = 2;
47069
47070 return res;
47071 }
47072
47073 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
47074 place emms and femms instructions. */
47075
47076 static enum machine_mode
47077 ix86_preferred_simd_mode (enum machine_mode mode)
47078 {
47079 if (!TARGET_SSE)
47080 return word_mode;
47081
47082 switch (mode)
47083 {
47084 case QImode:
47085 return TARGET_AVX512BW ? V64QImode :
47086 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
47087 case HImode:
47088 return TARGET_AVX512BW ? V32HImode :
47089 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
47090 case SImode:
47091 return TARGET_AVX512F ? V16SImode :
47092 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
47093 case DImode:
47094 return TARGET_AVX512F ? V8DImode :
47095 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
47096
47097 case SFmode:
47098 if (TARGET_AVX512F)
47099 return V16SFmode;
47100 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
47101 return V8SFmode;
47102 else
47103 return V4SFmode;
47104
47105 case DFmode:
47106 if (!TARGET_VECTORIZE_DOUBLE)
47107 return word_mode;
47108 else if (TARGET_AVX512F)
47109 return V8DFmode;
47110 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
47111 return V4DFmode;
47112 else if (TARGET_SSE2)
47113 return V2DFmode;
47114 /* FALLTHRU */
47115
47116 default:
47117 return word_mode;
47118 }
47119 }
47120
47121 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
47122 vectors. If AVX512F is enabled then try vectorizing with 512bit,
47123 256bit and 128bit vectors. */
47124
47125 static unsigned int
47126 ix86_autovectorize_vector_sizes (void)
47127 {
47128 return TARGET_AVX512F ? 64 | 32 | 16 :
47129 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
47130 }
47131
47132 \f
47133
47134 /* Return class of registers which could be used for pseudo of MODE
47135 and of class RCLASS for spilling instead of memory. Return NO_REGS
47136 if it is not possible or not profitable. */
47137 static reg_class_t
47138 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
47139 {
47140 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
47141 && (mode == SImode || (TARGET_64BIT && mode == DImode))
47142 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
47143 return ALL_SSE_REGS;
47144 return NO_REGS;
47145 }
47146
47147 /* Implement targetm.vectorize.init_cost. */
47148
47149 static void *
47150 ix86_init_cost (struct loop *)
47151 {
47152 unsigned *cost = XNEWVEC (unsigned, 3);
47153 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
47154 return cost;
47155 }
47156
47157 /* Implement targetm.vectorize.add_stmt_cost. */
47158
47159 static unsigned
47160 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
47161 struct _stmt_vec_info *stmt_info, int misalign,
47162 enum vect_cost_model_location where)
47163 {
47164 unsigned *cost = (unsigned *) data;
47165 unsigned retval = 0;
47166
47167 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
47168 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
47169
47170 /* Statements in an inner loop relative to the loop being
47171 vectorized are weighted more heavily. The value here is
47172 arbitrary and could potentially be improved with analysis. */
47173 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
47174 count *= 50; /* FIXME. */
47175
47176 retval = (unsigned) (count * stmt_cost);
47177
47178 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
47179 for Silvermont, as it has an out-of-order integer pipeline and can execute
47180 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
47181 if (TARGET_SILVERMONT || TARGET_INTEL)
47182 if (stmt_info && stmt_info->stmt)
47183 {
47184 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
47185 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
47186 retval = (retval * 17) / 10;
47187 }
47188
47189 cost[where] += retval;
47190
47191 return retval;
47192 }
47193
47194 /* Implement targetm.vectorize.finish_cost. */
47195
47196 static void
47197 ix86_finish_cost (void *data, unsigned *prologue_cost,
47198 unsigned *body_cost, unsigned *epilogue_cost)
47199 {
47200 unsigned *cost = (unsigned *) data;
47201 *prologue_cost = cost[vect_prologue];
47202 *body_cost = cost[vect_body];
47203 *epilogue_cost = cost[vect_epilogue];
47204 }
47205
47206 /* Implement targetm.vectorize.destroy_cost_data. */
47207
47208 static void
47209 ix86_destroy_cost_data (void *data)
47210 {
47211 free (data);
47212 }
47213
47214 /* Validate target specific memory model bits in VAL. */
47215
47216 static unsigned HOST_WIDE_INT
47217 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
47218 {
47219 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
47220 bool strong;
47221
47222 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
47223 |MEMMODEL_MASK)
47224 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
47225 {
47226 warning (OPT_Winvalid_memory_model,
47227 "Unknown architecture specific memory model");
47228 return MEMMODEL_SEQ_CST;
47229 }
47230 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
47231 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
47232 {
47233 warning (OPT_Winvalid_memory_model,
47234 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
47235 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
47236 }
47237 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
47238 {
47239 warning (OPT_Winvalid_memory_model,
47240 "HLE_RELEASE not used with RELEASE or stronger memory model");
47241 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
47242 }
47243 return val;
47244 }
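/* Illustrative use of the HLE bits checked above (editorial example; with
   -mhle the __ATOMIC_HLE_* macros are the user-visible spellings of the
   IX86_HLE_* bits):

     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   arrives here as MEMMODEL_RELEASE | IX86_HLE_RELEASE and is returned
   unchanged, while pairing __ATOMIC_HLE_RELEASE with __ATOMIC_RELAXED
   would hit the warning above and fall back to
   MEMMODEL_SEQ_CST | IX86_HLE_RELEASE.  */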
47245
47246 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
47247 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
47248 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
47249 or the number of vecsize_mangle variants that should be emitted. */
47250
47251 static int
47252 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
47253 struct cgraph_simd_clone *clonei,
47254 tree base_type, int num)
47255 {
47256 int ret = 1;
47257
47258 if (clonei->simdlen
47259 && (clonei->simdlen < 2
47260 || clonei->simdlen > 16
47261 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
47262 {
47263 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
47264 "unsupported simdlen %d", clonei->simdlen);
47265 return 0;
47266 }
47267
47268 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
47269 if (TREE_CODE (ret_type) != VOID_TYPE)
47270 switch (TYPE_MODE (ret_type))
47271 {
47272 case QImode:
47273 case HImode:
47274 case SImode:
47275 case DImode:
47276 case SFmode:
47277 case DFmode:
47278 /* case SCmode: */
47279 /* case DCmode: */
47280 break;
47281 default:
47282 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
47283 "unsupported return type %qT for simd\n", ret_type);
47284 return 0;
47285 }
47286
47287 tree t;
47288 int i;
47289
47290 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
47291 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
47292 switch (TYPE_MODE (TREE_TYPE (t)))
47293 {
47294 case QImode:
47295 case HImode:
47296 case SImode:
47297 case DImode:
47298 case SFmode:
47299 case DFmode:
47300 /* case SCmode: */
47301 /* case DCmode: */
47302 break;
47303 default:
47304 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
47305 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
47306 return 0;
47307 }
47308
47309 if (clonei->cilk_elemental)
47310 {
47311 /* Parse the processor clause here.  If not present, default to 'b'. */
47312 clonei->vecsize_mangle = 'b';
47313 }
47314 else if (!TREE_PUBLIC (node->decl))
47315 {
47316 /* If the function isn't exported, we can pick up just one ISA
47317 for the clones. */
47318 if (TARGET_AVX2)
47319 clonei->vecsize_mangle = 'd';
47320 else if (TARGET_AVX)
47321 clonei->vecsize_mangle = 'c';
47322 else
47323 clonei->vecsize_mangle = 'b';
47324 ret = 1;
47325 }
47326 else
47327 {
47328 clonei->vecsize_mangle = "bcd"[num];
47329 ret = 3;
47330 }
47331 switch (clonei->vecsize_mangle)
47332 {
47333 case 'b':
47334 clonei->vecsize_int = 128;
47335 clonei->vecsize_float = 128;
47336 break;
47337 case 'c':
47338 clonei->vecsize_int = 128;
47339 clonei->vecsize_float = 256;
47340 break;
47341 case 'd':
47342 clonei->vecsize_int = 256;
47343 clonei->vecsize_float = 256;
47344 break;
47345 }
47346 if (clonei->simdlen == 0)
47347 {
47348 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
47349 clonei->simdlen = clonei->vecsize_int;
47350 else
47351 clonei->simdlen = clonei->vecsize_float;
47352 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
47353 if (clonei->simdlen > 16)
47354 clonei->simdlen = 16;
47355 }
47356 return ret;
47357 }
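/* Worked example (editorial sketch): for an exported (TREE_PUBLIC)

     #pragma omp declare simd
     double f (double x);

   base_type has DFmode (64 bits) and three variants are requested
   (ret == 3): 'b' with vecsize_float 128 gives simdlen 128/64 == 2, while
   'c' and 'd' with vecsize_float 256 give simdlen 256/64 == 4, i.e. clones
   mangled along the lines of _ZGVbN2_f, _ZGVcN4_f and _ZGVdN4_f under the
   x86 vector function ABI.  */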
47358
47359 /* Add target attribute to SIMD clone NODE if needed. */
47360
47361 static void
47362 ix86_simd_clone_adjust (struct cgraph_node *node)
47363 {
47364 const char *str = NULL;
47365 gcc_assert (node->decl == cfun->decl);
47366 switch (node->simdclone->vecsize_mangle)
47367 {
47368 case 'b':
47369 if (!TARGET_SSE2)
47370 str = "sse2";
47371 break;
47372 case 'c':
47373 if (!TARGET_AVX)
47374 str = "avx";
47375 break;
47376 case 'd':
47377 if (!TARGET_AVX2)
47378 str = "avx2";
47379 break;
47380 default:
47381 gcc_unreachable ();
47382 }
47383 if (str == NULL)
47384 return;
47385 push_cfun (NULL);
47386 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
47387 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
47388 gcc_assert (ok);
47389 pop_cfun ();
47390 ix86_previous_fndecl = NULL_TREE;
47391 ix86_set_current_function (node->decl);
47392 }
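/* Editorial note: e.g. when the 'd' (AVX2) variant of a clone is compiled
   in a translation unit built with only -msse2, the code above in effect
   attaches __attribute__((target ("avx2"))) to the clone's decl and
   re-selects the target options, so the clone body may use AVX2
   instructions.  */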
47393
47394 /* If SIMD clone NODE can't be used in a vectorized loop
47395 in the current function, return -1; otherwise return the badness of using it
47396 (0 if it is most desirable from the vecsize_mangle point of view, 1 if
47397 slightly less desirable, etc.). */
47398
47399 static int
47400 ix86_simd_clone_usable (struct cgraph_node *node)
47401 {
47402 switch (node->simdclone->vecsize_mangle)
47403 {
47404 case 'b':
47405 if (!TARGET_SSE2)
47406 return -1;
47407 if (!TARGET_AVX)
47408 return 0;
47409 return TARGET_AVX2 ? 2 : 1;
47410 case 'c':
47411 if (!TARGET_AVX)
47412 return -1;
47413 return TARGET_AVX2 ? 1 : 0;
47414 break;
47415 case 'd':
47416 if (!TARGET_AVX2)
47417 return -1;
47418 return 0;
47419 default:
47420 gcc_unreachable ();
47421 }
47422 }
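/* Editorial example: in a caller compiled with -mavx2 all three variants
   are usable and the AVX2 ('d') clone is preferred (badness 0), AVX ('c')
   is 1 and SSE2 ('b') is 2; with only -msse2 in effect, only the 'b'
   variant is usable.  */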
47423
47424 /* This function counts memory references, incrementing *MEM_COUNT for
47425 each MEM found (by two for accesses wider than four words).  The resulting
47426 count determines the unrolling factor for the bdver3 and bdver4 architectures. */
47427
47428 static int
47429 ix86_loop_memcount (rtx *x, unsigned *mem_count)
47430 {
47431 if (*x != NULL_RTX && MEM_P (*x))
47432 {
47433 enum machine_mode mode;
47434 unsigned int n_words;
47435
47436 mode = GET_MODE (*x);
47437 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
47438
47439 if (n_words > 4)
47440 (*mem_count)+=2;
47441 else
47442 (*mem_count)+=1;
47443 }
47444 return 0;
47445 }
47446
47447 /* This function adjusts the unroll factor based on
47448 the hardware capabilities.  For example, bdver3 has
47449 a loop buffer which makes unrolling of smaller
47450 loops less important.  This function decides the
47451 unroll factor using the number of memory references
47452 (limiting the unrolled body to roughly 32 of them) as a heuristic. */
47453
47454 static unsigned
47455 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
47456 {
47457 basic_block *bbs;
47458 rtx_insn *insn;
47459 unsigned i;
47460 unsigned mem_count = 0;
47461
47462 if (!TARGET_ADJUST_UNROLL)
47463 return nunroll;
47464
47465 /* Count the number of memory references within the loop body. */
47466 bbs = get_loop_body (loop);
47467 for (i = 0; i < loop->num_nodes; i++)
47468 {
47469 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
47470 if (NONDEBUG_INSN_P (insn))
47471 for_each_rtx_in_insn (&insn, (rtx_function) ix86_loop_memcount,
47472 &mem_count);
47473 }
47474 free (bbs);
47475
47476 if (mem_count && mem_count <= 32)
47477 return 32 / mem_count;
47478
47479 return nunroll;
47480 }
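/* Worked example (editorial): with TARGET_ADJUST_UNROLL in effect, a loop
   body containing five counted memory references gets an unroll factor of
   32 / 5 == 6, whatever NUNROLL the generic heuristics proposed; a body
   with more than 32 counted references keeps NUNROLL unchanged.  */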
47481
47482
47483 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
47484
47485 static bool
47486 ix86_float_exceptions_rounding_supported_p (void)
47487 {
47488 /* For x87 floating point with standard excess precision handling,
47489 there is no adddf3 pattern (since x87 floating point only has
47490 XFmode operations) so the default hook implementation gets this
47491 wrong. */
47492 return TARGET_80387 || TARGET_SSE_MATH;
47493 }
47494
47495 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
47496
47497 static void
47498 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
47499 {
47500 if (!TARGET_80387 && !TARGET_SSE_MATH)
47501 return;
47502 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
47503 if (TARGET_80387)
47504 {
47505 tree fenv_index_type = build_index_type (size_int (6));
47506 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
47507 tree fenv_var = create_tmp_var (fenv_type, NULL);
47508 mark_addressable (fenv_var);
47509 tree fenv_ptr = build_pointer_type (fenv_type);
47510 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
47511 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
47512 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
47513 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
47514 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
47515 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
47516 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
47517 tree hold_fnclex = build_call_expr (fnclex, 0);
47518 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
47519 hold_fnclex);
47520 *clear = build_call_expr (fnclex, 0);
47521 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
47522 tree fnstsw_call = build_call_expr (fnstsw, 0);
47523 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
47524 sw_var, fnstsw_call);
47525 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
47526 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
47527 exceptions_var, exceptions_x87);
47528 *update = build2 (COMPOUND_EXPR, integer_type_node,
47529 sw_mod, update_mod);
47530 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
47531 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
47532 }
47533 if (TARGET_SSE_MATH)
47534 {
47535 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
47536 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
47537 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
47538 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
47539 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
47540 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
47541 mxcsr_orig_var, stmxcsr_hold_call);
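      /* Editorial note on the constants used just below: 0x1f80 sets the six
         MXCSR exception-mask bits (bits 7-12) and 0xffffffc0 clears the six
         sticky exception-flag bits (bits 0-5), so the value loaded for the
         HOLD sequence has all exceptions masked and no flags pending.  */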
47542 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
47543 mxcsr_orig_var,
47544 build_int_cst (unsigned_type_node, 0x1f80));
47545 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
47546 build_int_cst (unsigned_type_node, 0xffffffc0));
47547 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
47548 mxcsr_mod_var, hold_mod_val);
47549 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47550 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
47551 hold_assign_orig, hold_assign_mod);
47552 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
47553 ldmxcsr_hold_call);
47554 if (*hold)
47555 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
47556 else
47557 *hold = hold_all;
47558 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47559 if (*clear)
47560 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
47561 ldmxcsr_clear_call);
47562 else
47563 *clear = ldmxcsr_clear_call;
47564 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
47565 tree exceptions_sse = fold_convert (integer_type_node,
47566 stmxcsr_update_call);
47567 if (*update)
47568 {
47569 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
47570 exceptions_var, exceptions_sse);
47571 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
47572 exceptions_var, exceptions_mod);
47573 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
47574 exceptions_assign);
47575 }
47576 else
47577 *update = build2 (MODIFY_EXPR, integer_type_node,
47578 exceptions_var, exceptions_sse);
47579 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
47580 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47581 ldmxcsr_update_call);
47582 }
47583 tree atomic_feraiseexcept
47584 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
47585 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
47586 1, exceptions_var);
47587 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47588 atomic_feraiseexcept_call);
47589 }
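/* Editorial sketch of what the hook above enables (hedged example): with
   the default x87+SSE math, an atomic floating-point compound assignment
   such as

     _Atomic double d;
     void f (double x) { d += x; }

   is expanded as a compare-and-swap loop bracketed by the *HOLD sequence
   (save the x87 environment and MXCSR, mask and clear exceptions), *CLEAR
   on every retry, and the *UPDATE sequence on success, which collects the
   raised flags into exceptions_var and hands them to
   __atomic_feraiseexcept so they are raised exactly once.  */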
47590
47591 /* Initialize the GCC target structure. */
47592 #undef TARGET_RETURN_IN_MEMORY
47593 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
47594
47595 #undef TARGET_LEGITIMIZE_ADDRESS
47596 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
47597
47598 #undef TARGET_ATTRIBUTE_TABLE
47599 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
47600 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
47601 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
47602 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47603 # undef TARGET_MERGE_DECL_ATTRIBUTES
47604 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
47605 #endif
47606
47607 #undef TARGET_COMP_TYPE_ATTRIBUTES
47608 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
47609
47610 #undef TARGET_INIT_BUILTINS
47611 #define TARGET_INIT_BUILTINS ix86_init_builtins
47612 #undef TARGET_BUILTIN_DECL
47613 #define TARGET_BUILTIN_DECL ix86_builtin_decl
47614 #undef TARGET_EXPAND_BUILTIN
47615 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
47616
47617 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
47618 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
47619 ix86_builtin_vectorized_function
47620
47621 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
47622 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
47623
47624 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
47625 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
47626
47627 #undef TARGET_VECTORIZE_BUILTIN_GATHER
47628 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
47629
47630 #undef TARGET_BUILTIN_RECIPROCAL
47631 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47632
47633 #undef TARGET_ASM_FUNCTION_EPILOGUE
47634 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47635
47636 #undef TARGET_ENCODE_SECTION_INFO
47637 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47638 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47639 #else
47640 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47641 #endif
47642
47643 #undef TARGET_ASM_OPEN_PAREN
47644 #define TARGET_ASM_OPEN_PAREN ""
47645 #undef TARGET_ASM_CLOSE_PAREN
47646 #define TARGET_ASM_CLOSE_PAREN ""
47647
47648 #undef TARGET_ASM_BYTE_OP
47649 #define TARGET_ASM_BYTE_OP ASM_BYTE
47650
47651 #undef TARGET_ASM_ALIGNED_HI_OP
47652 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47653 #undef TARGET_ASM_ALIGNED_SI_OP
47654 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47655 #ifdef ASM_QUAD
47656 #undef TARGET_ASM_ALIGNED_DI_OP
47657 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47658 #endif
47659
47660 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47661 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47662
47663 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47664 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47665
47666 #undef TARGET_ASM_UNALIGNED_HI_OP
47667 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47668 #undef TARGET_ASM_UNALIGNED_SI_OP
47669 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47670 #undef TARGET_ASM_UNALIGNED_DI_OP
47671 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47672
47673 #undef TARGET_PRINT_OPERAND
47674 #define TARGET_PRINT_OPERAND ix86_print_operand
47675 #undef TARGET_PRINT_OPERAND_ADDRESS
47676 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47677 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47678 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47679 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47680 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47681
47682 #undef TARGET_SCHED_INIT_GLOBAL
47683 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47684 #undef TARGET_SCHED_ADJUST_COST
47685 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47686 #undef TARGET_SCHED_ISSUE_RATE
47687 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47688 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47689 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47690 ia32_multipass_dfa_lookahead
47691 #undef TARGET_SCHED_MACRO_FUSION_P
47692 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47693 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47694 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47695
47696 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47697 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47698
47699 #undef TARGET_MEMMODEL_CHECK
47700 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47701
47702 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47703 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47704
47705 #ifdef HAVE_AS_TLS
47706 #undef TARGET_HAVE_TLS
47707 #define TARGET_HAVE_TLS true
47708 #endif
47709 #undef TARGET_CANNOT_FORCE_CONST_MEM
47710 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47711 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47712 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47713
47714 #undef TARGET_DELEGITIMIZE_ADDRESS
47715 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47716
47717 #undef TARGET_MS_BITFIELD_LAYOUT_P
47718 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47719
47720 #if TARGET_MACHO
47721 #undef TARGET_BINDS_LOCAL_P
47722 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47723 #endif
47724 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47725 #undef TARGET_BINDS_LOCAL_P
47726 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47727 #endif
47728
47729 #undef TARGET_ASM_OUTPUT_MI_THUNK
47730 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47731 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47732 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47733
47734 #undef TARGET_ASM_FILE_START
47735 #define TARGET_ASM_FILE_START x86_file_start
47736
47737 #undef TARGET_OPTION_OVERRIDE
47738 #define TARGET_OPTION_OVERRIDE ix86_option_override
47739
47740 #undef TARGET_REGISTER_MOVE_COST
47741 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47742 #undef TARGET_MEMORY_MOVE_COST
47743 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47744 #undef TARGET_RTX_COSTS
47745 #define TARGET_RTX_COSTS ix86_rtx_costs
47746 #undef TARGET_ADDRESS_COST
47747 #define TARGET_ADDRESS_COST ix86_address_cost
47748
47749 #undef TARGET_FIXED_CONDITION_CODE_REGS
47750 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47751 #undef TARGET_CC_MODES_COMPATIBLE
47752 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47753
47754 #undef TARGET_MACHINE_DEPENDENT_REORG
47755 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47756
47757 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47758 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47759
47760 #undef TARGET_BUILD_BUILTIN_VA_LIST
47761 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47762
47763 #undef TARGET_FOLD_BUILTIN
47764 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47765
47766 #undef TARGET_COMPARE_VERSION_PRIORITY
47767 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47768
47769 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47770 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47771 ix86_generate_version_dispatcher_body
47772
47773 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47774 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47775 ix86_get_function_versions_dispatcher
47776
47777 #undef TARGET_ENUM_VA_LIST_P
47778 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47779
47780 #undef TARGET_FN_ABI_VA_LIST
47781 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47782
47783 #undef TARGET_CANONICAL_VA_LIST_TYPE
47784 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47785
47786 #undef TARGET_EXPAND_BUILTIN_VA_START
47787 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47788
47789 #undef TARGET_MD_ASM_CLOBBERS
47790 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47791
47792 #undef TARGET_PROMOTE_PROTOTYPES
47793 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47794 #undef TARGET_SETUP_INCOMING_VARARGS
47795 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47796 #undef TARGET_MUST_PASS_IN_STACK
47797 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47798 #undef TARGET_FUNCTION_ARG_ADVANCE
47799 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47800 #undef TARGET_FUNCTION_ARG
47801 #define TARGET_FUNCTION_ARG ix86_function_arg
47802 #undef TARGET_INIT_PIC_REG
47803 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
47804 #undef TARGET_USE_PSEUDO_PIC_REG
47805 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
47806 #undef TARGET_FUNCTION_ARG_BOUNDARY
47807 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47808 #undef TARGET_PASS_BY_REFERENCE
47809 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47810 #undef TARGET_INTERNAL_ARG_POINTER
47811 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47812 #undef TARGET_UPDATE_STACK_BOUNDARY
47813 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47814 #undef TARGET_GET_DRAP_RTX
47815 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47816 #undef TARGET_STRICT_ARGUMENT_NAMING
47817 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47818 #undef TARGET_STATIC_CHAIN
47819 #define TARGET_STATIC_CHAIN ix86_static_chain
47820 #undef TARGET_TRAMPOLINE_INIT
47821 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47822 #undef TARGET_RETURN_POPS_ARGS
47823 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47824
47825 #undef TARGET_LEGITIMATE_COMBINED_INSN
47826 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47827
47828 #undef TARGET_ASAN_SHADOW_OFFSET
47829 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47830
47831 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47832 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47833
47834 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47835 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47836
47837 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47838 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47839
47840 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
47841 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
47842 ix86_libgcc_floating_mode_supported_p
47843
47844 #undef TARGET_C_MODE_FOR_SUFFIX
47845 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47846
47847 #ifdef HAVE_AS_TLS
47848 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47849 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47850 #endif
47851
47852 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47853 #undef TARGET_INSERT_ATTRIBUTES
47854 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47855 #endif
47856
47857 #undef TARGET_MANGLE_TYPE
47858 #define TARGET_MANGLE_TYPE ix86_mangle_type
47859
47860 #if !TARGET_MACHO
47861 #undef TARGET_STACK_PROTECT_FAIL
47862 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47863 #endif
47864
47865 #undef TARGET_FUNCTION_VALUE
47866 #define TARGET_FUNCTION_VALUE ix86_function_value
47867
47868 #undef TARGET_FUNCTION_VALUE_REGNO_P
47869 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47870
47871 #undef TARGET_PROMOTE_FUNCTION_MODE
47872 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47873
47874 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47875 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47876
47877 #undef TARGET_INSTANTIATE_DECLS
47878 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47879
47880 #undef TARGET_SECONDARY_RELOAD
47881 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47882
47883 #undef TARGET_CLASS_MAX_NREGS
47884 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47885
47886 #undef TARGET_PREFERRED_RELOAD_CLASS
47887 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47888 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47889 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47890 #undef TARGET_CLASS_LIKELY_SPILLED_P
47891 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47892
47893 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47894 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47895 ix86_builtin_vectorization_cost
47896 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47897 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47898 ix86_vectorize_vec_perm_const_ok
47899 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47900 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47901 ix86_preferred_simd_mode
47902 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47903 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47904 ix86_autovectorize_vector_sizes
47905 #undef TARGET_VECTORIZE_INIT_COST
47906 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47907 #undef TARGET_VECTORIZE_ADD_STMT_COST
47908 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47909 #undef TARGET_VECTORIZE_FINISH_COST
47910 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47911 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47912 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47913
47914 #undef TARGET_SET_CURRENT_FUNCTION
47915 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47916
47917 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47918 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47919
47920 #undef TARGET_OPTION_SAVE
47921 #define TARGET_OPTION_SAVE ix86_function_specific_save
47922
47923 #undef TARGET_OPTION_RESTORE
47924 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47925
47926 #undef TARGET_OPTION_PRINT
47927 #define TARGET_OPTION_PRINT ix86_function_specific_print
47928
47929 #undef TARGET_OPTION_FUNCTION_VERSIONS
47930 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47931
47932 #undef TARGET_CAN_INLINE_P
47933 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47934
47935 #undef TARGET_EXPAND_TO_RTL_HOOK
47936 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47937
47938 #undef TARGET_LEGITIMATE_ADDRESS_P
47939 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47940
47941 #undef TARGET_LRA_P
47942 #define TARGET_LRA_P hook_bool_void_true
47943
47944 #undef TARGET_REGISTER_PRIORITY
47945 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47946
47947 #undef TARGET_REGISTER_USAGE_LEVELING_P
47948 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47949
47950 #undef TARGET_LEGITIMATE_CONSTANT_P
47951 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47952
47953 #undef TARGET_FRAME_POINTER_REQUIRED
47954 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47955
47956 #undef TARGET_CAN_ELIMINATE
47957 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47958
47959 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47960 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47961
47962 #undef TARGET_ASM_CODE_END
47963 #define TARGET_ASM_CODE_END ix86_code_end
47964
47965 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47966 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47967
47968 #if TARGET_MACHO
47969 #undef TARGET_INIT_LIBFUNCS
47970 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47971 #endif
47972
47973 #undef TARGET_LOOP_UNROLL_ADJUST
47974 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47975
47976 #undef TARGET_SPILL_CLASS
47977 #define TARGET_SPILL_CLASS ix86_spill_class
47978
47979 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47980 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47981 ix86_simd_clone_compute_vecsize_and_simdlen
47982
47983 #undef TARGET_SIMD_CLONE_ADJUST
47984 #define TARGET_SIMD_CLONE_ADJUST \
47985 ix86_simd_clone_adjust
47986
47987 #undef TARGET_SIMD_CLONE_USABLE
47988 #define TARGET_SIMD_CLONE_USABLE \
47989 ix86_simd_clone_usable
47990
47991 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47992 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47993 ix86_float_exceptions_rounding_supported_p
47994
47995 #undef TARGET_MODE_EMIT
47996 #define TARGET_MODE_EMIT ix86_emit_mode_set
47997
47998 #undef TARGET_MODE_NEEDED
47999 #define TARGET_MODE_NEEDED ix86_mode_needed
48000
48001 #undef TARGET_MODE_AFTER
48002 #define TARGET_MODE_AFTER ix86_mode_after
48003
48004 #undef TARGET_MODE_ENTRY
48005 #define TARGET_MODE_ENTRY ix86_mode_entry
48006
48007 #undef TARGET_MODE_EXIT
48008 #define TARGET_MODE_EXIT ix86_mode_exit
48009
48010 #undef TARGET_MODE_PRIORITY
48011 #define TARGET_MODE_PRIORITY ix86_mode_priority
48012
48013 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
48014 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
48015
48016 struct gcc_target targetm = TARGET_INITIALIZER;
48017 \f
48018 #include "gt-i386.h"