1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "c-common.h"
39 #include "except.h"
40 #include "function.h"
41 #include "recog.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "basic-block.h"
46 #include "ggc.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
56 #include "cselib.h"
57
58 static int x86_builtin_vectorization_cost (bool);
59 static rtx legitimize_dllimport_symbol (rtx, bool);
60
61 #ifndef CHECK_STACK_LIMIT
62 #define CHECK_STACK_LIMIT (-1)
63 #endif
64
65 /* Return the index of the given mode in the mult and division cost tables. */
66 #define MODE_INDEX(mode) \
67 ((mode) == QImode ? 0 \
68 : (mode) == HImode ? 1 \
69 : (mode) == SImode ? 2 \
70 : (mode) == DImode ? 3 \
71 : 4)
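/* Illustrative sketch (not code from this file as-is): the per-mode cost
   arrays below are indexed with this macro, so something along the lines of
     ix86_cost->mult_init[MODE_INDEX (SImode)]
   would pick the SImode entry (index 2) of the "cost of starting multiply"
   array; mult_init is assumed here as the field name for that array.  */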
72
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
75 #define COSTS_N_BYTES(N) ((N) * 2)
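/* Worked example of the assumption above: with COSTS_N_INSNS (N) == (N) * 4,
   an add is COSTS_N_INSNS (1) == 4 on the speed scale; modelling that add as
   2 bytes gives COSTS_N_BYTES (2) == 4 on the size scale, so the size-tuned
   table below stays numerically comparable with the speed-tuned tables.  */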
76
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
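/* A rough reading of the stringop descriptors used in the cost tables below
   (struct stringop_algs is defined in i386.h): each entry has the shape
     {alg_for_unknown_size, {{max_size, alg}, ..., {-1, alg}}}
   i.e. an algorithm for dynamically sized blocks plus a {-1, ...}-terminated
   list of size thresholds, and every cost table carries one such pair for
   memcpy and one for memset, apparently the 32-bit and 64-bit variants.
   For example, the ix86_size_cost table below uses
     {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, ...}
   which simply says "use rep movsb/stosb at any size".  DUMMY_STRINGOP_ALGS
   above is the placeholder for a variant a given CPU never compiles for
   (e.g. the 64-bit slot of the plain 386 table).  */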
78
79 const
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
98 0, /* "large" insn */
99 2, /* MOVE_RATIO */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
125 2, /* Branch cost */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
147 };
148
149 /* Processor costs (relative to an add) */
150 static const
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
170 3, /* MOVE_RATIO */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
196 1, /* Branch cost */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
218 };
219
220 static const
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
240 3, /* MOVE_RATIO */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
268 1, /* Branch cost */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
291
292 static const
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
312 6, /* MOVE_RATIO */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
338 2, /* Branch cost */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
360 };
361
362 static const
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
382 6, /* MOVE_RATIO */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
408 2, /* Branch cost */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
416 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
417 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
418 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
419 */
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
437 };
438
439 static const
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
459 4, /* MOVE_RATIO */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
470
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
486 1, /* Branch cost */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
508 };
509
510 static const
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
530 4, /* MOVE_RATIO */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
559 1, /* Branch cost */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
581 };
582
583 static const
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
603 9, /* MOVE_RATIO */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
629 5, /* Branch cost */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
637 than K8 does. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
654 };
655
656 static const
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
676 9, /* MOVE_RATIO */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set the number of simultaneous prefetches
703 to a large constant to reflect this (it is probably not a good idea to leave
704 the number of prefetches entirely unlimited, as their execution also takes
705 some time). */
706 100, /* number of parallel prefetches */
707 3, /* Branch cost */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
715 blocks it is better to use a loop. For large blocks, a libcall can do
716 nontemporal accesses and beat inline code considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
733 };
734
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
754 9, /* MOVE_RATIO */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
776 /* On K8
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
779 On AMDFAM10
780 MOVD reg64, xmmreg Double FADD 3
781 1/1 1/1
782 MOVD reg32, xmmreg Double FADD 3
783 1/1 1/1 */
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set the number of simultaneous prefetches
789 to a large constant to reflect this (it is probably not a good idea to leave
790 the number of prefetches entirely unlimited, as their execution also takes
791 some time). */
792 100, /* number of parallel prefetches */
793 2, /* Branch cost */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800
801 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
802 very small blocks it is better to use a loop. For large blocks, a libcall can
803 do nontemporal accesses and beat inline code considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
820 };
821
822 static const
823 struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
842 6, /* MOVE_RATIO */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
868 2, /* Branch cost */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 {-1, libcall}}},
879 DUMMY_STRINGOP_ALGS},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
891 };
892
893 static const
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
913 17, /* MOVE_RATIO */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
939 1, /* Branch cost */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {-1, libcall}}},
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
964 };
965
966 static const
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
986 16, /* MOVE_RATIO */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
1037 };
1038
1039 static const
1040 struct processor_costs atom_cost = {
1041 COSTS_N_INSNS (1), /* cost of an add instruction */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 2, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 256, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 3, /* Branch cost */
1086 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1087 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1088 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1089 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1090 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1091 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1092 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1093 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1094 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1095 {{libcall, {{8, loop}, {15, unrolled_loop},
1096 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1097 {libcall, {{24, loop}, {32, unrolled_loop},
1098 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 1, /* scalar_stmt_cost. */
1100 1, /* scalar load_cost. */
1101 1, /* scalar_store_cost. */
1102 1, /* vec_stmt_cost. */
1103 1, /* vec_to_scalar_cost. */
1104 1, /* scalar_to_vec_cost. */
1105 1, /* vec_align_load_cost. */
1106 2, /* vec_unalign_load_cost. */
1107 1, /* vec_store_cost. */
1108 3, /* cond_taken_branch_cost. */
1109 1, /* cond_not_taken_branch_cost. */
1110 };
1111
1112 /* Generic64 should produce code tuned for Nocona and K8. */
1113 static const
1114 struct processor_costs generic64_cost = {
1115 COSTS_N_INSNS (1), /* cost of an add instruction */
1116 /* On all chips taken into consideration, lea is 2 cycles or more. With
1117 this cost, however, our current implementation of synth_mult results in
1118 the use of unnecessary temporary registers, causing regressions on several
1119 SPECfp benchmarks. */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 512, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1164 value is increased to the perhaps more appropriate value of 5. */
1165 3, /* Branch cost */
1166 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1167 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1168 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1169 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1170 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1171 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1172 {DUMMY_STRINGOP_ALGS,
1173 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1174 {DUMMY_STRINGOP_ALGS,
1175 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1176 1, /* scalar_stmt_cost. */
1177 1, /* scalar load_cost. */
1178 1, /* scalar_store_cost. */
1179 1, /* vec_stmt_cost. */
1180 1, /* vec_to_scalar_cost. */
1181 1, /* scalar_to_vec_cost. */
1182 1, /* vec_align_load_cost. */
1183 2, /* vec_unalign_load_cost. */
1184 1, /* vec_store_cost. */
1185 3, /* cond_taken_branch_cost. */
1186 1, /* cond_not_taken_branch_cost. */
1187 };
1188
1189 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1190 static const
1191 struct processor_costs generic32_cost = {
1192 COSTS_N_INSNS (1), /* cost of an add instruction */
1193 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1194 COSTS_N_INSNS (1), /* variable shift costs */
1195 COSTS_N_INSNS (1), /* constant shift costs */
1196 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1197 COSTS_N_INSNS (4), /* HI */
1198 COSTS_N_INSNS (3), /* SI */
1199 COSTS_N_INSNS (4), /* DI */
1200 COSTS_N_INSNS (2)}, /* other */
1201 0, /* cost of multiply per each bit set */
1202 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1203 COSTS_N_INSNS (26), /* HI */
1204 COSTS_N_INSNS (42), /* SI */
1205 COSTS_N_INSNS (74), /* DI */
1206 COSTS_N_INSNS (74)}, /* other */
1207 COSTS_N_INSNS (1), /* cost of movsx */
1208 COSTS_N_INSNS (1), /* cost of movzx */
1209 8, /* "large" insn */
1210 17, /* MOVE_RATIO */
1211 4, /* cost for loading QImode using movzbl */
1212 {4, 4, 4}, /* cost of loading integer registers
1213 in QImode, HImode and SImode.
1214 Relative to reg-reg move (2). */
1215 {4, 4, 4}, /* cost of storing integer registers */
1216 4, /* cost of reg,reg fld/fst */
1217 {12, 12, 12}, /* cost of loading fp registers
1218 in SFmode, DFmode and XFmode */
1219 {6, 6, 8}, /* cost of storing fp registers
1220 in SFmode, DFmode and XFmode */
1221 2, /* cost of moving MMX register */
1222 {8, 8}, /* cost of loading MMX registers
1223 in SImode and DImode */
1224 {8, 8}, /* cost of storing MMX registers
1225 in SImode and DImode */
1226 2, /* cost of moving SSE register */
1227 {8, 8, 8}, /* cost of loading SSE registers
1228 in SImode, DImode and TImode */
1229 {8, 8, 8}, /* cost of storing SSE registers
1230 in SImode, DImode and TImode */
1231 5, /* MMX or SSE register to integer */
1232 32, /* size of l1 cache. */
1233 256, /* size of l2 cache. */
1234 64, /* size of prefetch block */
1235 6, /* number of parallel prefetches */
1236 3, /* Branch cost */
1237 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1238 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1239 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1240 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1241 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1242 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1243 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1244 DUMMY_STRINGOP_ALGS},
1245 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1246 DUMMY_STRINGOP_ALGS},
1247 1, /* scalar_stmt_cost. */
1248 1, /* scalar load_cost. */
1249 1, /* scalar_store_cost. */
1250 1, /* vec_stmt_cost. */
1251 1, /* vec_to_scalar_cost. */
1252 1, /* scalar_to_vec_cost. */
1253 1, /* vec_align_load_cost. */
1254 2, /* vec_unalign_load_cost. */
1255 1, /* vec_store_cost. */
1256 3, /* cond_taken_branch_cost. */
1257 1, /* cond_not_taken_branch_cost. */
1258 };
1259
1260 const struct processor_costs *ix86_cost = &pentium_cost;
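/* A minimal sketch of how this default is expected to be overridden (the real
   logic is in override_options, later in this file); roughly:

     if (optimize_size)
       ix86_cost = &ix86_size_cost;
     else
       ix86_cost = processor_target_table[ix86_tune].cost;

   so &pentium_cost above only serves as the initial value.  */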
1261
1262 /* Processor feature/optimization bitmasks. */
1263 #define m_386 (1<<PROCESSOR_I386)
1264 #define m_486 (1<<PROCESSOR_I486)
1265 #define m_PENT (1<<PROCESSOR_PENTIUM)
1266 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1267 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1268 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1269 #define m_CORE2 (1<<PROCESSOR_CORE2)
1270 #define m_ATOM (1<<PROCESSOR_ATOM)
1271
1272 #define m_GEODE (1<<PROCESSOR_GEODE)
1273 #define m_K6 (1<<PROCESSOR_K6)
1274 #define m_K6_GEODE (m_K6 | m_GEODE)
1275 #define m_K8 (1<<PROCESSOR_K8)
1276 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1277 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1278 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1279 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1280
1281 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1282 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283
1284 /* Generic instruction choice should be a common subset of the supported CPUs
1285 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1286 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287
1288 /* Feature tests against the various tunings. */
1289 unsigned char ix86_tune_features[X86_TUNE_LAST];
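/* A sketch of how this array is expected to be filled in from the
   initial_ix86_tune_features masks below (the actual loop lives in
   override_options); roughly:

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
*/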
1290
1291 /* Feature tests against the various tunings used to create ix86_tune_features
1292 based on the processor mask. */
1293 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1294 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1295 negatively, so enabling it for Generic64 seems like a good code size
1296 tradeoff. We can't enable it for 32-bit generic because it does not
1297 work well with PPro based chips. */
1298 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299
1300 /* X86_TUNE_PUSH_MEMORY */
1301 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1302 | m_NOCONA | m_CORE2 | m_GENERIC,
1303
1304 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1305 m_486 | m_PENT,
1306
1307 /* X86_TUNE_UNROLL_STRLEN */
1308 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1309 | m_CORE2 | m_GENERIC,
1310
1311 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1312 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313
1314 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1315 on simulation results. But after P4 was made, no performance benefit
1316 was observed with branch hints; they also increase the code size.
1317 As a result, icc never generates branch hints. */
1318 0,
1319
1320 /* X86_TUNE_DOUBLE_WITH_ADD */
1321 ~m_386,
1322
1323 /* X86_TUNE_USE_SAHF */
1324 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1325 | m_NOCONA | m_CORE2 | m_GENERIC,
1326
1327 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1328 partial dependencies. */
1329 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1330 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331
1332 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1333 register stalls on the Generic32 compilation setting as well. However,
1334 in the current implementation the partial register stalls are not eliminated
1335 very well - they can be introduced via subregs synthesized by combine
1336 and can happen in caller/callee saving sequences. Because this option
1337 pays back little on PPro based chips and conflicts with the partial reg
1338 dependencies used by Athlon/P4 based chips, it is better to leave it off
1339 for generic32 for now. */
1340 m_PPRO,
1341
1342 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1343 m_CORE2 | m_GENERIC,
1344
1345 /* X86_TUNE_USE_HIMODE_FIOP */
1346 m_386 | m_486 | m_K6_GEODE,
1347
1348 /* X86_TUNE_USE_SIMODE_FIOP */
1349 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350
1351 /* X86_TUNE_USE_MOV0 */
1352 m_K6,
1353
1354 /* X86_TUNE_USE_CLTD */
1355 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356
1357 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1358 m_PENT4,
1359
1360 /* X86_TUNE_SPLIT_LONG_MOVES */
1361 m_PPRO,
1362
1363 /* X86_TUNE_READ_MODIFY_WRITE */
1364 ~m_PENT,
1365
1366 /* X86_TUNE_READ_MODIFY */
1367 ~(m_PENT | m_PPRO),
1368
1369 /* X86_TUNE_PROMOTE_QIMODE */
1370 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1371 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372
1373 /* X86_TUNE_FAST_PREFIX */
1374 ~(m_PENT | m_486 | m_386),
1375
1376 /* X86_TUNE_SINGLE_STRINGOP */
1377 m_386 | m_PENT4 | m_NOCONA,
1378
1379 /* X86_TUNE_QIMODE_MATH */
1380 ~0,
1381
1382 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1383 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1384 might be considered for Generic32 if our scheme for avoiding partial
1385 stalls were more effective. */
1386 ~m_PPRO,
1387
1388 /* X86_TUNE_PROMOTE_QI_REGS */
1389 0,
1390
1391 /* X86_TUNE_PROMOTE_HI_REGS */
1392 m_PPRO,
1393
1394 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1395 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1396 | m_CORE2 | m_GENERIC,
1397
1398 /* X86_TUNE_ADD_ESP_8 */
1399 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1400 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401
1402 /* X86_TUNE_SUB_ESP_4 */
1403 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1404 | m_GENERIC,
1405
1406 /* X86_TUNE_SUB_ESP_8 */
1407 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1408 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409
1410 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1411 for DFmode copies */
1412 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1413 | m_GENERIC | m_GEODE),
1414
1415 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1416 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417
1418 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1419 conflict here between PPro/Pentium4 based chips, which treat 128bit
1420 SSE registers as single units, and K8 based chips, which divide SSE
1421 registers into two 64bit halves. This knob promotes all store destinations
1422 to 128bit to allow register renaming on 128bit SSE units, but usually
1423 results in one extra micro-op on 64bit SSE units. Experimental results
1424 show that disabling this option on P4 brings over a 20% SPECfp regression,
1425 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1426 masked by careful scheduling of moves. */
1427 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1428 | m_AMDFAM10,
1429
1430 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1431 m_AMDFAM10,
1432
1433 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1434 are resolved on SSE register parts instead of whole registers, so we may
1435 maintain just the lower part of scalar values in the proper format, leaving
1436 the upper part undefined. */
1437 m_ATHLON_K8,
1438
1439 /* X86_TUNE_SSE_TYPELESS_STORES */
1440 m_AMD_MULTIPLE,
1441
1442 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1443 m_PPRO | m_PENT4 | m_NOCONA,
1444
1445 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1446 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447
1448 /* X86_TUNE_PROLOGUE_USING_MOVE */
1449 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450
1451 /* X86_TUNE_EPILOGUE_USING_MOVE */
1452 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453
1454 /* X86_TUNE_SHIFT1 */
1455 ~m_486,
1456
1457 /* X86_TUNE_USE_FFREEP */
1458 m_AMD_MULTIPLE,
1459
1460 /* X86_TUNE_INTER_UNIT_MOVES */
1461 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462
1463 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1464 ~(m_AMDFAM10),
1465
1466 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1467 than 4 branch instructions in a 16 byte window. */
1468 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1469 | m_GENERIC,
1470
1471 /* X86_TUNE_SCHEDULE */
1472 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1473 | m_GENERIC,
1474
1475 /* X86_TUNE_USE_BT */
1476 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477
1478 /* X86_TUNE_USE_INCDEC */
1479 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480
1481 /* X86_TUNE_PAD_RETURNS */
1482 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483
1484 /* X86_TUNE_EXT_80387_CONSTANTS */
1485 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1486 | m_CORE2 | m_GENERIC,
1487
1488 /* X86_TUNE_SHORTEN_X87_SSE */
1489 ~m_K8,
1490
1491 /* X86_TUNE_AVOID_VECTOR_DECODE */
1492 m_K8 | m_GENERIC64,
1493
1494 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1495 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1496 ~(m_386 | m_486),
1497
1498 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1499 vector path on AMD machines. */
1500 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501
1502 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 machines. */
1504 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505
1506 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1507 than a MOV. */
1508 m_PENT,
1509
1510 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1511 but one byte longer. */
1512 m_PENT,
1513
1514 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1515 operand that cannot be represented using a modRM byte. The XOR
1516 replacement is long decoded, so this split helps here as well. */
1517 m_K6,
1518
1519 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 from FP to FP. */
1521 m_AMDFAM10 | m_GENERIC,
1522
1523 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1524 from integer to FP. */
1525 m_AMDFAM10,
1526
1527 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1528 with a subsequent conditional jump instruction into a single
1529 compare-and-branch uop. */
1530 m_CORE2,
1531
1532 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1533 will impact LEA instruction selection. */
1534 m_ATOM,
1535 };
1536
1537 /* Feature tests against the various architecture variations. */
1538 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539
1540 /* Feature tests against the various architecture variations, used to create
1541 ix86_arch_features based on the processor mask. */
1542 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1543 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1544 ~(m_386 | m_486 | m_PENT | m_K6),
1545
1546 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1547 ~m_386,
1548
1549 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1550 ~(m_386 | m_486),
1551
1552 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1553 ~m_386,
1554
1555 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1556 ~m_386,
1557 };
1558
1559 static const unsigned int x86_accumulate_outgoing_args
1560 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1561 | m_GENERIC;
1562
1563 static const unsigned int x86_arch_always_fancy_math_387
1564 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1565 | m_NOCONA | m_CORE2 | m_GENERIC;
1566
1567 static enum stringop_alg stringop_alg = no_stringop;
1568
1569 /* In case the average insn count for a single function invocation is
1570 lower than this constant, emit fast (but longer) prologue and
1571 epilogue code. */
1572 #define FAST_PROLOGUE_INSN_COUNT 20
1573
1574 /* Names for the 8-bit low, 8-bit high, and 16-bit registers, respectively. */
1575 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1576 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1577 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578
1579 /* Array of the smallest class containing reg number REGNO, indexed by
1580 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1581
1582 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 {
1584 /* ax, dx, cx, bx */
1585 AREG, DREG, CREG, BREG,
1586 /* si, di, bp, sp */
1587 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1588 /* FP registers */
1589 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1590 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1591 /* arg pointer */
1592 NON_Q_REGS,
1593 /* flags, fpsr, fpcr, frame */
1594 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1595 /* SSE registers */
1596 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1597 SSE_REGS, SSE_REGS,
1598 /* MMX registers */
1599 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1600 MMX_REGS, MMX_REGS,
1601 /* REX registers */
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1604 /* SSE REX registers */
1605 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1606 SSE_REGS, SSE_REGS,
1607 };
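
/* A small worked example (not authoritative; it assumes the usual
   REGNO_REG_CLASS definition in i386.h, which simply indexes this array):
   REGNO_REG_CLASS (0) is AREG for %ax/%eax, while REGNO_REG_CLASS (7),
   the stack pointer, is NON_Q_REGS because %sp has no addressable QImode
   low part in 32-bit mode.  */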
1608
1609 /* The "default" register map used in 32bit mode. */
1610
1611 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 {
1613 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1614 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1615 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1616 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1617 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1619 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1620 };
1621
1622 /* The "default" register map used in 64bit mode. */
1623
1624 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 {
1626 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1627 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1628 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1629 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1630 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1631 8,9,10,11,12,13,14,15, /* extended integer registers */
1632 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1633 };
1634
1635 /* Define the register numbers to be used in Dwarf debugging information.
1636 The SVR4 reference port C compiler uses the following register numbers
1637 in its Dwarf output code:
1638 0 for %eax (gcc regno = 0)
1639 1 for %ecx (gcc regno = 2)
1640 2 for %edx (gcc regno = 1)
1641 3 for %ebx (gcc regno = 3)
1642 4 for %esp (gcc regno = 7)
1643 5 for %ebp (gcc regno = 6)
1644 6 for %esi (gcc regno = 4)
1645 7 for %edi (gcc regno = 5)
1646 The following three DWARF register numbers are never generated by
1647 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1648 believes these numbers have these meanings.
1649 8 for %eip (no gcc equivalent)
1650 9 for %eflags (gcc regno = 17)
1651 10 for %trapno (no gcc equivalent)
1652 It is not at all clear how we should number the FP stack registers
1653 for the x86 architecture. If the version of SDB on x86/svr4 were
1654 a bit less brain dead with respect to floating-point then we would
1655 have a precedent to follow with respect to DWARF register numbers
1656 for x86 FP registers, but the SDB on x86/svr4 is so completely
1657 broken with respect to FP registers that it is hardly worth thinking
1658 of it as something to strive for compatibility with.
1659 The version of x86/svr4 SDB I have at the moment does (partially)
1660 seem to believe that DWARF register number 11 is associated with
1661 the x86 register %st(0), but that's about all. Higher DWARF
1662 register numbers don't seem to be associated with anything in
1663 particular, and even for DWARF regno 11, SDB only seems to under-
1664 stand that it should say that a variable lives in %st(0) (when
1665 asked via an `=' command) if we said it was in DWARF regno 11,
1666 but SDB still prints garbage when asked for the value of the
1667 variable in question (via a `/' command).
1668 (Also note that the labels SDB prints for various FP stack regs
1669 when doing an `x' command are all wrong.)
1670 Note that these problems generally don't affect the native SVR4
1671 C compiler because it doesn't allow the use of -O with -g and
1672 because when it is *not* optimizing, it allocates a memory
1673 location for each floating-point variable, and the memory
1674 location is what gets described in the DWARF AT_location
1675 attribute for the variable in question.
1676 Regardless of the severe mental illness of the x86/svr4 SDB, we
1677 do something sensible here and we use the following DWARF
1678 register numbers. Note that these are all stack-top-relative
1679 numbers.
1680 11 for %st(0) (gcc regno = 8)
1681 12 for %st(1) (gcc regno = 9)
1682 13 for %st(2) (gcc regno = 10)
1683 14 for %st(3) (gcc regno = 11)
1684 15 for %st(4) (gcc regno = 12)
1685 16 for %st(5) (gcc regno = 13)
1686 17 for %st(6) (gcc regno = 14)
1687 18 for %st(7) (gcc regno = 15)
1688 */
1689 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1690 {
1691 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1692 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1693 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1694 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1695 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1697 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1698 };
1699
1700 /* Test and compare insns in i386.md store the information needed to
1701 generate branch and scc insns here. */
1702
1703 rtx ix86_compare_op0 = NULL_RTX;
1704 rtx ix86_compare_op1 = NULL_RTX;
1705
1706 /* Define parameter passing and return registers. */
1707
1708 static int const x86_64_int_parameter_registers[6] =
1709 {
1710 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1711 };
1712
1713 static int const x86_64_ms_abi_int_parameter_registers[4] =
1714 {
1715 CX_REG, DX_REG, R8_REG, R9_REG
1716 };
1717
1718 static int const x86_64_int_return_registers[4] =
1719 {
1720 AX_REG, DX_REG, DI_REG, SI_REG
1721 };
1722
1723 /* Define the structure for the machine field in struct function. */
1724
1725 struct GTY(()) stack_local_entry {
1726 unsigned short mode;
1727 unsigned short n;
1728 rtx rtl;
1729 struct stack_local_entry *next;
1730 };
1731
1732 /* Structure describing stack frame layout.
1733 Stack grows downward:
1734
1735 [arguments]
1736 <- ARG_POINTER
1737 saved pc
1738
1739 saved frame pointer if frame_pointer_needed
1740 <- HARD_FRAME_POINTER
1741 [saved regs]
1742
1743 [padding0]
1744
1745 [saved SSE regs]
1746
1747 [padding1] \
1748 )
1749 [va_arg registers] (
1750 > to_allocate <- FRAME_POINTER
1751 [frame] (
1752 )
1753 [padding2] /
1754 */
1755 struct ix86_frame
1756 {
1757 int padding0;
1758 int nsseregs;
1759 int nregs;
1760 int padding1;
1761 int va_arg_size;
1762 HOST_WIDE_INT frame;
1763 int padding2;
1764 int outgoing_arguments_size;
1765 int red_zone_size;
1766
1767 HOST_WIDE_INT to_allocate;
1768 /* The offsets relative to ARG_POINTER. */
1769 HOST_WIDE_INT frame_pointer_offset;
1770 HOST_WIDE_INT hard_frame_pointer_offset;
1771 HOST_WIDE_INT stack_pointer_offset;
1772
1773 /* When save_regs_using_mov is set, emit prologue using
1774 move instead of push instructions. */
1775 bool save_regs_using_mov;
1776 };
1777
1778 /* Code model option. */
1779 enum cmodel ix86_cmodel;
1780 /* Asm dialect. */
1781 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1782 /* TLS dialects. */
1783 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1784
1785 /* Which unit we are generating floating point math for. */
1786 enum fpmath_unit ix86_fpmath;
1787
1788 /* Which cpu are we scheduling for. */
1789 enum attr_cpu ix86_schedule;
1790
1791 /* Which cpu are we optimizing for. */
1792 enum processor_type ix86_tune;
1793
1794 /* Which instruction set architecture to use. */
1795 enum processor_type ix86_arch;
1796
1797 /* True if the SSE prefetch instruction is not a NOP. */
1798 int x86_prefetch_sse;
1799
1800 /* ix86_regparm_string as a number */
1801 static int ix86_regparm;
1802
1803 /* -mstackrealign option */
1804 extern int ix86_force_align_arg_pointer;
1805 static const char ix86_force_align_arg_pointer_string[]
1806 = "force_align_arg_pointer";
1807
1808 static rtx (*ix86_gen_leave) (void);
1809 static rtx (*ix86_gen_pop1) (rtx);
1810 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1812 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1813 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1814 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1815 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1816
1817 /* Preferred alignment for stack boundary in bits. */
1818 unsigned int ix86_preferred_stack_boundary;
1819
1820 /* Alignment for incoming stack boundary in bits specified at
1821 command line. */
1822 static unsigned int ix86_user_incoming_stack_boundary;
1823
1824 /* Default alignment for incoming stack boundary in bits. */
1825 static unsigned int ix86_default_incoming_stack_boundary;
1826
1827 /* Alignment for incoming stack boundary in bits. */
1828 unsigned int ix86_incoming_stack_boundary;
1829
1830 /* The abi used by target. */
1831 enum calling_abi ix86_abi;
1832
1833 /* Values 1-5: see jump.c */
1834 int ix86_branch_cost;
1835
1836 /* Calling abi specific va_list type nodes. */
1837 static GTY(()) tree sysv_va_list_type_node;
1838 static GTY(()) tree ms_va_list_type_node;
1839
1840 /* Variables which are this size or smaller are put in the data/bss
1841 or ldata/lbss sections. */
1842
1843 int ix86_section_threshold = 65536;
1844
1845 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1846 char internal_label_prefix[16];
1847 int internal_label_prefix_len;
1848
1849 /* Fence to use after loop using movnt. */
1850 tree x86_mfence;
1851
1852 /* Register class used for passing a given 64bit part of the argument.
1853 These represent the classes documented by the psABI, with the exception
1854 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1855 uses an SF or DFmode move instead of DImode to avoid reformatting penalties.
1856
1857 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1858 whenever possible (the upper half contains only padding). */
1859 enum x86_64_reg_class
1860 {
1861 X86_64_NO_CLASS,
1862 X86_64_INTEGER_CLASS,
1863 X86_64_INTEGERSI_CLASS,
1864 X86_64_SSE_CLASS,
1865 X86_64_SSESF_CLASS,
1866 X86_64_SSEDF_CLASS,
1867 X86_64_SSEUP_CLASS,
1868 X86_64_X87_CLASS,
1869 X86_64_X87UP_CLASS,
1870 X86_64_COMPLEX_X87_CLASS,
1871 X86_64_MEMORY_CLASS
1872 };
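
/* Purely illustrative example of how these classes are used (the actual
   classification is performed later, in classify_argument): under the
   x86-64 psABI a type such as struct { double d; int i; } occupies two
   eightbytes; the first (the double) would be classified X86_64_SSEDF_CLASS
   and the second (the int plus padding) X86_64_INTEGERSI_CLASS, so the
   value is passed in one SSE register and one general register.  */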
1873
1874 #define MAX_CLASSES 4
1875
1876 /* Table of constants used by fldpi, fldln2, etc.... */
1877 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1878 static bool ext_80387_constants_init = 0;
1879
1880 \f
1881 static struct machine_function * ix86_init_machine_status (void);
1882 static rtx ix86_function_value (const_tree, const_tree, bool);
1883 static int ix86_function_regparm (const_tree, const_tree);
1884 static void ix86_compute_frame_layout (struct ix86_frame *);
1885 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1886 rtx, rtx, int);
1887 static void ix86_add_new_builtins (int);
1888
1889 enum ix86_function_specific_strings
1890 {
1891 IX86_FUNCTION_SPECIFIC_ARCH,
1892 IX86_FUNCTION_SPECIFIC_TUNE,
1893 IX86_FUNCTION_SPECIFIC_FPMATH,
1894 IX86_FUNCTION_SPECIFIC_MAX
1895 };
1896
1897 static char *ix86_target_string (int, int, const char *, const char *,
1898 const char *, bool);
1899 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1900 static void ix86_function_specific_save (struct cl_target_option *);
1901 static void ix86_function_specific_restore (struct cl_target_option *);
1902 static void ix86_function_specific_print (FILE *, int,
1903 struct cl_target_option *);
1904 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1905 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1906 static bool ix86_can_inline_p (tree, tree);
1907 static void ix86_set_current_function (tree);
1908
1909 static enum calling_abi ix86_function_abi (const_tree);
1910
1911 \f
1912 /* The svr4 ABI for the i386 says that records and unions are returned
1913 in memory. */
1914 #ifndef DEFAULT_PCC_STRUCT_RETURN
1915 #define DEFAULT_PCC_STRUCT_RETURN 1
1916 #endif
1917
1918 /* Whether -mtune= or -march= were specified */
1919 static int ix86_tune_defaulted;
1920 static int ix86_arch_specified;
1921
1922 /* Bit flags that specify the ISA we are compiling for. */
1923 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1924
1925 /* A mask of ix86_isa_flags that includes bit X if X
1926 was set or cleared on the command line. */
1927 static int ix86_isa_flags_explicit;
1928
1929 /* Define a set of ISAs which are available when a given ISA is
1930 enabled. MMX and SSE ISAs are handled separately. */
1931
1932 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1933 #define OPTION_MASK_ISA_3DNOW_SET \
1934 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1935
1936 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1937 #define OPTION_MASK_ISA_SSE2_SET \
1938 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1939 #define OPTION_MASK_ISA_SSE3_SET \
1940 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1941 #define OPTION_MASK_ISA_SSSE3_SET \
1942 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1943 #define OPTION_MASK_ISA_SSE4_1_SET \
1944 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1945 #define OPTION_MASK_ISA_SSE4_2_SET \
1946 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1947 #define OPTION_MASK_ISA_AVX_SET \
1948 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1949 #define OPTION_MASK_ISA_FMA_SET \
1950 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1951
1952 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1953 as -msse4.2. */
1954 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1955
1956 #define OPTION_MASK_ISA_SSE4A_SET \
1957 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1958 #define OPTION_MASK_ISA_SSE5_SET \
1959 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1960
1961 /* AES and PCLMUL need SSE2 because they use xmm registers */
1962 #define OPTION_MASK_ISA_AES_SET \
1963 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1964 #define OPTION_MASK_ISA_PCLMUL_SET \
1965 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1966
1967 #define OPTION_MASK_ISA_ABM_SET \
1968 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
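
/* A worked expansion of the chaining above, for illustration: a single
   -msse4.1 ORs in OPTION_MASK_ISA_SSE4_1_SET, which by the definitions
   above is

     OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3
       | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE

   so every SSE level the option depends on is enabled as well.  */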
1972
1973 /* Define a set of ISAs which aren't available when a given ISA is
1974 disabled. MMX and SSE ISAs are handled separately. */
1975
1976 #define OPTION_MASK_ISA_MMX_UNSET \
1977 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1978 #define OPTION_MASK_ISA_3DNOW_UNSET \
1979 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1980 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1981
1982 #define OPTION_MASK_ISA_SSE_UNSET \
1983 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1984 #define OPTION_MASK_ISA_SSE2_UNSET \
1985 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1986 #define OPTION_MASK_ISA_SSE3_UNSET \
1987 (OPTION_MASK_ISA_SSE3 \
1988 | OPTION_MASK_ISA_SSSE3_UNSET \
1989 | OPTION_MASK_ISA_SSE4A_UNSET )
1990 #define OPTION_MASK_ISA_SSSE3_UNSET \
1991 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1992 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1993 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1994 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1995 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1996 #define OPTION_MASK_ISA_AVX_UNSET \
1997 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1998 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
1999
2000 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2001 as -mno-sse4.1. */
2002 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2003
2004 #define OPTION_MASK_ISA_SSE4A_UNSET \
2005 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2006 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2007 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2008 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2009 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2010 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2011 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2012 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
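
/* Conversely, a worked expansion of the UNSET chaining: -mno-sse2 clears
   OPTION_MASK_ISA_SSE2_UNSET, which by the definitions above covers SSE2,
   SSE3, SSSE3, SSE4A, SSE5, SSE4.1, SSE4.2, AVX and FMA, i.e. turning off
   SSE2 also turns off every extension that depends on it.  */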
2013
2014 /* Vectorization library interface and handlers. */
2015 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2016 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2017 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2018
2019 /* Processor target table, indexed by processor number */
2020 struct ptt
2021 {
2022 const struct processor_costs *cost; /* Processor costs */
2023 const int align_loop; /* Default alignments. */
2024 const int align_loop_max_skip;
2025 const int align_jump;
2026 const int align_jump_max_skip;
2027 const int align_func;
2028 };
2029
2030 static const struct ptt processor_target_table[PROCESSOR_max] =
2031 {
2032 {&i386_cost, 4, 3, 4, 3, 4},
2033 {&i486_cost, 16, 15, 16, 15, 16},
2034 {&pentium_cost, 16, 7, 16, 7, 16},
2035 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2036 {&geode_cost, 0, 0, 0, 0, 0},
2037 {&k6_cost, 32, 7, 32, 7, 32},
2038 {&athlon_cost, 16, 7, 16, 7, 16},
2039 {&pentium4_cost, 0, 0, 0, 0, 0},
2040 {&k8_cost, 16, 7, 16, 7, 16},
2041 {&nocona_cost, 0, 0, 0, 0, 0},
2042 {&core2_cost, 16, 10, 16, 10, 16},
2043 {&generic32_cost, 16, 7, 16, 7, 16},
2044 {&generic64_cost, 16, 10, 16, 10, 16},
2045 {&amdfam10_cost, 32, 24, 32, 7, 32},
2046 {&atom_cost, 16, 7, 16, 7, 16}
2047 };
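
/* Reading the table above, as an example: the core2 row,
   {&core2_cost, 16, 10, 16, 10, 16}, asks for 16-byte alignment of loops,
   jumps and functions, with at most 10 bytes of padding skipped for loops
   and jumps. These values become the defaults further down in
   override_options when no explicit -falign-* option was given.  */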
2048
2049 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2050 {
2051 "generic",
2052 "i386",
2053 "i486",
2054 "pentium",
2055 "pentium-mmx",
2056 "pentiumpro",
2057 "pentium2",
2058 "pentium3",
2059 "pentium4",
2060 "pentium-m",
2061 "prescott",
2062 "nocona",
2063 "core2",
2064 "atom",
2065 "geode",
2066 "k6",
2067 "k6-2",
2068 "k6-3",
2069 "athlon",
2070 "athlon-4",
2071 "k8",
2072 "amdfam10"
2073 };
2074 \f
2075 /* Implement TARGET_HANDLE_OPTION. */
2076
2077 static bool
2078 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2079 {
2080 switch (code)
2081 {
2082 case OPT_mmmx:
2083 if (value)
2084 {
2085 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2087 }
2088 else
2089 {
2090 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2091 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2092 }
2093 return true;
2094
2095 case OPT_m3dnow:
2096 if (value)
2097 {
2098 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2100 }
2101 else
2102 {
2103 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2104 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2105 }
2106 return true;
2107
2108 case OPT_m3dnowa:
2109 return false;
2110
2111 case OPT_msse:
2112 if (value)
2113 {
2114 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2115 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2116 }
2117 else
2118 {
2119 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2120 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2121 }
2122 return true;
2123
2124 case OPT_msse2:
2125 if (value)
2126 {
2127 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2128 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2129 }
2130 else
2131 {
2132 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2133 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2134 }
2135 return true;
2136
2137 case OPT_msse3:
2138 if (value)
2139 {
2140 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2141 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2142 }
2143 else
2144 {
2145 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2146 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2147 }
2148 return true;
2149
2150 case OPT_mssse3:
2151 if (value)
2152 {
2153 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2154 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2155 }
2156 else
2157 {
2158 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2159 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2160 }
2161 return true;
2162
2163 case OPT_msse4_1:
2164 if (value)
2165 {
2166 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2167 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2168 }
2169 else
2170 {
2171 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2172 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2173 }
2174 return true;
2175
2176 case OPT_msse4_2:
2177 if (value)
2178 {
2179 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2180 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2181 }
2182 else
2183 {
2184 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2185 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2186 }
2187 return true;
2188
2189 case OPT_mavx:
2190 if (value)
2191 {
2192 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2193 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2194 }
2195 else
2196 {
2197 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2198 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2199 }
2200 return true;
2201
2202 case OPT_mfma:
2203 if (value)
2204 {
2205 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2206 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2207 }
2208 else
2209 {
2210 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2211 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2212 }
2213 return true;
2214
2215 case OPT_msse4:
2216 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2217 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2218 return true;
2219
2220 case OPT_mno_sse4:
2221 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2222 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2223 return true;
2224
2225 case OPT_msse4a:
2226 if (value)
2227 {
2228 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2230 }
2231 else
2232 {
2233 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2235 }
2236 return true;
2237
2238 case OPT_msse5:
2239 if (value)
2240 {
2241 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2242 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2243 }
2244 else
2245 {
2246 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2247 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2248 }
2249 return true;
2250
2251 case OPT_mabm:
2252 if (value)
2253 {
2254 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2255 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2256 }
2257 else
2258 {
2259 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2260 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2261 }
2262 return true;
2263
2264 case OPT_mpopcnt:
2265 if (value)
2266 {
2267 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2268 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2269 }
2270 else
2271 {
2272 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2273 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2274 }
2275 return true;
2276
2277 case OPT_msahf:
2278 if (value)
2279 {
2280 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2281 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2282 }
2283 else
2284 {
2285 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2286 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2287 }
2288 return true;
2289
2290 case OPT_mcx16:
2291 if (value)
2292 {
2293 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2294 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2295 }
2296 else
2297 {
2298 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2299 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2300 }
2301 return true;
2302
2303 case OPT_maes:
2304 if (value)
2305 {
2306 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2307 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2308 }
2309 else
2310 {
2311 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2312 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2313 }
2314 return true;
2315
2316 case OPT_mpclmul:
2317 if (value)
2318 {
2319 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2320 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2321 }
2322 else
2323 {
2324 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2325 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2326 }
2327 return true;
2328
2329 default:
2330 return true;
2331 }
2332 }
2333 \f
2334 /* Return a string that documents the current -m options. The caller is
2335 responsible for freeing the string. */
2336
2337 static char *
2338 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2339 const char *fpmath, bool add_nl_p)
2340 {
2341 struct ix86_target_opts
2342 {
2343 const char *option; /* option string */
2344 int mask; /* isa mask options */
2345 };
2346
2347 /* This table is ordered so that options like -msse5 or -msse4.2, which
2348 imply preceding options, are matched first. */
2349 static struct ix86_target_opts isa_opts[] =
2350 {
2351 { "-m64", OPTION_MASK_ISA_64BIT },
2352 { "-msse5", OPTION_MASK_ISA_SSE5 },
2353 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2354 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2355 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2356 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2357 { "-msse3", OPTION_MASK_ISA_SSE3 },
2358 { "-msse2", OPTION_MASK_ISA_SSE2 },
2359 { "-msse", OPTION_MASK_ISA_SSE },
2360 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2361 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2362 { "-mmmx", OPTION_MASK_ISA_MMX },
2363 { "-mabm", OPTION_MASK_ISA_ABM },
2364 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2365 { "-maes", OPTION_MASK_ISA_AES },
2366 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2367 };
2368
2369 /* Flag options. */
2370 static struct ix86_target_opts flag_opts[] =
2371 {
2372 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2373 { "-m80387", MASK_80387 },
2374 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2375 { "-malign-double", MASK_ALIGN_DOUBLE },
2376 { "-mcld", MASK_CLD },
2377 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2378 { "-mieee-fp", MASK_IEEE_FP },
2379 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2380 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2381 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2382 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2383 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2384 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2385 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2386 { "-mno-red-zone", MASK_NO_RED_ZONE },
2387 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2388 { "-mrecip", MASK_RECIP },
2389 { "-mrtd", MASK_RTD },
2390 { "-msseregparm", MASK_SSEREGPARM },
2391 { "-mstack-arg-probe", MASK_STACK_PROBE },
2392 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2393 };
2394
2395 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2396
2397 char isa_other[40];
2398 char target_other[40];
2399 unsigned num = 0;
2400 unsigned i, j;
2401 char *ret;
2402 char *ptr;
2403 size_t len;
2404 size_t line_len;
2405 size_t sep_len;
2406
2407 memset (opts, '\0', sizeof (opts));
2408
2409 /* Add -march= option. */
2410 if (arch)
2411 {
2412 opts[num][0] = "-march=";
2413 opts[num++][1] = arch;
2414 }
2415
2416 /* Add -mtune= option. */
2417 if (tune)
2418 {
2419 opts[num][0] = "-mtune=";
2420 opts[num++][1] = tune;
2421 }
2422
2423 /* Pick out the ISA options. */
2424 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2425 {
2426 if ((isa & isa_opts[i].mask) != 0)
2427 {
2428 opts[num++][0] = isa_opts[i].option;
2429 isa &= ~ isa_opts[i].mask;
2430 }
2431 }
2432
2433 if (isa && add_nl_p)
2434 {
2435 opts[num++][0] = isa_other;
2436 sprintf (isa_other, "(other isa: 0x%x)", isa);
2437 }
2438
2439 /* Add flag options. */
2440 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2441 {
2442 if ((flags & flag_opts[i].mask) != 0)
2443 {
2444 opts[num++][0] = flag_opts[i].option;
2445 flags &= ~ flag_opts[i].mask;
2446 }
2447 }
2448
2449 if (flags && add_nl_p)
2450 {
2451 opts[num++][0] = target_other;
2452 sprintf (target_other, "(other flags: 0x%x)", flags);
2453 }
2454
2455 /* Add -fpmath= option. */
2456 if (fpmath)
2457 {
2458 opts[num][0] = "-mfpmath=";
2459 opts[num++][1] = fpmath;
2460 }
2461
2462 /* Any options? */
2463 if (num == 0)
2464 return NULL;
2465
2466 gcc_assert (num < ARRAY_SIZE (opts));
2467
2468 /* Size the string. */
2469 len = 0;
2470 sep_len = (add_nl_p) ? 3 : 1;
2471 for (i = 0; i < num; i++)
2472 {
2473 len += sep_len;
2474 for (j = 0; j < 2; j++)
2475 if (opts[i][j])
2476 len += strlen (opts[i][j]);
2477 }
2478
2479 /* Build the string. */
2480 ret = ptr = (char *) xmalloc (len);
2481 line_len = 0;
2482
2483 for (i = 0; i < num; i++)
2484 {
2485 size_t len2[2];
2486
2487 for (j = 0; j < 2; j++)
2488 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2489
2490 if (i != 0)
2491 {
2492 *ptr++ = ' ';
2493 line_len++;
2494
2495 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2496 {
2497 *ptr++ = '\\';
2498 *ptr++ = '\n';
2499 line_len = 0;
2500 }
2501 }
2502
2503 for (j = 0; j < 2; j++)
2504 if (opts[i][j])
2505 {
2506 memcpy (ptr, opts[i][j], len2[j]);
2507 ptr += len2[j];
2508 line_len += len2[j];
2509 }
2510 }
2511
2512 *ptr = '\0';
2513 gcc_assert (ret + len >= ptr);
2514
2515 return ret;
2516 }
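
/* Illustrative only (assuming none of the MASK_* flag options are set):
   for isa = 64BIT | SSE2 | SSE | MMX, arch = "k8", tune = "generic" and
   fpmath = "sse", the string built above reads
   "-march=k8 -mtune=generic -m64 -msse2 -msse -mmmx -mfpmath=sse".  */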
2517
2518 /* Function that is callable from the debugger to print the current
2519 options. */
2520 void
2521 ix86_debug_options (void)
2522 {
2523 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2524 ix86_arch_string, ix86_tune_string,
2525 ix86_fpmath_string, true);
2526
2527 if (opts)
2528 {
2529 fprintf (stderr, "%s\n\n", opts);
2530 free (opts);
2531 }
2532 else
2533 fprintf (stderr, "<no options>\n\n");
2534
2535 return;
2536 }
2537 \f
2538 /* Sometimes certain combinations of command options do not make
2539 sense on a particular target machine. You can define a macro
2540 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2541 defined, is executed once just after all the command options have
2542 been parsed.
2543
2544 Don't use this macro to turn on various extra optimizations for
2545 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2546
2547 void
2548 override_options (bool main_args_p)
2549 {
2550 int i;
2551 unsigned int ix86_arch_mask, ix86_tune_mask;
2552 const char *prefix;
2553 const char *suffix;
2554 const char *sw;
2555
2556 /* Comes from final.c -- no real reason to change it. */
2557 #define MAX_CODE_ALIGN 16
2558
2559 enum pta_flags
2560 {
2561 PTA_SSE = 1 << 0,
2562 PTA_SSE2 = 1 << 1,
2563 PTA_SSE3 = 1 << 2,
2564 PTA_MMX = 1 << 3,
2565 PTA_PREFETCH_SSE = 1 << 4,
2566 PTA_3DNOW = 1 << 5,
2567 PTA_3DNOW_A = 1 << 6,
2568 PTA_64BIT = 1 << 7,
2569 PTA_SSSE3 = 1 << 8,
2570 PTA_CX16 = 1 << 9,
2571 PTA_POPCNT = 1 << 10,
2572 PTA_ABM = 1 << 11,
2573 PTA_SSE4A = 1 << 12,
2574 PTA_NO_SAHF = 1 << 13,
2575 PTA_SSE4_1 = 1 << 14,
2576 PTA_SSE4_2 = 1 << 15,
2577 PTA_SSE5 = 1 << 16,
2578 PTA_AES = 1 << 17,
2579 PTA_PCLMUL = 1 << 18,
2580 PTA_AVX = 1 << 19,
2581 PTA_FMA = 1 << 20
2582 };
2583
2584 static struct pta
2585 {
2586 const char *const name; /* processor name or nickname. */
2587 const enum processor_type processor;
2588 const enum attr_cpu schedule;
2589 const unsigned /*enum pta_flags*/ flags;
2590 }
2591 const processor_alias_table[] =
2592 {
2593 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2594 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2595 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2596 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2597 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2598 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2599 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2600 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2601 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2602 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2603 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2604 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2605 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2606 PTA_MMX | PTA_SSE},
2607 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2608 PTA_MMX | PTA_SSE},
2609 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2610 PTA_MMX | PTA_SSE | PTA_SSE2},
2611 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2612 PTA_MMX |PTA_SSE | PTA_SSE2},
2613 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2614 PTA_MMX | PTA_SSE | PTA_SSE2},
2615 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2616 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2617 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2618 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2619 | PTA_CX16 | PTA_NO_SAHF},
2620 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2621 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2622 | PTA_SSSE3 | PTA_CX16},
2623 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2624 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2625 | PTA_SSSE3 | PTA_CX16},
2626 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2627 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2628 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2629 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2630 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2631 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2632 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2633 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2634 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2635 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2636 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2637 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2638 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2639 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2640 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2641 {"x86-64", PROCESSOR_K8, CPU_K8,
2642 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2643 {"k8", PROCESSOR_K8, CPU_K8,
2644 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2645 | PTA_SSE2 | PTA_NO_SAHF},
2646 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2647 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2648 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2649 {"opteron", PROCESSOR_K8, CPU_K8,
2650 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2651 | PTA_SSE2 | PTA_NO_SAHF},
2652 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2653 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2654 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2655 {"athlon64", PROCESSOR_K8, CPU_K8,
2656 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2657 | PTA_SSE2 | PTA_NO_SAHF},
2658 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2659 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2660 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2661 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2662 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2663 | PTA_SSE2 | PTA_NO_SAHF},
2664 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2665 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2666 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2667 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2668 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2669 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2670 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2671 0 /* flags are only used for -march switch. */ },
2672 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2673 PTA_64BIT /* flags are only used for -march switch. */ },
2674 };
2675
2676 int const pta_size = ARRAY_SIZE (processor_alias_table);
2677
2678 /* Set up prefix/suffix so the error messages refer to either the command
2679 line argument, or the attribute(target). */
2680 if (main_args_p)
2681 {
2682 prefix = "-m";
2683 suffix = "";
2684 sw = "switch";
2685 }
2686 else
2687 {
2688 prefix = "option(\"";
2689 suffix = "\")";
2690 sw = "attribute";
2691 }
2692
2693 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2694 SUBTARGET_OVERRIDE_OPTIONS;
2695 #endif
2696
2697 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2698 SUBSUBTARGET_OVERRIDE_OPTIONS;
2699 #endif
2700
2701 /* -fPIC is the default for 64-bit Mach-O (Darwin x86_64); force it on. */
2702 if (TARGET_MACHO && TARGET_64BIT)
2703 flag_pic = 2;
2704
2705 /* Set the default values for switches whose default depends on TARGET_64BIT
2706 in case they weren't overwritten by command line options. */
2707 if (TARGET_64BIT)
2708 {
2709 /* Mach-O doesn't support omitting the frame pointer for now. */
2710 if (flag_omit_frame_pointer == 2)
2711 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2712 if (flag_asynchronous_unwind_tables == 2)
2713 flag_asynchronous_unwind_tables = 1;
2714 if (flag_pcc_struct_return == 2)
2715 flag_pcc_struct_return = 0;
2716 }
2717 else
2718 {
2719 if (flag_omit_frame_pointer == 2)
2720 flag_omit_frame_pointer = 0;
2721 if (flag_asynchronous_unwind_tables == 2)
2722 flag_asynchronous_unwind_tables = 0;
2723 if (flag_pcc_struct_return == 2)
2724 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2725 }
2726
2727 /* Need to check -mtune=generic first. */
2728 if (ix86_tune_string)
2729 {
2730 if (!strcmp (ix86_tune_string, "generic")
2731 || !strcmp (ix86_tune_string, "i686")
2732 /* As special support for cross compilers we read -mtune=native
2733 as -mtune=generic. With native compilers we won't see the
2734 -mtune=native, as it was changed by the driver. */
2735 || !strcmp (ix86_tune_string, "native"))
2736 {
2737 if (TARGET_64BIT)
2738 ix86_tune_string = "generic64";
2739 else
2740 ix86_tune_string = "generic32";
2741 }
2742 /* If this call is for setting the option attribute, allow the
2743 generic32/generic64 that was previously set. */
2744 else if (!main_args_p
2745 && (!strcmp (ix86_tune_string, "generic32")
2746 || !strcmp (ix86_tune_string, "generic64")))
2747 ;
2748 else if (!strncmp (ix86_tune_string, "generic", 7))
2749 error ("bad value (%s) for %stune=%s %s",
2750 ix86_tune_string, prefix, suffix, sw);
2751 }
2752 else
2753 {
2754 if (ix86_arch_string)
2755 ix86_tune_string = ix86_arch_string;
2756 if (!ix86_tune_string)
2757 {
2758 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2759 ix86_tune_defaulted = 1;
2760 }
2761
2762 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2763 need to use a sensible tune option. */
2764 if (!strcmp (ix86_tune_string, "generic")
2765 || !strcmp (ix86_tune_string, "x86-64")
2766 || !strcmp (ix86_tune_string, "i686"))
2767 {
2768 if (TARGET_64BIT)
2769 ix86_tune_string = "generic64";
2770 else
2771 ix86_tune_string = "generic32";
2772 }
2773 }
2774 if (ix86_stringop_string)
2775 {
2776 if (!strcmp (ix86_stringop_string, "rep_byte"))
2777 stringop_alg = rep_prefix_1_byte;
2778 else if (!strcmp (ix86_stringop_string, "libcall"))
2779 stringop_alg = libcall;
2780 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2781 stringop_alg = rep_prefix_4_byte;
2782 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2783 && TARGET_64BIT)
2784 /* rep; movq isn't available in 32-bit code. */
2785 stringop_alg = rep_prefix_8_byte;
2786 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2787 stringop_alg = loop_1_byte;
2788 else if (!strcmp (ix86_stringop_string, "loop"))
2789 stringop_alg = loop;
2790 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2791 stringop_alg = unrolled_loop;
2792 else
2793 error ("bad value (%s) for %sstringop-strategy=%s %s",
2794 ix86_stringop_string, prefix, suffix, sw);
2795 }
2796 if (!strcmp (ix86_tune_string, "x86-64"))
2797 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2798 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2799 prefix, suffix, prefix, suffix, prefix, suffix);
2800
2801 if (!ix86_arch_string)
2802 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2803 else
2804 ix86_arch_specified = 1;
2805
2806 if (!strcmp (ix86_arch_string, "generic"))
2807 error ("generic CPU can be used only for %stune=%s %s",
2808 prefix, suffix, sw);
2809 if (!strncmp (ix86_arch_string, "generic", 7))
2810 error ("bad value (%s) for %sarch=%s %s",
2811 ix86_arch_string, prefix, suffix, sw);
2812
2813 /* Validate -mabi= value. */
2814 if (ix86_abi_string)
2815 {
2816 if (strcmp (ix86_abi_string, "sysv") == 0)
2817 ix86_abi = SYSV_ABI;
2818 else if (strcmp (ix86_abi_string, "ms") == 0)
2819 ix86_abi = MS_ABI;
2820 else
2821 error ("unknown ABI (%s) for %sabi=%s %s",
2822 ix86_abi_string, prefix, suffix, sw);
2823 }
2824 else
2825 ix86_abi = DEFAULT_ABI;
2826
2827 if (ix86_cmodel_string != 0)
2828 {
2829 if (!strcmp (ix86_cmodel_string, "small"))
2830 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2831 else if (!strcmp (ix86_cmodel_string, "medium"))
2832 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2833 else if (!strcmp (ix86_cmodel_string, "large"))
2834 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2835 else if (flag_pic)
2836 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2837 else if (!strcmp (ix86_cmodel_string, "32"))
2838 ix86_cmodel = CM_32;
2839 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2840 ix86_cmodel = CM_KERNEL;
2841 else
2842 error ("bad value (%s) for %scmodel=%s %s",
2843 ix86_cmodel_string, prefix, suffix, sw);
2844 }
2845 else
2846 {
2847 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2848 use of rip-relative addressing. This eliminates fixups that
2849 would otherwise be needed if this object is to be placed in a
2850 DLL, and is essentially just as efficient as direct addressing. */
2851 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2852 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2853 else if (TARGET_64BIT)
2854 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2855 else
2856 ix86_cmodel = CM_32;
2857 }
2858 if (ix86_asm_string != 0)
2859 {
2860 if (! TARGET_MACHO
2861 && !strcmp (ix86_asm_string, "intel"))
2862 ix86_asm_dialect = ASM_INTEL;
2863 else if (!strcmp (ix86_asm_string, "att"))
2864 ix86_asm_dialect = ASM_ATT;
2865 else
2866 error ("bad value (%s) for %sasm=%s %s",
2867 ix86_asm_string, prefix, suffix, sw);
2868 }
2869 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2870 error ("code model %qs not supported in the %s bit mode",
2871 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2872 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2873 sorry ("%i-bit mode not compiled in",
2874 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2875
2876 for (i = 0; i < pta_size; i++)
2877 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2878 {
2879 ix86_schedule = processor_alias_table[i].schedule;
2880 ix86_arch = processor_alias_table[i].processor;
2881 /* Default cpu tuning to the architecture. */
2882 ix86_tune = ix86_arch;
2883
2884 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2885 error ("CPU you selected does not support x86-64 "
2886 "instruction set");
2887
2888 if (processor_alias_table[i].flags & PTA_MMX
2889 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2890 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2891 if (processor_alias_table[i].flags & PTA_3DNOW
2892 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2893 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2894 if (processor_alias_table[i].flags & PTA_3DNOW_A
2895 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2896 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2897 if (processor_alias_table[i].flags & PTA_SSE
2898 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2899 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2900 if (processor_alias_table[i].flags & PTA_SSE2
2901 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2902 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2903 if (processor_alias_table[i].flags & PTA_SSE3
2904 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2905 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2906 if (processor_alias_table[i].flags & PTA_SSSE3
2907 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2908 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2909 if (processor_alias_table[i].flags & PTA_SSE4_1
2910 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2911 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2912 if (processor_alias_table[i].flags & PTA_SSE4_2
2913 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2914 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2915 if (processor_alias_table[i].flags & PTA_AVX
2916 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2917 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2918 if (processor_alias_table[i].flags & PTA_FMA
2919 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2920 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2921 if (processor_alias_table[i].flags & PTA_SSE4A
2922 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2923 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2924 if (processor_alias_table[i].flags & PTA_SSE5
2925 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2926 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2927 if (processor_alias_table[i].flags & PTA_ABM
2928 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2929 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2930 if (processor_alias_table[i].flags & PTA_CX16
2931 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2932 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2933 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2934 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2935 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2936 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2937 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2938 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2939 if (processor_alias_table[i].flags & PTA_AES
2940 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2941 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2942 if (processor_alias_table[i].flags & PTA_PCLMUL
2943 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2944 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2945 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2946 x86_prefetch_sse = true;
2947
2948 break;
2949 }
2950
2951 if (i == pta_size)
2952 error ("bad value (%s) for %sarch=%s %s",
2953 ix86_arch_string, prefix, suffix, sw);
2954
2955 ix86_arch_mask = 1u << ix86_arch;
2956 for (i = 0; i < X86_ARCH_LAST; ++i)
2957 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2958
2959 for (i = 0; i < pta_size; i++)
2960 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2961 {
2962 ix86_schedule = processor_alias_table[i].schedule;
2963 ix86_tune = processor_alias_table[i].processor;
2964 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2965 {
2966 if (ix86_tune_defaulted)
2967 {
2968 ix86_tune_string = "x86-64";
2969 for (i = 0; i < pta_size; i++)
2970 if (! strcmp (ix86_tune_string,
2971 processor_alias_table[i].name))
2972 break;
2973 ix86_schedule = processor_alias_table[i].schedule;
2974 ix86_tune = processor_alias_table[i].processor;
2975 }
2976 else
2977 error ("CPU you selected does not support x86-64 "
2978 "instruction set");
2979 }
2980 /* Intel CPUs have always interpreted SSE prefetch instructions as
2981 NOPs; so, we can enable SSE prefetch instructions even when
2982 -mtune (rather than -march) points us to a processor that has them.
2983 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2984 higher processors. */
2985 if (TARGET_CMOVE
2986 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2987 x86_prefetch_sse = true;
2988 break;
2989 }
2990 if (i == pta_size)
2991 error ("bad value (%s) for %stune=%s %s",
2992 ix86_tune_string, prefix, suffix, sw);
2993
2994 ix86_tune_mask = 1u << ix86_tune;
2995 for (i = 0; i < X86_TUNE_LAST; ++i)
2996 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
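
  /* A worked example of the mask test above: when tuning for Core 2,
     ix86_tune_mask has only the m_CORE2 bit set, so an entry such as
     X86_TUNE_FUSE_CMP_AND_BRANCH (initialized to m_CORE2 in the table)
     comes out as 1, while X86_TUNE_USE_MOV0 (m_K6) comes out as 0.  */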
2997
2998 if (optimize_size)
2999 ix86_cost = &ix86_size_cost;
3000 else
3001 ix86_cost = processor_target_table[ix86_tune].cost;
3002
3003 /* Arrange to set up i386_stack_locals for all functions. */
3004 init_machine_status = ix86_init_machine_status;
3005
3006 /* Validate -mregparm= value. */
3007 if (ix86_regparm_string)
3008 {
3009 if (TARGET_64BIT)
3010 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3011 i = atoi (ix86_regparm_string);
3012 if (i < 0 || i > REGPARM_MAX)
3013 error ("%sregparm=%d%s is not between 0 and %d",
3014 prefix, i, suffix, REGPARM_MAX);
3015 else
3016 ix86_regparm = i;
3017 }
3018 if (TARGET_64BIT)
3019 ix86_regparm = REGPARM_MAX;
3020
3021 /* If the user has provided any of the -malign-* options,
3022 warn and use that value only if -falign-* is not set.
3023 Remove this code in GCC 3.2 or later. */
3024 if (ix86_align_loops_string)
3025 {
3026 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3027 prefix, suffix, suffix);
3028 if (align_loops == 0)
3029 {
3030 i = atoi (ix86_align_loops_string);
3031 if (i < 0 || i > MAX_CODE_ALIGN)
3032 error ("%salign-loops=%d%s is not between 0 and %d",
3033 prefix, i, suffix, MAX_CODE_ALIGN);
3034 else
3035 align_loops = 1 << i;
3036 }
3037 }
3038
3039 if (ix86_align_jumps_string)
3040 {
3041 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3042 prefix, suffix, suffix);
3043 if (align_jumps == 0)
3044 {
3045 i = atoi (ix86_align_jumps_string);
3046 if (i < 0 || i > MAX_CODE_ALIGN)
3047 error ("%salign-jumps=%d%s is not between 0 and %d",
3048 prefix, i, suffix, MAX_CODE_ALIGN);
3049 else
3050 align_jumps = 1 << i;
3051 }
3052 }
3053
3054 if (ix86_align_funcs_string)
3055 {
3056 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3057 prefix, suffix, suffix);
3058 if (align_functions == 0)
3059 {
3060 i = atoi (ix86_align_funcs_string);
3061 if (i < 0 || i > MAX_CODE_ALIGN)
3062 error ("%salign-functions=%d%s is not between 0 and %d",
3063 prefix, i, suffix, MAX_CODE_ALIGN);
3064 else
3065 align_functions = 1 << i;
3066 }
3067 }
3068
3069 /* Default align_* from the processor table. */
3070 if (align_loops == 0)
3071 {
3072 align_loops = processor_target_table[ix86_tune].align_loop;
3073 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3074 }
3075 if (align_jumps == 0)
3076 {
3077 align_jumps = processor_target_table[ix86_tune].align_jump;
3078 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3079 }
3080 if (align_functions == 0)
3081 {
3082 align_functions = processor_target_table[ix86_tune].align_func;
3083 }
3084
3085 /* Validate -mbranch-cost= value, or provide default. */
3086 ix86_branch_cost = ix86_cost->branch_cost;
3087 if (ix86_branch_cost_string)
3088 {
3089 i = atoi (ix86_branch_cost_string);
3090 if (i < 0 || i > 5)
3091 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3092 else
3093 ix86_branch_cost = i;
3094 }
3095 if (ix86_section_threshold_string)
3096 {
3097 i = atoi (ix86_section_threshold_string);
3098 if (i < 0)
3099 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3100 else
3101 ix86_section_threshold = i;
3102 }
3103
3104 if (ix86_tls_dialect_string)
3105 {
3106 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3107 ix86_tls_dialect = TLS_DIALECT_GNU;
3108 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3109 ix86_tls_dialect = TLS_DIALECT_GNU2;
3110 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3111 ix86_tls_dialect = TLS_DIALECT_SUN;
3112 else
3113 error ("bad value (%s) for %stls-dialect=%s %s",
3114 ix86_tls_dialect_string, prefix, suffix, sw);
3115 }
3116
3117 if (ix87_precision_string)
3118 {
3119 i = atoi (ix87_precision_string);
3120 if (i != 32 && i != 64 && i != 80)
3121 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
3122 }
3123
3124 if (TARGET_64BIT)
3125 {
3126 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3127
3128 /* Enable by default the SSE and MMX builtins. Do allow the user to
3129 explicitly disable any of these. In particular, disabling SSE and
3130 MMX for kernel code is extremely useful. */
3131 if (!ix86_arch_specified)
3132 ix86_isa_flags
3133 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3134 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3135
3136 if (TARGET_RTD)
3137 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3138 }
3139 else
3140 {
3141 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3142
3143 if (!ix86_arch_specified)
3144 ix86_isa_flags
3145 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3146
3147 /* The i386 ABI does not specify a red zone. It still makes sense to use
3148 one when the programmer takes care to keep the stack from being clobbered. */
3149 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3150 target_flags |= MASK_NO_RED_ZONE;
3151 }
3152
3153 /* Keep nonleaf frame pointers. */
3154 if (flag_omit_frame_pointer)
3155 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3156 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3157 flag_omit_frame_pointer = 1;
3158
3159 /* If we're doing fast math, we don't care about comparison order
3160 wrt NaNs. This lets us use a shorter comparison sequence. */
3161 if (flag_finite_math_only)
3162 target_flags &= ~MASK_IEEE_FP;
3163
3164 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3165 since the insns won't need emulation. */
3166 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3167 target_flags &= ~MASK_NO_FANCY_MATH_387;
3168
3169 /* Likewise, if the target doesn't have a 387, or we've specified
3170 software floating point, don't use 387 inline intrinsics. */
3171 if (!TARGET_80387)
3172 target_flags |= MASK_NO_FANCY_MATH_387;
3173
3174 /* Turn on MMX builtins for -msse. */
3175 if (TARGET_SSE)
3176 {
3177 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3178 x86_prefetch_sse = true;
3179 }
3180
3181 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3182 if (TARGET_SSE4_2 || TARGET_ABM)
3183 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3184
3185 /* Validate -mpreferred-stack-boundary= value or default it to
3186 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3187 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3188 if (ix86_preferred_stack_boundary_string)
3189 {
3190 i = atoi (ix86_preferred_stack_boundary_string);
3191 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3192 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3193 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3194 else
3195 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3196 }
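  /* Illustrative arithmetic: BITS_PER_UNIT is 8, so e.g.
     -mpreferred-stack-boundary=4 gives (1 << 4) * 8 = 128 bits,
     i.e. 16-byte stack alignment.  */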
3197
3198 /* Set the default value for -mstackrealign. */
3199 if (ix86_force_align_arg_pointer == -1)
3200 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3201
3202 /* Validate -mincoming-stack-boundary= value or default it to
3203 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3204 if (ix86_force_align_arg_pointer)
3205 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3206 else
3207 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3208 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3209 if (ix86_incoming_stack_boundary_string)
3210 {
3211 i = atoi (ix86_incoming_stack_boundary_string);
3212 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3213 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3214 i, TARGET_64BIT ? 4 : 2);
3215 else
3216 {
3217 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3218 ix86_incoming_stack_boundary
3219 = ix86_user_incoming_stack_boundary;
3220 }
3221 }
3222
3223 /* Accept -msseregparm only if at least SSE support is enabled. */
3224 if (TARGET_SSEREGPARM
3225 && ! TARGET_SSE)
3226 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3227
3228 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3229 if (ix86_fpmath_string != 0)
3230 {
3231 if (! strcmp (ix86_fpmath_string, "387"))
3232 ix86_fpmath = FPMATH_387;
3233 else if (! strcmp (ix86_fpmath_string, "sse"))
3234 {
3235 if (!TARGET_SSE)
3236 {
3237 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3238 ix86_fpmath = FPMATH_387;
3239 }
3240 else
3241 ix86_fpmath = FPMATH_SSE;
3242 }
3243 else if (! strcmp (ix86_fpmath_string, "387,sse")
3244 || ! strcmp (ix86_fpmath_string, "387+sse")
3245 || ! strcmp (ix86_fpmath_string, "sse,387")
3246 || ! strcmp (ix86_fpmath_string, "sse+387")
3247 || ! strcmp (ix86_fpmath_string, "both"))
3248 {
3249 if (!TARGET_SSE)
3250 {
3251 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3252 ix86_fpmath = FPMATH_387;
3253 }
3254 else if (!TARGET_80387)
3255 {
3256 warning (0, "387 instruction set disabled, using SSE arithmetics");
3257 ix86_fpmath = FPMATH_SSE;
3258 }
3259 else
3260 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3261 }
3262 else
3263 error ("bad value (%s) for %sfpmath=%s %s",
3264 ix86_fpmath_string, prefix, suffix, sw);
3265 }
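  /* For example, -mfpmath=sse,387 (or "both") selects FPMATH_SSE | FPMATH_387
     when both units are available, while -mfpmath=sse falls back to 387
     arithmetic, with a warning, if SSE has been disabled.  */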
3266
3267 /* If the i387 is disabled, then do not return values in it. */
3268 if (!TARGET_80387)
3269 target_flags &= ~MASK_FLOAT_RETURNS;
3270
3271 /* Use external vectorized library in vectorizing intrinsics. */
3272 if (ix86_veclibabi_string)
3273 {
3274 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3275 ix86_veclib_handler = ix86_veclibabi_svml;
3276 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3277 ix86_veclib_handler = ix86_veclibabi_acml;
3278 else
3279 error ("unknown vectorization library ABI type (%s) for "
3280 "%sveclibabi=%s %s", ix86_veclibabi_string,
3281 prefix, suffix, sw);
3282 }
3283
3284 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3285 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3286 && !optimize_size)
3287 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3288
3289 /* ??? Unwind info is not correct around the CFG unless either a frame
3290 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3291 unwind info generation to be aware of the CFG and propagating states
3292 around edges. */
3293 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3294 || flag_exceptions || flag_non_call_exceptions)
3295 && flag_omit_frame_pointer
3296 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3297 {
3298 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3299 warning (0, "unwind tables currently require either a frame pointer "
3300 "or %saccumulate-outgoing-args%s for correctness",
3301 prefix, suffix);
3302 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3303 }
3304
3305 /* If stack probes are required, the space used for large function
3306 arguments on the stack must also be probed, so enable
3307 -maccumulate-outgoing-args so this happens in the prologue. */
3308 if (TARGET_STACK_PROBE
3309 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3310 {
3311 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3312 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3313 "for correctness", prefix, suffix);
3314 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3315 }
3316
3317 /* For sane SSE instruction set generation we need the fcomi instruction.
3318 It is safe to enable all CMOVE instructions. */
3319 if (TARGET_SSE)
3320 TARGET_CMOVE = 1;
3321
3322 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3323 {
3324 char *p;
3325 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3326 p = strchr (internal_label_prefix, 'X');
3327 internal_label_prefix_len = p - internal_label_prefix;
3328 *p = '\0';
3329 }
3330
3331 /* When a scheduling description is not available, disable the scheduler pass
3332 so it won't slow down compilation and make x87 code slower. */
3333 if (!TARGET_SCHEDULE)
3334 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3335
3336 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3337 set_param_value ("simultaneous-prefetches",
3338 ix86_cost->simultaneous_prefetches);
3339 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3340 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3341 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3342 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3343 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3344 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3345
3346 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3347 can be optimized to ap = __builtin_next_arg (0). */
3348 if (!TARGET_64BIT)
3349 targetm.expand_builtin_va_start = NULL;
3350
3351 if (TARGET_64BIT)
3352 {
3353 ix86_gen_leave = gen_leave_rex64;
3354 ix86_gen_pop1 = gen_popdi1;
3355 ix86_gen_add3 = gen_adddi3;
3356 ix86_gen_sub3 = gen_subdi3;
3357 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3358 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3359 ix86_gen_monitor = gen_sse3_monitor64;
3360 ix86_gen_andsp = gen_anddi3;
3361 }
3362 else
3363 {
3364 ix86_gen_leave = gen_leave;
3365 ix86_gen_pop1 = gen_popsi1;
3366 ix86_gen_add3 = gen_addsi3;
3367 ix86_gen_sub3 = gen_subsi3;
3368 ix86_gen_sub3_carry = gen_subsi3_carry;
3369 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3370 ix86_gen_monitor = gen_sse3_monitor;
3371 ix86_gen_andsp = gen_andsi3;
3372 }
3373
3374 #ifdef USE_IX86_CLD
3375 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3376 if (!TARGET_64BIT)
3377 target_flags |= MASK_CLD & ~target_flags_explicit;
3378 #endif
3379
3380 /* Save the initial options in case the user uses function-specific options. */
3381 if (main_args_p)
3382 target_option_default_node = target_option_current_node
3383 = build_target_option_node ();
3384 }
3385 \f
3386 /* Save the current options */
3387
3388 static void
3389 ix86_function_specific_save (struct cl_target_option *ptr)
3390 {
3391 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3392 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3393 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3394 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3395 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3396
3397 ptr->arch = ix86_arch;
3398 ptr->schedule = ix86_schedule;
3399 ptr->tune = ix86_tune;
3400 ptr->fpmath = ix86_fpmath;
3401 ptr->branch_cost = ix86_branch_cost;
3402 ptr->tune_defaulted = ix86_tune_defaulted;
3403 ptr->arch_specified = ix86_arch_specified;
3404 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3405 ptr->target_flags_explicit = target_flags_explicit;
3406 }
3407
3408 /* Restore the current options */
3409
3410 static void
3411 ix86_function_specific_restore (struct cl_target_option *ptr)
3412 {
3413 enum processor_type old_tune = ix86_tune;
3414 enum processor_type old_arch = ix86_arch;
3415 unsigned int ix86_arch_mask, ix86_tune_mask;
3416 int i;
3417
3418 ix86_arch = (enum processor_type) ptr->arch;
3419 ix86_schedule = (enum attr_cpu) ptr->schedule;
3420 ix86_tune = (enum processor_type) ptr->tune;
3421 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3422 ix86_branch_cost = ptr->branch_cost;
3423 ix86_tune_defaulted = ptr->tune_defaulted;
3424 ix86_arch_specified = ptr->arch_specified;
3425 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3426 target_flags_explicit = ptr->target_flags_explicit;
3427
3428 /* Recreate the arch feature tests if the arch changed */
3429 if (old_arch != ix86_arch)
3430 {
3431 ix86_arch_mask = 1u << ix86_arch;
3432 for (i = 0; i < X86_ARCH_LAST; ++i)
3433 ix86_arch_features[i]
3434 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3435 }
3436
3437 /* Recreate the tune optimization tests */
3438 if (old_tune != ix86_tune)
3439 {
3440 ix86_tune_mask = 1u << ix86_tune;
3441 for (i = 0; i < X86_TUNE_LAST; ++i)
3442 ix86_tune_features[i]
3443 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3444 }
3445 }
3446
3447 /* Print the current options */
3448
3449 static void
3450 ix86_function_specific_print (FILE *file, int indent,
3451 struct cl_target_option *ptr)
3452 {
3453 char *target_string
3454 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3455 NULL, NULL, NULL, false);
3456
3457 fprintf (file, "%*sarch = %d (%s)\n",
3458 indent, "",
3459 ptr->arch,
3460 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3461 ? cpu_names[ptr->arch]
3462 : "<unknown>"));
3463
3464 fprintf (file, "%*stune = %d (%s)\n",
3465 indent, "",
3466 ptr->tune,
3467 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3468 ? cpu_names[ptr->tune]
3469 : "<unknown>"));
3470
3471 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3472 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3473 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3474 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3475
3476 if (target_string)
3477 {
3478 fprintf (file, "%*s%s\n", indent, "", target_string);
3479 free (target_string);
3480 }
3481 }
3482
3483 \f
3484 /* Inner function to process the attribute((target(...))), take an argument and
3485 set the current options from the argument. If we have a list, recursively go
3486 over the list. */
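/* For example (illustrative), a declaration such as

     int foo (void) __attribute__ ((target ("sse4.2,no-fused-madd")));

   compiles foo with SSE4.2 enabled and fused multiply-add disabled,
   independently of the command-line options; "sse4.2" is matched as an ISA
   option and "no-fused-madd" as a flag option in the table below.  */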
3487
3488 static bool
3489 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3490 {
3491 char *next_optstr;
3492 bool ret = true;
3493
3494 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3495 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3496 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3497 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3498
3499 enum ix86_opt_type
3500 {
3501 ix86_opt_unknown,
3502 ix86_opt_yes,
3503 ix86_opt_no,
3504 ix86_opt_str,
3505 ix86_opt_isa
3506 };
3507
3508 static const struct
3509 {
3510 const char *string;
3511 size_t len;
3512 enum ix86_opt_type type;
3513 int opt;
3514 int mask;
3515 } attrs[] = {
3516 /* isa options */
3517 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3518 IX86_ATTR_ISA ("abm", OPT_mabm),
3519 IX86_ATTR_ISA ("aes", OPT_maes),
3520 IX86_ATTR_ISA ("avx", OPT_mavx),
3521 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3522 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3523 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3524 IX86_ATTR_ISA ("sse", OPT_msse),
3525 IX86_ATTR_ISA ("sse2", OPT_msse2),
3526 IX86_ATTR_ISA ("sse3", OPT_msse3),
3527 IX86_ATTR_ISA ("sse4", OPT_msse4),
3528 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3529 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3530 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3531 IX86_ATTR_ISA ("sse5", OPT_msse5),
3532 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3533
3534 /* string options */
3535 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3536 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3537 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3538
3539 /* flag options */
3540 IX86_ATTR_YES ("cld",
3541 OPT_mcld,
3542 MASK_CLD),
3543
3544 IX86_ATTR_NO ("fancy-math-387",
3545 OPT_mfancy_math_387,
3546 MASK_NO_FANCY_MATH_387),
3547
3548 IX86_ATTR_NO ("fused-madd",
3549 OPT_mfused_madd,
3550 MASK_NO_FUSED_MADD),
3551
3552 IX86_ATTR_YES ("ieee-fp",
3553 OPT_mieee_fp,
3554 MASK_IEEE_FP),
3555
3556 IX86_ATTR_YES ("inline-all-stringops",
3557 OPT_minline_all_stringops,
3558 MASK_INLINE_ALL_STRINGOPS),
3559
3560 IX86_ATTR_YES ("inline-stringops-dynamically",
3561 OPT_minline_stringops_dynamically,
3562 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3563
3564 IX86_ATTR_NO ("align-stringops",
3565 OPT_mno_align_stringops,
3566 MASK_NO_ALIGN_STRINGOPS),
3567
3568 IX86_ATTR_YES ("recip",
3569 OPT_mrecip,
3570 MASK_RECIP),
3571
3572 };
3573
3574 /* If this is a list, recurse to get the options. */
3575 if (TREE_CODE (args) == TREE_LIST)
3576 {
3577 bool ret = true;
3578
3579 for (; args; args = TREE_CHAIN (args))
3580 if (TREE_VALUE (args)
3581 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3582 ret = false;
3583
3584 return ret;
3585 }
3586
3587 else if (TREE_CODE (args) != STRING_CST)
3588 gcc_unreachable ();
3589
3590 /* Handle multiple arguments separated by commas. */
3591 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3592
3593 while (next_optstr && *next_optstr != '\0')
3594 {
3595 char *p = next_optstr;
3596 char *orig_p = p;
3597 char *comma = strchr (next_optstr, ',');
3598 const char *opt_string;
3599 size_t len, opt_len;
3600 int opt;
3601 bool opt_set_p;
3602 char ch;
3603 unsigned i;
3604 enum ix86_opt_type type = ix86_opt_unknown;
3605 int mask = 0;
3606
3607 if (comma)
3608 {
3609 *comma = '\0';
3610 len = comma - next_optstr;
3611 next_optstr = comma + 1;
3612 }
3613 else
3614 {
3615 len = strlen (p);
3616 next_optstr = NULL;
3617 }
3618
3619 /* Recognize no-xxx. */
3620 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3621 {
3622 opt_set_p = false;
3623 p += 3;
3624 len -= 3;
3625 }
3626 else
3627 opt_set_p = true;
3628
3629 /* Find the option. */
3630 ch = *p;
3631 opt = N_OPTS;
3632 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3633 {
3634 type = attrs[i].type;
3635 opt_len = attrs[i].len;
3636 if (ch == attrs[i].string[0]
3637 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3638 && memcmp (p, attrs[i].string, opt_len) == 0)
3639 {
3640 opt = attrs[i].opt;
3641 mask = attrs[i].mask;
3642 opt_string = attrs[i].string;
3643 break;
3644 }
3645 }
3646
3647 /* Process the option. */
3648 if (opt == N_OPTS)
3649 {
3650 error ("attribute(target(\"%s\")) is unknown", orig_p);
3651 ret = false;
3652 }
3653
3654 else if (type == ix86_opt_isa)
3655 ix86_handle_option (opt, p, opt_set_p);
3656
3657 else if (type == ix86_opt_yes || type == ix86_opt_no)
3658 {
3659 if (type == ix86_opt_no)
3660 opt_set_p = !opt_set_p;
3661
3662 if (opt_set_p)
3663 target_flags |= mask;
3664 else
3665 target_flags &= ~mask;
3666 }
3667
3668 else if (type == ix86_opt_str)
3669 {
3670 if (p_strings[opt])
3671 {
3672 error ("option(\"%s\") was already specified", opt_string);
3673 ret = false;
3674 }
3675 else
3676 p_strings[opt] = xstrdup (p + opt_len);
3677 }
3678
3679 else
3680 gcc_unreachable ();
3681 }
3682
3683 return ret;
3684 }
3685
3686 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3687
3688 tree
3689 ix86_valid_target_attribute_tree (tree args)
3690 {
3691 const char *orig_arch_string = ix86_arch_string;
3692 const char *orig_tune_string = ix86_tune_string;
3693 const char *orig_fpmath_string = ix86_fpmath_string;
3694 int orig_tune_defaulted = ix86_tune_defaulted;
3695 int orig_arch_specified = ix86_arch_specified;
3696 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3697 tree t = NULL_TREE;
3698 int i;
3699 struct cl_target_option *def
3700 = TREE_TARGET_OPTION (target_option_default_node);
3701
3702 /* Process each of the options on the chain. */
3703 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3704 return NULL_TREE;
3705
3706 /* If the changed options are different from the default, rerun override_options,
3707 and then save the options away. The string options are attribute options,
3708 and will be undone when we copy the save structure. */
3709 if (ix86_isa_flags != def->ix86_isa_flags
3710 || target_flags != def->target_flags
3711 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3712 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3713 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3714 {
3715 /* If we are using the default tune= or arch=, undo the string assigned,
3716 and use the default. */
3717 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3718 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3719 else if (!orig_arch_specified)
3720 ix86_arch_string = NULL;
3721
3722 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3723 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3724 else if (orig_tune_defaulted)
3725 ix86_tune_string = NULL;
3726
3727 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3728 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3729 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3730 else if (!TARGET_64BIT && TARGET_SSE)
3731 ix86_fpmath_string = "sse,387";
3732
3733 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3734 override_options (false);
3735
3736 /* Add any builtin functions with the new isa if any. */
3737 ix86_add_new_builtins (ix86_isa_flags);
3738
3739 /* Save the current options unless we are validating options for
3740 #pragma. */
3741 t = build_target_option_node ();
3742
3743 ix86_arch_string = orig_arch_string;
3744 ix86_tune_string = orig_tune_string;
3745 ix86_fpmath_string = orig_fpmath_string;
3746
3747 /* Free up memory allocated to hold the strings */
3748 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3749 if (option_strings[i])
3750 free (option_strings[i]);
3751 }
3752
3753 return t;
3754 }
3755
3756 /* Hook to validate attribute((target("string"))). */
3757
3758 static bool
3759 ix86_valid_target_attribute_p (tree fndecl,
3760 tree ARG_UNUSED (name),
3761 tree args,
3762 int ARG_UNUSED (flags))
3763 {
3764 struct cl_target_option cur_target;
3765 bool ret = true;
3766 tree old_optimize = build_optimization_node ();
3767 tree new_target, new_optimize;
3768 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3769
3770 /* If the function changed the optimization levels as well as setting target
3771 options, start with the optimizations specified. */
3772 if (func_optimize && func_optimize != old_optimize)
3773 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3774
3775 /* The target attributes may also change some optimization flags, so update
3776 the optimization options if necessary. */
3777 cl_target_option_save (&cur_target);
3778 new_target = ix86_valid_target_attribute_tree (args);
3779 new_optimize = build_optimization_node ();
3780
3781 if (!new_target)
3782 ret = false;
3783
3784 else if (fndecl)
3785 {
3786 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3787
3788 if (old_optimize != new_optimize)
3789 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3790 }
3791
3792 cl_target_option_restore (&cur_target);
3793
3794 if (old_optimize != new_optimize)
3795 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3796
3797 return ret;
3798 }
3799
3800 \f
3801 /* Hook to determine if one function can safely inline another. */
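/* For example, under the ISA-subset rule below a caller compiled with
   -msse4 may inline an SSE2 callee, but an SSE2 caller may not inline an
   SSE4 callee, since the caller's ISA flags must be a superset of the
   callee's.  */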
3802
3803 static bool
3804 ix86_can_inline_p (tree caller, tree callee)
3805 {
3806 bool ret = false;
3807 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3808 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3809
3810 /* If callee has no option attributes, then it is ok to inline. */
3811 if (!callee_tree)
3812 ret = true;
3813
3814 /* If caller has no option attributes, but callee does then it is not ok to
3815 inline. */
3816 else if (!caller_tree)
3817 ret = false;
3818
3819 else
3820 {
3821 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3822 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3823
3824 /* Callee's isa options should be a subset of the caller's, i.e. an SSE5 function
3825 can inline an SSE2 function but an SSE2 function can't inline an SSE5
3826 function. */
3827 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3828 != callee_opts->ix86_isa_flags)
3829 ret = false;
3830
3831 /* See if we have the same non-isa options. */
3832 else if (caller_opts->target_flags != callee_opts->target_flags)
3833 ret = false;
3834
3835 /* See if arch, tune, etc. are the same. */
3836 else if (caller_opts->arch != callee_opts->arch)
3837 ret = false;
3838
3839 else if (caller_opts->tune != callee_opts->tune)
3840 ret = false;
3841
3842 else if (caller_opts->fpmath != callee_opts->fpmath)
3843 ret = false;
3844
3845 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3846 ret = false;
3847
3848 else
3849 ret = true;
3850 }
3851
3852 return ret;
3853 }
3854
3855 \f
3856 /* Remember the last target of ix86_set_current_function. */
3857 static GTY(()) tree ix86_previous_fndecl;
3858
3859 /* Establish appropriate back-end context for processing the function
3860 FNDECL. The argument might be NULL to indicate processing at top
3861 level, outside of any function scope. */
3862 static void
3863 ix86_set_current_function (tree fndecl)
3864 {
3865 /* Only change the context if the function changes. This hook is called
3866 several times in the course of compiling a function, and we don't want to
3867 slow things down too much or call target_reinit when it isn't safe. */
3868 if (fndecl && fndecl != ix86_previous_fndecl)
3869 {
3870 tree old_tree = (ix86_previous_fndecl
3871 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3872 : NULL_TREE);
3873
3874 tree new_tree = (fndecl
3875 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3876 : NULL_TREE);
3877
3878 ix86_previous_fndecl = fndecl;
3879 if (old_tree == new_tree)
3880 ;
3881
3882 else if (new_tree)
3883 {
3884 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3885 target_reinit ();
3886 }
3887
3888 else if (old_tree)
3889 {
3890 struct cl_target_option *def
3891 = TREE_TARGET_OPTION (target_option_current_node);
3892
3893 cl_target_option_restore (def);
3894 target_reinit ();
3895 }
3896 }
3897 }
3898
3899 \f
3900 /* Return true if this goes in large data/bss. */
3901
3902 static bool
3903 ix86_in_large_data_p (tree exp)
3904 {
3905 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3906 return false;
3907
3908 /* Functions are never large data. */
3909 if (TREE_CODE (exp) == FUNCTION_DECL)
3910 return false;
3911
3912 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3913 {
3914 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3915 if (strcmp (section, ".ldata") == 0
3916 || strcmp (section, ".lbss") == 0)
3917 return true;
3918 return false;
3919 }
3920 else
3921 {
3922 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3923
3924 /* If this is an incomplete type with size 0, then we can't put it
3925 in data because it might be too big when completed. */
3926 if (!size || size > ix86_section_threshold)
3927 return true;
3928 }
3929
3930 return false;
3931 }
3932
3933 /* Switch to the appropriate section for output of DECL.
3934 DECL is either a `VAR_DECL' node or a constant of some sort.
3935 RELOC indicates whether forming the initial value of DECL requires
3936 link-time relocations. */
3937
3938 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3939 ATTRIBUTE_UNUSED;
3940
3941 static section *
3942 x86_64_elf_select_section (tree decl, int reloc,
3943 unsigned HOST_WIDE_INT align)
3944 {
3945 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3946 && ix86_in_large_data_p (decl))
3947 {
3948 const char *sname = NULL;
3949 unsigned int flags = SECTION_WRITE;
3950 switch (categorize_decl_for_section (decl, reloc))
3951 {
3952 case SECCAT_DATA:
3953 sname = ".ldata";
3954 break;
3955 case SECCAT_DATA_REL:
3956 sname = ".ldata.rel";
3957 break;
3958 case SECCAT_DATA_REL_LOCAL:
3959 sname = ".ldata.rel.local";
3960 break;
3961 case SECCAT_DATA_REL_RO:
3962 sname = ".ldata.rel.ro";
3963 break;
3964 case SECCAT_DATA_REL_RO_LOCAL:
3965 sname = ".ldata.rel.ro.local";
3966 break;
3967 case SECCAT_BSS:
3968 sname = ".lbss";
3969 flags |= SECTION_BSS;
3970 break;
3971 case SECCAT_RODATA:
3972 case SECCAT_RODATA_MERGE_STR:
3973 case SECCAT_RODATA_MERGE_STR_INIT:
3974 case SECCAT_RODATA_MERGE_CONST:
3975 sname = ".lrodata";
3976 flags = 0;
3977 break;
3978 case SECCAT_SRODATA:
3979 case SECCAT_SDATA:
3980 case SECCAT_SBSS:
3981 gcc_unreachable ();
3982 case SECCAT_TEXT:
3983 case SECCAT_TDATA:
3984 case SECCAT_TBSS:
3985 /* We don't split these for the medium model. Place them into
3986 default sections and hope for the best. */
3987 break;
3988 case SECCAT_EMUTLS_VAR:
3989 case SECCAT_EMUTLS_TMPL:
3990 gcc_unreachable ();
3991 }
3992 if (sname)
3993 {
3994 /* We might get called with string constants, but get_named_section
3995 doesn't like them as they are not DECLs. Also, we need to set
3996 flags in that case. */
3997 if (!DECL_P (decl))
3998 return get_section (sname, flags, NULL);
3999 return get_named_section (decl, sname, reloc);
4000 }
4001 }
4002 return default_elf_select_section (decl, reloc, align);
4003 }
4004
4005 /* Build up a unique section name, expressed as a
4006 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4007 RELOC indicates whether the initial value of EXP requires
4008 link-time relocations. */
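/* For example (illustrative), with -mcmodel=medium a one-only writable
   variable "foo" that exceeds the large-data threshold is placed in a
   section named ".gnu.linkonce.ld.foo", the large-data counterpart of the
   usual ".gnu.linkonce.d.foo".  */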
4009
4010 static void ATTRIBUTE_UNUSED
4011 x86_64_elf_unique_section (tree decl, int reloc)
4012 {
4013 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4014 && ix86_in_large_data_p (decl))
4015 {
4016 const char *prefix = NULL;
4017 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4018 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4019
4020 switch (categorize_decl_for_section (decl, reloc))
4021 {
4022 case SECCAT_DATA:
4023 case SECCAT_DATA_REL:
4024 case SECCAT_DATA_REL_LOCAL:
4025 case SECCAT_DATA_REL_RO:
4026 case SECCAT_DATA_REL_RO_LOCAL:
4027 prefix = one_only ? ".ld" : ".ldata";
4028 break;
4029 case SECCAT_BSS:
4030 prefix = one_only ? ".lb" : ".lbss";
4031 break;
4032 case SECCAT_RODATA:
4033 case SECCAT_RODATA_MERGE_STR:
4034 case SECCAT_RODATA_MERGE_STR_INIT:
4035 case SECCAT_RODATA_MERGE_CONST:
4036 prefix = one_only ? ".lr" : ".lrodata";
4037 break;
4038 case SECCAT_SRODATA:
4039 case SECCAT_SDATA:
4040 case SECCAT_SBSS:
4041 gcc_unreachable ();
4042 case SECCAT_TEXT:
4043 case SECCAT_TDATA:
4044 case SECCAT_TBSS:
4045 /* We don't split these for the medium model. Place them into
4046 default sections and hope for the best. */
4047 break;
4048 case SECCAT_EMUTLS_VAR:
4049 prefix = targetm.emutls.var_section;
4050 break;
4051 case SECCAT_EMUTLS_TMPL:
4052 prefix = targetm.emutls.tmpl_section;
4053 break;
4054 }
4055 if (prefix)
4056 {
4057 const char *name, *linkonce;
4058 char *string;
4059
4060 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4061 name = targetm.strip_name_encoding (name);
4062
4063 /* If we're using one_only, then there needs to be a .gnu.linkonce
4064 prefix to the section name. */
4065 linkonce = one_only ? ".gnu.linkonce" : "";
4066
4067 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4068
4069 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4070 return;
4071 }
4072 }
4073 default_unique_section (decl, reloc);
4074 }
4075
4076 #ifdef COMMON_ASM_OP
4077 /* This says how to output assembler code to declare an
4078 uninitialized external linkage data object.
4079
4080 For the medium model on x86-64 we need to use the .largecomm pseudo-op for
4081 large objects. */
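/* For example (illustrative), with -mcmodel=medium a common object larger
   than the -mlarge-data-threshold value is emitted as
   ".largecomm\tname,size,align" (alignment in bytes) instead of the usual
   COMMON_ASM_OP line.  */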
4082 void
4083 x86_elf_aligned_common (FILE *file,
4084 const char *name, unsigned HOST_WIDE_INT size,
4085 int align)
4086 {
4087 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4088 && size > (unsigned int)ix86_section_threshold)
4089 fprintf (file, ".largecomm\t");
4090 else
4091 fprintf (file, "%s", COMMON_ASM_OP);
4092 assemble_name (file, name);
4093 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4094 size, align / BITS_PER_UNIT);
4095 }
4096 #endif
4097
4098 /* Utility function for targets to use in implementing
4099 ASM_OUTPUT_ALIGNED_BSS. */
4100
4101 void
4102 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4103 const char *name, unsigned HOST_WIDE_INT size,
4104 int align)
4105 {
4106 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4107 && size > (unsigned int)ix86_section_threshold)
4108 switch_to_section (get_named_section (decl, ".lbss", 0));
4109 else
4110 switch_to_section (bss_section);
4111 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4112 #ifdef ASM_DECLARE_OBJECT_NAME
4113 last_assemble_variable_decl = decl;
4114 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4115 #else
4116 /* The standard thing is just to output a label for the object. */
4117 ASM_OUTPUT_LABEL (file, name);
4118 #endif /* ASM_DECLARE_OBJECT_NAME */
4119 ASM_OUTPUT_SKIP (file, size ? size : 1);
4120 }
4121 \f
4122 void
4123 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4124 {
4125 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4126 make the problem of having too few registers even worse. */
4127 #ifdef INSN_SCHEDULING
4128 if (level > 1)
4129 flag_schedule_insns = 0;
4130 #endif
4131
4132 if (TARGET_MACHO)
4133 /* The Darwin libraries never set errno, so we might as well
4134 avoid calling them when that's the only reason we would. */
4135 flag_errno_math = 0;
4136
4137 /* The default values of these switches depend on TARGET_64BIT, which is
4138 not known at this moment. Mark these values with 2 and let the user
4139 override them. If there is no command-line option specifying them,
4140 we will set the defaults in override_options. */
4141 if (optimize >= 1)
4142 flag_omit_frame_pointer = 2;
4143 flag_pcc_struct_return = 2;
4144 flag_asynchronous_unwind_tables = 2;
4145 flag_vect_cost_model = 1;
4146 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4147 SUBTARGET_OPTIMIZATION_OPTIONS;
4148 #endif
4149 }
4150 \f
4151 /* Decide whether we can make a sibling call to a function. DECL is the
4152 declaration of the function being targeted by the call and EXP is the
4153 CALL_EXPR representing the call. */
4154
4155 static bool
4156 ix86_function_ok_for_sibcall (tree decl, tree exp)
4157 {
4158 tree func;
4159 rtx a, b;
4160
4161 /* If we are generating position-independent code, we cannot sibcall
4162 optimize any indirect call, or a direct call to a global function,
4163 as the PLT requires %ebx be live. */
4164 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4165 return false;
4166
4167 if (decl)
4168 func = decl;
4169 else
4170 {
4171 func = TREE_TYPE (CALL_EXPR_FN (exp));
4172 if (POINTER_TYPE_P (func))
4173 func = TREE_TYPE (func);
4174 }
4175
4176 /* Check that the return value locations are the same. For example,
4177 if we are returning floats on the 80387 register stack, we cannot
4178 make a sibcall from a function that doesn't return a float to a
4179 function that does or, conversely, from a function that does return
4180 a float to a function that doesn't; the necessary stack adjustment
4181 would not be executed. This is also the place we notice
4182 differences in the return value ABI. Note that it is ok for one
4183 of the functions to have void return type as long as the return
4184 value of the other is passed in a register. */
4185 a = ix86_function_value (TREE_TYPE (exp), func, false);
4186 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4187 cfun->decl, false);
4188 if (STACK_REG_P (a) || STACK_REG_P (b))
4189 {
4190 if (!rtx_equal_p (a, b))
4191 return false;
4192 }
4193 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4194 ;
4195 else if (!rtx_equal_p (a, b))
4196 return false;
4197
4198 /* If this call is indirect, we'll need to be able to use a call-clobbered
4199 register for the address of the target function. Make sure that all
4200 such registers are not used for passing parameters. */
4201 if (!decl && !TARGET_64BIT)
4202 {
4203 tree type;
4204
4205 /* We're looking at the CALL_EXPR, we need the type of the function. */
4206 type = CALL_EXPR_FN (exp); /* pointer expression */
4207 type = TREE_TYPE (type); /* pointer type */
4208 type = TREE_TYPE (type); /* function type */
4209
4210 if (ix86_function_regparm (type, NULL) >= 3)
4211 {
4212 /* ??? Need to count the actual number of registers to be used,
4213 not the possible number of registers. Fix later. */
4214 return false;
4215 }
4216 }
4217
4218 /* Dllimport'd functions are also called indirectly. */
4219 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4220 && !TARGET_64BIT
4221 && decl && DECL_DLLIMPORT_P (decl)
4222 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4223 return false;
4224
4225 /* If we need to align the outgoing stack, then sibcalling would
4226 unalign the stack, which may break the called function. */
4227 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4228 return false;
4229
4230 /* Otherwise okay. That also includes certain types of indirect calls. */
4231 return true;
4232 }
4233
4234 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4235 calling convention attributes;
4236 arguments as in struct attribute_spec.handler. */
4237
4238 static tree
4239 ix86_handle_cconv_attribute (tree *node, tree name,
4240 tree args,
4241 int flags ATTRIBUTE_UNUSED,
4242 bool *no_add_attrs)
4243 {
4244 if (TREE_CODE (*node) != FUNCTION_TYPE
4245 && TREE_CODE (*node) != METHOD_TYPE
4246 && TREE_CODE (*node) != FIELD_DECL
4247 && TREE_CODE (*node) != TYPE_DECL)
4248 {
4249 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4250 IDENTIFIER_POINTER (name));
4251 *no_add_attrs = true;
4252 return NULL_TREE;
4253 }
4254
4255 /* Can combine regparm with all attributes but fastcall. */
4256 if (is_attribute_p ("regparm", name))
4257 {
4258 tree cst;
4259
4260 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4261 {
4262 error ("fastcall and regparm attributes are not compatible");
4263 }
4264
4265 cst = TREE_VALUE (args);
4266 if (TREE_CODE (cst) != INTEGER_CST)
4267 {
4268 warning (OPT_Wattributes,
4269 "%qs attribute requires an integer constant argument",
4270 IDENTIFIER_POINTER (name));
4271 *no_add_attrs = true;
4272 }
4273 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4274 {
4275 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4276 IDENTIFIER_POINTER (name), REGPARM_MAX);
4277 *no_add_attrs = true;
4278 }
4279
4280 return NULL_TREE;
4281 }
4282
4283 if (TARGET_64BIT)
4284 {
4285 /* Do not warn when emulating the MS ABI. */
4286 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4287 warning (OPT_Wattributes, "%qs attribute ignored",
4288 IDENTIFIER_POINTER (name));
4289 *no_add_attrs = true;
4290 return NULL_TREE;
4291 }
4292
4293 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4294 if (is_attribute_p ("fastcall", name))
4295 {
4296 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4297 {
4298 error ("fastcall and cdecl attributes are not compatible");
4299 }
4300 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4301 {
4302 error ("fastcall and stdcall attributes are not compatible");
4303 }
4304 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4305 {
4306 error ("fastcall and regparm attributes are not compatible");
4307 }
4308 }
4309
4310 /* Can combine stdcall with fastcall (redundant), regparm and
4311 sseregparm. */
4312 else if (is_attribute_p ("stdcall", name))
4313 {
4314 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4315 {
4316 error ("stdcall and cdecl attributes are not compatible");
4317 }
4318 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4319 {
4320 error ("stdcall and fastcall attributes are not compatible");
4321 }
4322 }
4323
4324 /* Can combine cdecl with regparm and sseregparm. */
4325 else if (is_attribute_p ("cdecl", name))
4326 {
4327 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4328 {
4329 error ("stdcall and cdecl attributes are not compatible");
4330 }
4331 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4332 {
4333 error ("fastcall and cdecl attributes are not compatible");
4334 }
4335 }
4336
4337 /* Can combine sseregparm with all attributes. */
4338
4339 return NULL_TREE;
4340 }
4341
4342 /* Return 0 if the attributes for two types are incompatible, 1 if they
4343 are compatible, and 2 if they are nearly compatible (which causes a
4344 warning to be generated). */
4345
4346 static int
4347 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4348 {
4349 /* Check for mismatch of non-default calling convention. */
4350 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4351
4352 if (TREE_CODE (type1) != FUNCTION_TYPE
4353 && TREE_CODE (type1) != METHOD_TYPE)
4354 return 1;
4355
4356 /* Check for mismatched fastcall/regparm types. */
4357 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4358 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4359 || (ix86_function_regparm (type1, NULL)
4360 != ix86_function_regparm (type2, NULL)))
4361 return 0;
4362
4363 /* Check for mismatched sseregparm types. */
4364 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4365 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4366 return 0;
4367
4368 /* Check for mismatched return types (cdecl vs stdcall). */
4369 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4370 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4371 return 0;
4372
4373 return 1;
4374 }
4375 \f
4376 /* Return the regparm value for a function with the indicated TYPE and DECL.
4377 DECL may be NULL when calling function indirectly
4378 or considering a libcall. */
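/* For example (illustrative), a 32-bit function declared
     int add2 (int a, int b) __attribute__ ((regparm (2)));
   receives A in %eax and B in %edx rather than on the stack.  */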
4379
4380 static int
4381 ix86_function_regparm (const_tree type, const_tree decl)
4382 {
4383 tree attr;
4384 int regparm;
4385
4386 static bool error_issued;
4387
4388 if (TARGET_64BIT)
4389 return (ix86_function_type_abi (type) == SYSV_ABI
4390 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4391
4392 regparm = ix86_regparm;
4393 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4394 if (attr)
4395 {
4396 regparm
4397 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4398
4399 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4400 {
4401 /* We can't use regparm(3) for nested functions because
4402 these pass the static chain pointer in the %ecx register. */
4403 if (!error_issued && regparm == 3
4404 && decl_function_context (decl)
4405 && !DECL_NO_STATIC_CHAIN (decl))
4406 {
4407 error ("nested functions are limited to 2 register parameters");
4408 error_issued = true;
4409 return 0;
4410 }
4411 }
4412
4413 return regparm;
4414 }
4415
4416 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4417 return 2;
4418
4419 /* Use register calling convention for local functions when possible. */
4420 if (decl
4421 && TREE_CODE (decl) == FUNCTION_DECL
4422 && optimize
4423 && !profile_flag)
4424 {
4425 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4426 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4427 if (i && i->local)
4428 {
4429 int local_regparm, globals = 0, regno;
4430 struct function *f;
4431
4432 /* Make sure no regparm register is taken by a
4433 fixed register variable. */
4434 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4435 if (fixed_regs[local_regparm])
4436 break;
4437
4438 /* We can't use regparm(3) for nested functions as these use
4439 the static chain pointer in the third argument register. */
4440 if (local_regparm == 3
4441 && decl_function_context (decl)
4442 && !DECL_NO_STATIC_CHAIN (decl))
4443 local_regparm = 2;
4444
4445 /* If the function realigns its stack pointer, the prologue will
4446 clobber %ecx. If we've already generated code for the callee,
4447 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4448 scanning the attributes for the self-realigning property. */
4449 f = DECL_STRUCT_FUNCTION (decl);
4450 /* Since the current internal arg pointer won't conflict with
4451 parameter-passing registers, there is no need to change stack
4452 realignment or to adjust the regparm number.
4453
4454 Each fixed register usage increases register pressure,
4455 so fewer registers should be used for argument passing.
4456 This functionality can be overridden by an explicit
4457 regparm value. */
4458 for (regno = 0; regno <= DI_REG; regno++)
4459 if (fixed_regs[regno])
4460 globals++;
4461
4462 local_regparm
4463 = globals < local_regparm ? local_regparm - globals : 0;
4464
4465 if (local_regparm > regparm)
4466 regparm = local_regparm;
4467 }
4468 }
4469
4470 return regparm;
4471 }
4472
4473 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4474 DFmode (2) arguments in SSE registers for a function with the
4475 indicated TYPE and DECL. DECL may be NULL when calling function
4476 indirectly or considering a libcall. Otherwise return 0. */
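/* For example (illustrative), on a 32-bit target compiled with -msse2 a
   function declared
     double dot (double x, double y) __attribute__ ((sseregparm));
   receives X and Y in %xmm0 and %xmm1 instead of on the stack.  */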
4477
4478 static int
4479 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4480 {
4481 gcc_assert (!TARGET_64BIT);
4482
4483 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4484 by the sseregparm attribute. */
4485 if (TARGET_SSEREGPARM
4486 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4487 {
4488 if (!TARGET_SSE)
4489 {
4490 if (warn)
4491 {
4492 if (decl)
4493 error ("Calling %qD with attribute sseregparm without "
4494 "SSE/SSE2 enabled", decl);
4495 else
4496 error ("Calling %qT with attribute sseregparm without "
4497 "SSE/SSE2 enabled", type);
4498 }
4499 return 0;
4500 }
4501
4502 return 2;
4503 }
4504
4505 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4506 (and DFmode for SSE2) arguments in SSE registers. */
4507 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4508 {
4509 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4510 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4511 if (i && i->local)
4512 return TARGET_SSE2 ? 2 : 1;
4513 }
4514
4515 return 0;
4516 }
4517
4518 /* Return true if EAX is live at the start of the function. Used by
4519 ix86_expand_prologue to determine if we need special help before
4520 calling allocate_stack_worker. */
4521
4522 static bool
4523 ix86_eax_live_at_start_p (void)
4524 {
4525 /* Cheat. Don't bother working forward from ix86_function_regparm
4526 to the function type to whether an actual argument is located in
4527 eax. Instead just look at cfg info, which is still close enough
4528 to correct at this point. This gives false positives for broken
4529 functions that might use uninitialized data that happens to be
4530 allocated in eax, but who cares? */
4531 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4532 }
4533
4534 /* Value is the number of bytes of arguments automatically
4535 popped when returning from a subroutine call.
4536 FUNDECL is the declaration node of the function (as a tree),
4537 FUNTYPE is the data type of the function (as a tree),
4538 or for a library call it is an identifier node for the subroutine name.
4539 SIZE is the number of bytes of arguments passed on the stack.
4540
4541 On the 80386, the RTD insn may be used to pop them if the number
4542 of args is fixed, but if the number is variable then the caller
4543 must pop them all. RTD can't be used for library calls now
4544 because the library is compiled with the Unix compiler.
4545 Use of RTD is a selectable option, since it is incompatible with
4546 standard Unix calling sequences. If the option is not selected,
4547 the caller must always pop the args.
4548
4549 The attribute stdcall is equivalent to RTD on a per module basis. */
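/* For example, a non-variadic stdcall function taking two int arguments
   returns with "ret $8", popping its 8 bytes of stack arguments, whereas
   its cdecl counterpart returns with a plain "ret" and leaves the cleanup
   to the caller.  */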
4550
4551 int
4552 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4553 {
4554 int rtd;
4555
4556 /* None of the 64-bit ABIs pop arguments. */
4557 if (TARGET_64BIT)
4558 return 0;
4559
4560 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4561
4562 /* Cdecl functions override -mrtd, and never pop the stack. */
4563 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4564 {
4565 /* Stdcall and fastcall functions will pop the stack if not
4566 variable args. */
4567 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4568 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4569 rtd = 1;
4570
4571 if (rtd && ! stdarg_p (funtype))
4572 return size;
4573 }
4574
4575 /* Lose any fake structure return argument if it is passed on the stack. */
4576 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4577 && !KEEP_AGGREGATE_RETURN_POINTER)
4578 {
4579 int nregs = ix86_function_regparm (funtype, fundecl);
4580 if (nregs == 0)
4581 return GET_MODE_SIZE (Pmode);
4582 }
4583
4584 return 0;
4585 }
4586 \f
4587 /* Argument support functions. */
4588
4589 /* Return true when register may be used to pass function parameters. */
4590 bool
4591 ix86_function_arg_regno_p (int regno)
4592 {
4593 int i;
4594 const int *parm_regs;
4595
4596 if (!TARGET_64BIT)
4597 {
4598 if (TARGET_MACHO)
4599 return (regno < REGPARM_MAX
4600 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4601 else
4602 return (regno < REGPARM_MAX
4603 || (TARGET_MMX && MMX_REGNO_P (regno)
4604 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4605 || (TARGET_SSE && SSE_REGNO_P (regno)
4606 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4607 }
4608
4609 if (TARGET_MACHO)
4610 {
4611 if (SSE_REGNO_P (regno) && TARGET_SSE)
4612 return true;
4613 }
4614 else
4615 {
4616 if (TARGET_SSE && SSE_REGNO_P (regno)
4617 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4618 return true;
4619 }
4620
4621 /* TODO: The function should depend on the current function's ABI, but
4622 builtins.c would need updating then. Therefore we use the
4623 default ABI. */
4624
4625 /* RAX is used as hidden argument to va_arg functions. */
4626 if (ix86_abi == SYSV_ABI && regno == AX_REG)
4627 return true;
4628
4629 if (ix86_abi == MS_ABI)
4630 parm_regs = x86_64_ms_abi_int_parameter_registers;
4631 else
4632 parm_regs = x86_64_int_parameter_registers;
4633 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4634 : X86_64_REGPARM_MAX); i++)
4635 if (regno == parm_regs[i])
4636 return true;
4637 return false;
4638 }
4639
4640 /* Return true if we do not know how to pass TYPE solely in registers. */
4641
4642 static bool
4643 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4644 {
4645 if (must_pass_in_stack_var_size_or_pad (mode, type))
4646 return true;
4647
4648 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4649 The layout_type routine is crafty and tries to trick us into passing
4650 currently unsupported vector types on the stack by using TImode. */
4651 return (!TARGET_64BIT && mode == TImode
4652 && type && TREE_CODE (type) != VECTOR_TYPE);
4653 }
4654
4655 /* Return the size, in bytes, of the area reserved for arguments passed
4656 in registers for the function represented by FNDECL, depending on the
4657 ABI used. */
4658 int
4659 ix86_reg_parm_stack_space (const_tree fndecl)
4660 {
4661 enum calling_abi call_abi = SYSV_ABI;
4662 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4663 call_abi = ix86_function_abi (fndecl);
4664 else
4665 call_abi = ix86_function_type_abi (fndecl);
4666 if (call_abi == MS_ABI)
4667 return 32;
4668 return 0;
4669 }
4670
4671 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
4672 calling ABI used. */
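/* For example (illustrative), a 64-bit SysV compilation can declare
     void callback (void *data) __attribute__ ((ms_abi));
   and calls to CALLBACK then follow the Microsoft x64 convention; the
   sysv_abi attribute performs the opposite override when ix86_abi is MS_ABI.  */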
4673 enum calling_abi
4674 ix86_function_type_abi (const_tree fntype)
4675 {
4676 if (TARGET_64BIT && fntype != NULL)
4677 {
4678 enum calling_abi abi = ix86_abi;
4679 if (abi == SYSV_ABI)
4680 {
4681 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4682 abi = MS_ABI;
4683 }
4684 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
4685 abi = SYSV_ABI;
4686 return abi;
4687 }
4688 return ix86_abi;
4689 }
4690
4691 static enum calling_abi
4692 ix86_function_abi (const_tree fndecl)
4693 {
4694 if (! fndecl)
4695 return ix86_abi;
4696 return ix86_function_type_abi (TREE_TYPE (fndecl));
4697 }
4698
4699 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
4700 calling ABI used. */
4701 enum calling_abi
4702 ix86_cfun_abi (void)
4703 {
4704 if (! cfun || ! TARGET_64BIT)
4705 return ix86_abi;
4706 return cfun->machine->call_abi;
4707 }
4708
4709 /* regclass.c */
4710 extern void init_regs (void);
4711
4712 /* Implementation of the call-ABI-switching target hook. The call-used
4713 register sets appropriate for FNDECL are installed. See also
4714 CONDITIONAL_REGISTER_USAGE for more details. */
4715 void
4716 ix86_call_abi_override (const_tree fndecl)
4717 {
4718 if (fndecl == NULL_TREE)
4719 cfun->machine->call_abi = ix86_abi;
4720 else
4721 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4722 }
4723
4724 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid
4725 expensive re-initialization via init_regs each time we switch function
4726 context, since this is needed only during RTL expansion. */
4727 static void
4728 ix86_maybe_switch_abi (void)
4729 {
4730 if (TARGET_64BIT &&
4731 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4732 reinit_regs ();
4733 }
4734
4735 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4736 for a call to a function whose data type is FNTYPE.
4737 For a library call, FNTYPE is 0. */
4738
4739 void
4740 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4741 tree fntype, /* tree ptr for function decl */
4742 rtx libname, /* SYMBOL_REF of library name or 0 */
4743 tree fndecl)
4744 {
4745 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4746 memset (cum, 0, sizeof (*cum));
4747
4748 if (fndecl)
4749 cum->call_abi = ix86_function_abi (fndecl);
4750 else
4751 cum->call_abi = ix86_function_type_abi (fntype);
4752 /* Set up the number of registers to use for passing arguments. */
4753
4754 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4755 sorry ("ms_abi attribute requires -maccumulate-outgoing-args or a subtarget optimization implying it");
4756 cum->nregs = ix86_regparm;
4757 if (TARGET_64BIT)
4758 {
4759 if (cum->call_abi != ix86_abi)
4760 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4761 : X64_REGPARM_MAX;
4762 }
4763 if (TARGET_SSE)
4764 {
4765 cum->sse_nregs = SSE_REGPARM_MAX;
4766 if (TARGET_64BIT)
4767 {
4768 if (cum->call_abi != ix86_abi)
4769 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4770 : X64_SSE_REGPARM_MAX;
4771 }
4772 }
4773 if (TARGET_MMX)
4774 cum->mmx_nregs = MMX_REGPARM_MAX;
4775 cum->warn_avx = true;
4776 cum->warn_sse = true;
4777 cum->warn_mmx = true;
4778
4779 /* Because the type might mismatch between caller and callee, we need to
4780 use the actual type of the function for local calls.
4781 FIXME: cgraph_analyze can be told to actually record whether a function
4782 uses va_start, so for local functions maybe_vaarg can be made more
4783 aggressive, helping K&R code.
4784 FIXME: once the type system is fixed, we won't need this code anymore. */
4785 if (i && i->local)
4786 fntype = TREE_TYPE (fndecl);
4787 cum->maybe_vaarg = (fntype
4788 ? (!prototype_p (fntype) || stdarg_p (fntype))
4789 : !libname);
4790
4791 if (!TARGET_64BIT)
4792 {
4793 /* If there are variable arguments, then we won't pass anything
4794 in registers in 32-bit mode. */
4795 if (stdarg_p (fntype))
4796 {
4797 cum->nregs = 0;
4798 cum->sse_nregs = 0;
4799 cum->mmx_nregs = 0;
4800 cum->warn_avx = 0;
4801 cum->warn_sse = 0;
4802 cum->warn_mmx = 0;
4803 return;
4804 }
4805
4806 /* Use ecx and edx registers if function has fastcall attribute,
4807 else look for regparm information. */
4808 if (fntype)
4809 {
4810 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4811 {
4812 cum->nregs = 2;
4813 cum->fastcall = 1;
4814 }
4815 else
4816 cum->nregs = ix86_function_regparm (fntype, fndecl);
4817 }
4818
4819 /* Set up the number of SSE registers used for passing SFmode
4820 and DFmode arguments. Warn for mismatching ABI. */
4821 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4822 }
4823 }
4824
4825 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4826 But in the case of vector types, it is some vector mode.
4827
4828 When we have only some of our vector isa extensions enabled, then there
4829 are some modes for which vector_mode_supported_p is false. For these
4830 modes, the generic vector support in gcc will choose some non-vector mode
4831 in order to implement the type. By computing the natural mode, we'll
4832 select the proper ABI location for the operand and not depend on whatever
4833 the middle-end decides to do with these vector types.
4834
4835 The middle-end can't deal with vector types larger than 16 bytes. In this
4836 case, we return the original mode and warn about the ABI change if CUM
4837 isn't NULL. */
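/* For example (illustrative), a 16-byte vector type such as
     typedef int v4si __attribute__ ((vector_size (16)));
   has the natural mode V4SImode here even when the middle end fell back to
   a non-vector mode for it because SSE was not enabled.  */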
4838
4839 static enum machine_mode
4840 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4841 {
4842 enum machine_mode mode = TYPE_MODE (type);
4843
4844 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4845 {
4846 HOST_WIDE_INT size = int_size_in_bytes (type);
4847 if ((size == 8 || size == 16 || size == 32)
4848 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4849 && TYPE_VECTOR_SUBPARTS (type) > 1)
4850 {
4851 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4852
4853 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4854 mode = MIN_MODE_VECTOR_FLOAT;
4855 else
4856 mode = MIN_MODE_VECTOR_INT;
4857
4858 /* Get the mode which has this inner mode and number of units. */
4859 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4860 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4861 && GET_MODE_INNER (mode) == innermode)
4862 {
4863 if (size == 32 && !TARGET_AVX)
4864 {
4865 static bool warnedavx;
4866
4867 if (cum
4868 && !warnedavx
4869 && cum->warn_avx)
4870 {
4871 warnedavx = true;
4872 warning (0, "AVX vector argument without AVX "
4873 "enabled changes the ABI");
4874 }
4875 return TYPE_MODE (type);
4876 }
4877 else
4878 return mode;
4879 }
4880
4881 gcc_unreachable ();
4882 }
4883 }
4884
4885 return mode;
4886 }
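
/* Illustration (a sketch only): with -mno-sse, a type such as
   int __attribute__ ((vector_size (16))) is given a non-vector mode by the
   middle-end, but type_natural_mode above still walks the vector modes and
   returns V4SImode, so the argument is classified according to the psABI
   rather than according to the fallback mode.  */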
4887
4888 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4889 this may not agree with the mode that the type system has chosen for the
4890 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4891 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4892
4893 static rtx
4894 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4895 unsigned int regno)
4896 {
4897 rtx tmp;
4898
4899 if (orig_mode != BLKmode)
4900 tmp = gen_rtx_REG (orig_mode, regno);
4901 else
4902 {
4903 tmp = gen_rtx_REG (mode, regno);
4904 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4905 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4906 }
4907
4908 return tmp;
4909 }
4910
4911 /* x86-64 register passing implementation. See the x86-64 ABI for details.
4912 The goal of this code is to classify each eightbyte of an incoming argument
4913 by register class and assign registers accordingly. */
4914
4915 /* Return the union class of CLASS1 and CLASS2.
4916 See the x86-64 PS ABI for details. */
4917
4918 static enum x86_64_reg_class
4919 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4920 {
4921 /* Rule #1: If both classes are equal, this is the resulting class. */
4922 if (class1 == class2)
4923 return class1;
4924
4925 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4926 the other class. */
4927 if (class1 == X86_64_NO_CLASS)
4928 return class2;
4929 if (class2 == X86_64_NO_CLASS)
4930 return class1;
4931
4932 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4933 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4934 return X86_64_MEMORY_CLASS;
4935
4936 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4937 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4938 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4939 return X86_64_INTEGERSI_CLASS;
4940 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4941 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4942 return X86_64_INTEGER_CLASS;
4943
4944 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4945 MEMORY is used. */
4946 if (class1 == X86_64_X87_CLASS
4947 || class1 == X86_64_X87UP_CLASS
4948 || class1 == X86_64_COMPLEX_X87_CLASS
4949 || class2 == X86_64_X87_CLASS
4950 || class2 == X86_64_X87UP_CLASS
4951 || class2 == X86_64_COMPLEX_X87_CLASS)
4952 return X86_64_MEMORY_CLASS;
4953
4954 /* Rule #6: Otherwise class SSE is used. */
4955 return X86_64_SSE_CLASS;
4956 }
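
/* For example (illustrative only): in struct { int i; float f; } both fields
   share the first eightbyte; the int classifies as X86_64_INTEGERSI_CLASS and
   the float at offset 4 as X86_64_SSE_CLASS, and rule #4 above merges them to
   X86_64_INTEGER_CLASS, so the whole struct travels in a single integer
   register.  */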
4957
4958 /* Classify the argument of type TYPE and mode MODE.
4959 CLASSES will be filled by the register class used to pass each word
4960 of the operand. The number of words is returned. In case the parameter
4961 should be passed in memory, 0 is returned. As a special case for zero
4962 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4963
4964 BIT_OFFSET is used internally for handling records and specifies the
4965 offset, in bits modulo 256, to avoid overflow cases.
4966
4967 See the x86-64 PS ABI for details.
4968 */
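
/* A worked example (a sketch, not normative): struct { double d; long l; }
   is 16 bytes, so words == 2.  The double classifies its eightbyte as
   X86_64_SSEDF_CLASS and the long classifies the second one as
   X86_64_INTEGER_CLASS, so the classification below fills
   classes[] = { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS } and returns 2;
   the struct is then passed in one SSE register and one integer register.  */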
4969
4970 static int
4971 classify_argument (enum machine_mode mode, const_tree type,
4972 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4973 {
4974 HOST_WIDE_INT bytes =
4975 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4976 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4977
4978 /* Variable sized entities are always passed/returned in memory. */
4979 if (bytes < 0)
4980 return 0;
4981
4982 if (mode != VOIDmode
4983 && targetm.calls.must_pass_in_stack (mode, type))
4984 return 0;
4985
4986 if (type && AGGREGATE_TYPE_P (type))
4987 {
4988 int i;
4989 tree field;
4990 enum x86_64_reg_class subclasses[MAX_CLASSES];
4991
4992 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4993 if (bytes > 32)
4994 return 0;
4995
4996 for (i = 0; i < words; i++)
4997 classes[i] = X86_64_NO_CLASS;
4998
4999 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5000 signal the memory class, so handle this as a special case. */
5001 if (!words)
5002 {
5003 classes[0] = X86_64_NO_CLASS;
5004 return 1;
5005 }
5006
5007 /* Classify each field of record and merge classes. */
5008 switch (TREE_CODE (type))
5009 {
5010 case RECORD_TYPE:
5011 /* And now merge the fields of structure. */
5012 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5013 {
5014 if (TREE_CODE (field) == FIELD_DECL)
5015 {
5016 int num;
5017
5018 if (TREE_TYPE (field) == error_mark_node)
5019 continue;
5020
5021 /* Bitfields are always classified as integer. Handle them
5022 early, since later code would consider them to be
5023 misaligned integers. */
5024 if (DECL_BIT_FIELD (field))
5025 {
5026 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5027 i < ((int_bit_position (field) + (bit_offset % 64))
5028 + tree_low_cst (DECL_SIZE (field), 0)
5029 + 63) / 8 / 8; i++)
5030 classes[i] =
5031 merge_classes (X86_64_INTEGER_CLASS,
5032 classes[i]);
5033 }
5034 else
5035 {
5036 int pos;
5037
5038 type = TREE_TYPE (field);
5039
5040 /* Flexible array member is ignored. */
5041 if (TYPE_MODE (type) == BLKmode
5042 && TREE_CODE (type) == ARRAY_TYPE
5043 && TYPE_SIZE (type) == NULL_TREE
5044 && TYPE_DOMAIN (type) != NULL_TREE
5045 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5046 == NULL_TREE))
5047 {
5048 static bool warned;
5049
5050 if (!warned && warn_psabi)
5051 {
5052 warned = true;
5053 inform (input_location,
5054 "The ABI of passing struct with"
5055 " a flexible array member has"
5056 " changed in GCC 4.4");
5057 }
5058 continue;
5059 }
5060 num = classify_argument (TYPE_MODE (type), type,
5061 subclasses,
5062 (int_bit_position (field)
5063 + bit_offset) % 256);
5064 if (!num)
5065 return 0;
5066 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5067 for (i = 0; i < num && (i + pos) < words; i++)
5068 classes[i + pos] =
5069 merge_classes (subclasses[i], classes[i + pos]);
5070 }
5071 }
5072 }
5073 break;
5074
5075 case ARRAY_TYPE:
5076 /* Arrays are handled as small records. */
5077 {
5078 int num;
5079 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5080 TREE_TYPE (type), subclasses, bit_offset);
5081 if (!num)
5082 return 0;
5083
5084 /* The partial classes are now full classes. */
5085 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5086 subclasses[0] = X86_64_SSE_CLASS;
5087 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5088 && !((bit_offset % 64) == 0 && bytes == 4))
5089 subclasses[0] = X86_64_INTEGER_CLASS;
5090
5091 for (i = 0; i < words; i++)
5092 classes[i] = subclasses[i % num];
5093
5094 break;
5095 }
5096 case UNION_TYPE:
5097 case QUAL_UNION_TYPE:
5098 /* Unions are similar to RECORD_TYPE, but the offset is
5099 always 0. */
5100 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5101 {
5102 if (TREE_CODE (field) == FIELD_DECL)
5103 {
5104 int num;
5105
5106 if (TREE_TYPE (field) == error_mark_node)
5107 continue;
5108
5109 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5110 TREE_TYPE (field), subclasses,
5111 bit_offset);
5112 if (!num)
5113 return 0;
5114 for (i = 0; i < num; i++)
5115 classes[i] = merge_classes (subclasses[i], classes[i]);
5116 }
5117 }
5118 break;
5119
5120 default:
5121 gcc_unreachable ();
5122 }
5123
5124 if (words > 2)
5125 {
5126 /* When the size exceeds 16 bytes, everything must be
5127 passed in memory unless the first class is
5128 X86_64_SSE_CLASS and all the remaining ones are
5129 X86_64_SSEUP_CLASS. */
5130 if (classes[0] != X86_64_SSE_CLASS)
5131 return 0;
5132
5133 for (i = 1; i < words; i++)
5134 if (classes[i] != X86_64_SSEUP_CLASS)
5135 return 0;
5136 }
5137
5138 /* Final merger cleanup. */
5139 for (i = 0; i < words; i++)
5140 {
5141 /* If one class is MEMORY, everything should be passed in
5142 memory. */
5143 if (classes[i] == X86_64_MEMORY_CLASS)
5144 return 0;
5145
5146 /* The X86_64_SSEUP_CLASS should be always preceded by
5147 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5148 if (classes[i] == X86_64_SSEUP_CLASS
5149 && classes[i - 1] != X86_64_SSE_CLASS
5150 && classes[i - 1] != X86_64_SSEUP_CLASS)
5151 {
5152 /* The first one should never be X86_64_SSEUP_CLASS. */
5153 gcc_assert (i != 0);
5154 classes[i] = X86_64_SSE_CLASS;
5155 }
5156
5157 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5158 everything should be passed in memory. */
5159 if (classes[i] == X86_64_X87UP_CLASS
5160 && (classes[i - 1] != X86_64_X87_CLASS))
5161 {
5162 static bool warned;
5163
5164 /* The first one should never be X86_64_X87UP_CLASS. */
5165 gcc_assert (i != 0);
5166 if (!warned && warn_psabi)
5167 {
5168 warned = true;
5169 inform (input_location,
5170 "The ABI of passing union with long double"
5171 " has changed in GCC 4.4");
5172 }
5173 return 0;
5174 }
5175 }
5176 return words;
5177 }
5178
5179 /* Compute the alignment needed. We align all types to their natural
5180 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5181 if (mode != VOIDmode && mode != BLKmode)
5182 {
5183 int mode_alignment = GET_MODE_BITSIZE (mode);
5184
5185 if (mode == XFmode)
5186 mode_alignment = 128;
5187 else if (mode == XCmode)
5188 mode_alignment = 256;
5189 if (COMPLEX_MODE_P (mode))
5190 mode_alignment /= 2;
5191 /* Misaligned fields are always returned in memory. */
5192 if (bit_offset % mode_alignment)
5193 return 0;
5194 }
5195
5196 /* For V1xx modes, just use the base mode. */
5197 if (VECTOR_MODE_P (mode) && mode != V1DImode
5198 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5199 mode = GET_MODE_INNER (mode);
5200
5201 /* Classification of atomic types. */
5202 switch (mode)
5203 {
5204 case SDmode:
5205 case DDmode:
5206 classes[0] = X86_64_SSE_CLASS;
5207 return 1;
5208 case TDmode:
5209 classes[0] = X86_64_SSE_CLASS;
5210 classes[1] = X86_64_SSEUP_CLASS;
5211 return 2;
5212 case DImode:
5213 case SImode:
5214 case HImode:
5215 case QImode:
5216 case CSImode:
5217 case CHImode:
5218 case CQImode:
5219 {
5220 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5221
5222 if (size <= 32)
5223 {
5224 classes[0] = X86_64_INTEGERSI_CLASS;
5225 return 1;
5226 }
5227 else if (size <= 64)
5228 {
5229 classes[0] = X86_64_INTEGER_CLASS;
5230 return 1;
5231 }
5232 else if (size <= 64+32)
5233 {
5234 classes[0] = X86_64_INTEGER_CLASS;
5235 classes[1] = X86_64_INTEGERSI_CLASS;
5236 return 2;
5237 }
5238 else if (size <= 64+64)
5239 {
5240 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5241 return 2;
5242 }
5243 else
5244 gcc_unreachable ();
5245 }
5246 case CDImode:
5247 case TImode:
5248 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5249 return 2;
5250 case COImode:
5251 case OImode:
5252 /* OImode shouldn't be used directly. */
5253 gcc_unreachable ();
5254 case CTImode:
5255 return 0;
5256 case SFmode:
5257 if (!(bit_offset % 64))
5258 classes[0] = X86_64_SSESF_CLASS;
5259 else
5260 classes[0] = X86_64_SSE_CLASS;
5261 return 1;
5262 case DFmode:
5263 classes[0] = X86_64_SSEDF_CLASS;
5264 return 1;
5265 case XFmode:
5266 classes[0] = X86_64_X87_CLASS;
5267 classes[1] = X86_64_X87UP_CLASS;
5268 return 2;
5269 case TFmode:
5270 classes[0] = X86_64_SSE_CLASS;
5271 classes[1] = X86_64_SSEUP_CLASS;
5272 return 2;
5273 case SCmode:
5274 classes[0] = X86_64_SSE_CLASS;
5275 if (!(bit_offset % 64))
5276 return 1;
5277 else
5278 {
5279 static bool warned;
5280
5281 if (!warned && warn_psabi)
5282 {
5283 warned = true;
5284 inform (input_location,
5285 "The ABI of passing structure with complex float"
5286 " member has changed in GCC 4.4");
5287 }
5288 classes[1] = X86_64_SSESF_CLASS;
5289 return 2;
5290 }
5291 case DCmode:
5292 classes[0] = X86_64_SSEDF_CLASS;
5293 classes[1] = X86_64_SSEDF_CLASS;
5294 return 2;
5295 case XCmode:
5296 classes[0] = X86_64_COMPLEX_X87_CLASS;
5297 return 1;
5298 case TCmode:
5299 /* This mode is larger than 16 bytes. */
5300 return 0;
5301 case V8SFmode:
5302 case V8SImode:
5303 case V32QImode:
5304 case V16HImode:
5305 case V4DFmode:
5306 case V4DImode:
5307 classes[0] = X86_64_SSE_CLASS;
5308 classes[1] = X86_64_SSEUP_CLASS;
5309 classes[2] = X86_64_SSEUP_CLASS;
5310 classes[3] = X86_64_SSEUP_CLASS;
5311 return 4;
5312 case V4SFmode:
5313 case V4SImode:
5314 case V16QImode:
5315 case V8HImode:
5316 case V2DFmode:
5317 case V2DImode:
5318 classes[0] = X86_64_SSE_CLASS;
5319 classes[1] = X86_64_SSEUP_CLASS;
5320 return 2;
5321 case V1DImode:
5322 case V2SFmode:
5323 case V2SImode:
5324 case V4HImode:
5325 case V8QImode:
5326 classes[0] = X86_64_SSE_CLASS;
5327 return 1;
5328 case BLKmode:
5329 case VOIDmode:
5330 return 0;
5331 default:
5332 gcc_assert (VECTOR_MODE_P (mode));
5333
5334 if (bytes > 16)
5335 return 0;
5336
5337 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5338
5339 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5340 classes[0] = X86_64_INTEGERSI_CLASS;
5341 else
5342 classes[0] = X86_64_INTEGER_CLASS;
5343 classes[1] = X86_64_INTEGER_CLASS;
5344 return 1 + (bytes > 8);
5345 }
5346 }
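
/* A few scalar cases of the switch above, for orientation (a sketch):
   __int128 (TImode) classifies as two X86_64_INTEGER_CLASS eightbytes,
   long double (XFmode) as X86_64_X87_CLASS + X86_64_X87UP_CLASS (accepted
   only for return values, see examine_argument below), and __float128
   (TFmode) as X86_64_SSE_CLASS + X86_64_SSEUP_CLASS, i.e. a single XMM
   register.  */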
5347
5348 /* Examine the argument and return the number of registers required in each
5349 class. Return 0 iff the parameter should be passed in memory. */
5350 static int
5351 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5352 int *int_nregs, int *sse_nregs)
5353 {
5354 enum x86_64_reg_class regclass[MAX_CLASSES];
5355 int n = classify_argument (mode, type, regclass, 0);
5356
5357 *int_nregs = 0;
5358 *sse_nregs = 0;
5359 if (!n)
5360 return 0;
5361 for (n--; n >= 0; n--)
5362 switch (regclass[n])
5363 {
5364 case X86_64_INTEGER_CLASS:
5365 case X86_64_INTEGERSI_CLASS:
5366 (*int_nregs)++;
5367 break;
5368 case X86_64_SSE_CLASS:
5369 case X86_64_SSESF_CLASS:
5370 case X86_64_SSEDF_CLASS:
5371 (*sse_nregs)++;
5372 break;
5373 case X86_64_NO_CLASS:
5374 case X86_64_SSEUP_CLASS:
5375 break;
5376 case X86_64_X87_CLASS:
5377 case X86_64_X87UP_CLASS:
5378 if (!in_return)
5379 return 0;
5380 break;
5381 case X86_64_COMPLEX_X87_CLASS:
5382 return in_return ? 2 : 0;
5383 case X86_64_MEMORY_CLASS:
5384 gcc_unreachable ();
5385 }
5386 return 1;
5387 }
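
/* E.g. for the struct { double d; long l; } example above, examine_argument
   sets *int_nregs = 1 and *sse_nregs = 1 and returns 1; for a long double
   argument (in_return == 0) the X87 classes make it return 0, i.e. the value
   is passed in memory.  */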
5388
5389 /* Construct container for the argument used by GCC interface. See
5390 FUNCTION_ARG for the detailed description. */
5391
5392 static rtx
5393 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5394 const_tree type, int in_return, int nintregs, int nsseregs,
5395 const int *intreg, int sse_regno)
5396 {
5397 /* The following variables hold the static issued_error state. */
5398 static bool issued_sse_arg_error;
5399 static bool issued_sse_ret_error;
5400 static bool issued_x87_ret_error;
5401
5402 enum machine_mode tmpmode;
5403 int bytes =
5404 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5405 enum x86_64_reg_class regclass[MAX_CLASSES];
5406 int n;
5407 int i;
5408 int nexps = 0;
5409 int needed_sseregs, needed_intregs;
5410 rtx exp[MAX_CLASSES];
5411 rtx ret;
5412
5413 n = classify_argument (mode, type, regclass, 0);
5414 if (!n)
5415 return NULL;
5416 if (!examine_argument (mode, type, in_return, &needed_intregs,
5417 &needed_sseregs))
5418 return NULL;
5419 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5420 return NULL;
5421
5422 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5423 some less clueful developer tries to use floating-point anyway. */
5424 if (needed_sseregs && !TARGET_SSE)
5425 {
5426 if (in_return)
5427 {
5428 if (!issued_sse_ret_error)
5429 {
5430 error ("SSE register return with SSE disabled");
5431 issued_sse_ret_error = true;
5432 }
5433 }
5434 else if (!issued_sse_arg_error)
5435 {
5436 error ("SSE register argument with SSE disabled");
5437 issued_sse_arg_error = true;
5438 }
5439 return NULL;
5440 }
5441
5442 /* Likewise, error if the ABI requires us to return values in the
5443 x87 registers and the user specified -mno-80387. */
5444 if (!TARGET_80387 && in_return)
5445 for (i = 0; i < n; i++)
5446 if (regclass[i] == X86_64_X87_CLASS
5447 || regclass[i] == X86_64_X87UP_CLASS
5448 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5449 {
5450 if (!issued_x87_ret_error)
5451 {
5452 error ("x87 register return with x87 disabled");
5453 issued_x87_ret_error = true;
5454 }
5455 return NULL;
5456 }
5457
5458 /* First construct the simple cases. Avoid SCmode, since we want to use
5459 a single register to pass this type. */
5460 if (n == 1 && mode != SCmode)
5461 switch (regclass[0])
5462 {
5463 case X86_64_INTEGER_CLASS:
5464 case X86_64_INTEGERSI_CLASS:
5465 return gen_rtx_REG (mode, intreg[0]);
5466 case X86_64_SSE_CLASS:
5467 case X86_64_SSESF_CLASS:
5468 case X86_64_SSEDF_CLASS:
5469 if (mode != BLKmode)
5470 return gen_reg_or_parallel (mode, orig_mode,
5471 SSE_REGNO (sse_regno));
5472 break;
5473 case X86_64_X87_CLASS:
5474 case X86_64_COMPLEX_X87_CLASS:
5475 return gen_rtx_REG (mode, FIRST_STACK_REG);
5476 case X86_64_NO_CLASS:
5477 /* Zero sized array, struct or class. */
5478 return NULL;
5479 default:
5480 gcc_unreachable ();
5481 }
5482 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5483 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5484 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5485 if (n == 4
5486 && regclass[0] == X86_64_SSE_CLASS
5487 && regclass[1] == X86_64_SSEUP_CLASS
5488 && regclass[2] == X86_64_SSEUP_CLASS
5489 && regclass[3] == X86_64_SSEUP_CLASS
5490 && mode != BLKmode)
5491 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5492
5493 if (n == 2
5494 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5495 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5496 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5497 && regclass[1] == X86_64_INTEGER_CLASS
5498 && (mode == CDImode || mode == TImode || mode == TFmode)
5499 && intreg[0] + 1 == intreg[1])
5500 return gen_rtx_REG (mode, intreg[0]);
5501
5502 /* Otherwise figure out the entries of the PARALLEL. */
5503 for (i = 0; i < n; i++)
5504 {
5505 int pos;
5506
5507 switch (regclass[i])
5508 {
5509 case X86_64_NO_CLASS:
5510 break;
5511 case X86_64_INTEGER_CLASS:
5512 case X86_64_INTEGERSI_CLASS:
5513 /* Merge TImodes on aligned occasions here too. */
5514 if (i * 8 + 8 > bytes)
5515 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5516 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5517 tmpmode = SImode;
5518 else
5519 tmpmode = DImode;
5520 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
5521 if (tmpmode == BLKmode)
5522 tmpmode = DImode;
5523 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5524 gen_rtx_REG (tmpmode, *intreg),
5525 GEN_INT (i*8));
5526 intreg++;
5527 break;
5528 case X86_64_SSESF_CLASS:
5529 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5530 gen_rtx_REG (SFmode,
5531 SSE_REGNO (sse_regno)),
5532 GEN_INT (i*8));
5533 sse_regno++;
5534 break;
5535 case X86_64_SSEDF_CLASS:
5536 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5537 gen_rtx_REG (DFmode,
5538 SSE_REGNO (sse_regno)),
5539 GEN_INT (i*8));
5540 sse_regno++;
5541 break;
5542 case X86_64_SSE_CLASS:
5543 pos = i;
5544 switch (n)
5545 {
5546 case 1:
5547 tmpmode = DImode;
5548 break;
5549 case 2:
5550 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5551 {
5552 tmpmode = TImode;
5553 i++;
5554 }
5555 else
5556 tmpmode = DImode;
5557 break;
5558 case 4:
5559 gcc_assert (i == 0
5560 && regclass[1] == X86_64_SSEUP_CLASS
5561 && regclass[2] == X86_64_SSEUP_CLASS
5562 && regclass[3] == X86_64_SSEUP_CLASS);
5563 tmpmode = OImode;
5564 i += 3;
5565 break;
5566 default:
5567 gcc_unreachable ();
5568 }
5569 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5570 gen_rtx_REG (tmpmode,
5571 SSE_REGNO (sse_regno)),
5572 GEN_INT (pos*8));
5573 sse_regno++;
5574 break;
5575 default:
5576 gcc_unreachable ();
5577 }
5578 }
5579
5580 /* Empty aligned struct, union or class. */
5581 if (nexps == 0)
5582 return NULL;
5583
5584 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5585 for (i = 0; i < nexps; i++)
5586 XVECEXP (ret, 0, i) = exp [i];
5587 return ret;
5588 }
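
/* Continuing the { double; long } example (a sketch, assuming xmm0 and di are
   the next free registers): the simple cases above do not apply, so the loop
   builds a two-element PARALLEL along the lines of
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])
   whereas homogeneous cases such as TFmode short-circuit to a bare REG.  */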
5589
5590 /* Update the data in CUM to advance over an argument of mode MODE
5591 and data type TYPE. (TYPE is null for libcalls where that information
5592 may not be available.) */
5593
5594 static void
5595 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5596 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5597 {
5598 switch (mode)
5599 {
5600 default:
5601 break;
5602
5603 case BLKmode:
5604 if (bytes < 0)
5605 break;
5606 /* FALLTHRU */
5607
5608 case DImode:
5609 case SImode:
5610 case HImode:
5611 case QImode:
5612 cum->words += words;
5613 cum->nregs -= words;
5614 cum->regno += words;
5615
5616 if (cum->nregs <= 0)
5617 {
5618 cum->nregs = 0;
5619 cum->regno = 0;
5620 }
5621 break;
5622
5623 case OImode:
5624 /* OImode shouldn't be used directly. */
5625 gcc_unreachable ();
5626
5627 case DFmode:
5628 if (cum->float_in_sse < 2)
5629 break;
5630 case SFmode:
5631 if (cum->float_in_sse < 1)
5632 break;
5633 /* FALLTHRU */
5634
5635 case V8SFmode:
5636 case V8SImode:
5637 case V32QImode:
5638 case V16HImode:
5639 case V4DFmode:
5640 case V4DImode:
5641 case TImode:
5642 case V16QImode:
5643 case V8HImode:
5644 case V4SImode:
5645 case V2DImode:
5646 case V4SFmode:
5647 case V2DFmode:
5648 if (!type || !AGGREGATE_TYPE_P (type))
5649 {
5650 cum->sse_words += words;
5651 cum->sse_nregs -= 1;
5652 cum->sse_regno += 1;
5653 if (cum->sse_nregs <= 0)
5654 {
5655 cum->sse_nregs = 0;
5656 cum->sse_regno = 0;
5657 }
5658 }
5659 break;
5660
5661 case V8QImode:
5662 case V4HImode:
5663 case V2SImode:
5664 case V2SFmode:
5665 case V1DImode:
5666 if (!type || !AGGREGATE_TYPE_P (type))
5667 {
5668 cum->mmx_words += words;
5669 cum->mmx_nregs -= 1;
5670 cum->mmx_regno += 1;
5671 if (cum->mmx_nregs <= 0)
5672 {
5673 cum->mmx_nregs = 0;
5674 cum->mmx_regno = 0;
5675 }
5676 }
5677 break;
5678 }
5679 }
5680
5681 static void
5682 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5683 tree type, HOST_WIDE_INT words, int named)
5684 {
5685 int int_nregs, sse_nregs;
5686
5687 /* Unnamed 256bit vector mode parameters are passed on stack. */
5688 if (!named && VALID_AVX256_REG_MODE (mode))
5689 return;
5690
5691 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5692 cum->words += words;
5693 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5694 {
5695 cum->nregs -= int_nregs;
5696 cum->sse_nregs -= sse_nregs;
5697 cum->regno += int_nregs;
5698 cum->sse_regno += sse_nregs;
5699 }
5700 else
5701 cum->words += words;
5702 }
5703
5704 static void
5705 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5706 HOST_WIDE_INT words)
5707 {
5708 /* Otherwise, this should be passed indirectly. */
5709 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5710
5711 cum->words += words;
5712 if (cum->nregs > 0)
5713 {
5714 cum->nregs -= 1;
5715 cum->regno += 1;
5716 }
5717 }
5718
5719 void
5720 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5721 tree type, int named)
5722 {
5723 HOST_WIDE_INT bytes, words;
5724
5725 if (mode == BLKmode)
5726 bytes = int_size_in_bytes (type);
5727 else
5728 bytes = GET_MODE_SIZE (mode);
5729 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5730
5731 if (type)
5732 mode = type_natural_mode (type, NULL);
5733
5734 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5735 function_arg_advance_ms_64 (cum, bytes, words);
5736 else if (TARGET_64BIT)
5737 function_arg_advance_64 (cum, mode, type, words, named);
5738 else
5739 function_arg_advance_32 (cum, mode, type, bytes, words);
5740 }
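
/* A sketch of the 64-bit SysV case: advancing over the arguments of
   extern void f (int, double) consumes one integer register slot for the int
   (cum->regno goes from 0 to 1) and one SSE slot for the double
   (cum->sse_regno from 0 to 1); once the corresponding nregs counters are
   exhausted, further arguments only bump cum->words and go on the stack.  */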
5741
5742 /* Define where to put the arguments to a function.
5743 Value is zero to push the argument on the stack,
5744 or a hard register in which to store the argument.
5745
5746 MODE is the argument's machine mode.
5747 TYPE is the data type of the argument (as a tree).
5748 This is null for libcalls where that information may
5749 not be available.
5750 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5751 the preceding args and about the function being called.
5752 NAMED is nonzero if this argument is a named parameter
5753 (otherwise it is an extra parameter matching an ellipsis). */
5754
5755 static rtx
5756 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5757 enum machine_mode orig_mode, tree type,
5758 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5759 {
5760 static bool warnedsse, warnedmmx;
5761
5762 /* Avoid the AL settings for the Unix64 ABI. */
5763 if (mode == VOIDmode)
5764 return constm1_rtx;
5765
5766 switch (mode)
5767 {
5768 default:
5769 break;
5770
5771 case BLKmode:
5772 if (bytes < 0)
5773 break;
5774 /* FALLTHRU */
5775 case DImode:
5776 case SImode:
5777 case HImode:
5778 case QImode:
5779 if (words <= cum->nregs)
5780 {
5781 int regno = cum->regno;
5782
5783 /* Fastcall allocates the first two DWORD (SImode) or
5784 smaller arguments to ECX and EDX if the argument isn't
5785 an aggregate type. */
5786 if (cum->fastcall)
5787 {
5788 if (mode == BLKmode
5789 || mode == DImode
5790 || (type && AGGREGATE_TYPE_P (type)))
5791 break;
5792
5793 /* ECX not EAX is the first allocated register. */
5794 if (regno == AX_REG)
5795 regno = CX_REG;
5796 }
5797 return gen_rtx_REG (mode, regno);
5798 }
5799 break;
5800
5801 case DFmode:
5802 if (cum->float_in_sse < 2)
5803 break;
5804 case SFmode:
5805 if (cum->float_in_sse < 1)
5806 break;
5807 /* FALLTHRU */
5808 case TImode:
5809 /* In 32-bit mode, we pass TImode in xmm registers. */
5810 case V16QImode:
5811 case V8HImode:
5812 case V4SImode:
5813 case V2DImode:
5814 case V4SFmode:
5815 case V2DFmode:
5816 if (!type || !AGGREGATE_TYPE_P (type))
5817 {
5818 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5819 {
5820 warnedsse = true;
5821 warning (0, "SSE vector argument without SSE enabled "
5822 "changes the ABI");
5823 }
5824 if (cum->sse_nregs)
5825 return gen_reg_or_parallel (mode, orig_mode,
5826 cum->sse_regno + FIRST_SSE_REG);
5827 }
5828 break;
5829
5830 case OImode:
5831 /* OImode shouldn't be used directly. */
5832 gcc_unreachable ();
5833
5834 case V8SFmode:
5835 case V8SImode:
5836 case V32QImode:
5837 case V16HImode:
5838 case V4DFmode:
5839 case V4DImode:
5840 if (!type || !AGGREGATE_TYPE_P (type))
5841 {
5842 if (cum->sse_nregs)
5843 return gen_reg_or_parallel (mode, orig_mode,
5844 cum->sse_regno + FIRST_SSE_REG);
5845 }
5846 break;
5847
5848 case V8QImode:
5849 case V4HImode:
5850 case V2SImode:
5851 case V2SFmode:
5852 case V1DImode:
5853 if (!type || !AGGREGATE_TYPE_P (type))
5854 {
5855 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5856 {
5857 warnedmmx = true;
5858 warning (0, "MMX vector argument without MMX enabled "
5859 "changes the ABI");
5860 }
5861 if (cum->mmx_nregs)
5862 return gen_reg_or_parallel (mode, orig_mode,
5863 cum->mmx_regno + FIRST_MMX_REG);
5864 }
5865 break;
5866 }
5867
5868 return NULL_RTX;
5869 }
5870
5871 static rtx
5872 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5873 enum machine_mode orig_mode, tree type, int named)
5874 {
5875 /* Handle a hidden AL argument containing the number of registers
5876 for varargs x86-64 functions. */
5877 if (mode == VOIDmode)
5878 return GEN_INT (cum->maybe_vaarg
5879 ? (cum->sse_nregs < 0
5880 ? (cum->call_abi == ix86_abi
5881 ? SSE_REGPARM_MAX
5882 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5883 : X64_SSE_REGPARM_MAX))
5884 : cum->sse_regno)
5885 : -1);
5886
5887 switch (mode)
5888 {
5889 default:
5890 break;
5891
5892 case V8SFmode:
5893 case V8SImode:
5894 case V32QImode:
5895 case V16HImode:
5896 case V4DFmode:
5897 case V4DImode:
5898 /* Unnamed 256bit vector mode parameters are passed on stack. */
5899 if (!named)
5900 return NULL;
5901 break;
5902 }
5903
5904 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5905 cum->sse_nregs,
5906 &x86_64_int_parameter_registers [cum->regno],
5907 cum->sse_regno);
5908 }
5909
5910 static rtx
5911 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5912 enum machine_mode orig_mode, int named,
5913 HOST_WIDE_INT bytes)
5914 {
5915 unsigned int regno;
5916
5917 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
5918 We use the value -2 to specify that the current function call is MSABI. */
5919 if (mode == VOIDmode)
5920 return GEN_INT (-2);
5921
5922 /* If we've run out of registers, it goes on the stack. */
5923 if (cum->nregs == 0)
5924 return NULL_RTX;
5925
5926 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5927
5928 /* Only floating point modes are passed in anything but integer regs. */
5929 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5930 {
5931 if (named)
5932 regno = cum->regno + FIRST_SSE_REG;
5933 else
5934 {
5935 rtx t1, t2;
5936
5937 /* Unnamed floating parameters are passed in both the
5938 SSE and integer registers. */
5939 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5940 t2 = gen_rtx_REG (mode, regno);
5941 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5942 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5943 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5944 }
5945 }
5946 /* Handle aggregate types passed in registers. */
5947 if (orig_mode == BLKmode)
5948 {
5949 if (bytes > 0 && bytes <= 8)
5950 mode = (bytes > 4 ? DImode : SImode);
5951 if (mode == BLKmode)
5952 mode = DImode;
5953 }
5954
5955 return gen_reg_or_parallel (mode, orig_mode, regno);
5956 }
5957
5958 rtx
5959 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5960 tree type, int named)
5961 {
5962 enum machine_mode mode = omode;
5963 HOST_WIDE_INT bytes, words;
5964
5965 if (mode == BLKmode)
5966 bytes = int_size_in_bytes (type);
5967 else
5968 bytes = GET_MODE_SIZE (mode);
5969 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5970
5971 /* To simplify the code below, represent vector types with a vector mode
5972 even if MMX/SSE are not active. */
5973 if (type && TREE_CODE (type) == VECTOR_TYPE)
5974 mode = type_natural_mode (type, cum);
5975
5976 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5977 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5978 else if (TARGET_64BIT)
5979 return function_arg_64 (cum, mode, omode, type, named);
5980 else
5981 return function_arg_32 (cum, mode, omode, type, bytes, words);
5982 }
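
/* Note on the VOIDmode sentinel used at the end of the named-argument scan:
   the helpers above return constm1_rtx for 32-bit targets, GEN_INT (-2) for
   the Win64 ABI, and for 64-bit SysV either -1 or the hidden AL value holding
   the number of SSE registers used, which varargs callees need.  */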
5983
5984 /* A C expression that indicates when an argument must be passed by
5985 reference. If nonzero for an argument, a copy of that argument is
5986 made in memory and a pointer to the argument is passed instead of
5987 the argument itself. The pointer is passed in whatever way is
5988 appropriate for passing a pointer to that type. */
5989
5990 static bool
5991 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5992 enum machine_mode mode ATTRIBUTE_UNUSED,
5993 const_tree type, bool named ATTRIBUTE_UNUSED)
5994 {
5995 /* See Windows x64 Software Convention. */
5996 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5997 {
5998 int msize = (int) GET_MODE_SIZE (mode);
5999 if (type)
6000 {
6001 /* Arrays are passed by reference. */
6002 if (TREE_CODE (type) == ARRAY_TYPE)
6003 return true;
6004
6005 if (AGGREGATE_TYPE_P (type))
6006 {
6007 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6008 are passed by reference. */
6009 msize = int_size_in_bytes (type);
6010 }
6011 }
6012
6013 /* __m128 is passed by reference. */
6014 switch (msize) {
6015 case 1: case 2: case 4: case 8:
6016 break;
6017 default:
6018 return true;
6019 }
6020 }
6021 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6022 return 1;
6023
6024 return 0;
6025 }
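
/* For Win64 this means, for example, that a 24-byte struct or a __m128
   argument is passed by reference (a hidden pointer occupies the register or
   stack slot), while an 8-byte struct is passed by value; on 64-bit SysV only
   variable-sized types take this path.  */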
6026
6027 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6028 ABI. */
6029 static bool
6030 contains_aligned_value_p (tree type)
6031 {
6032 enum machine_mode mode = TYPE_MODE (type);
6033 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6034 || mode == TDmode
6035 || mode == TFmode
6036 || mode == TCmode)
6037 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6038 return true;
6039 if (TYPE_ALIGN (type) < 128)
6040 return false;
6041
6042 if (AGGREGATE_TYPE_P (type))
6043 {
6044 /* Walk the aggregates recursively. */
6045 switch (TREE_CODE (type))
6046 {
6047 case RECORD_TYPE:
6048 case UNION_TYPE:
6049 case QUAL_UNION_TYPE:
6050 {
6051 tree field;
6052
6053 /* Walk all the structure fields. */
6054 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6055 {
6056 if (TREE_CODE (field) == FIELD_DECL
6057 && contains_aligned_value_p (TREE_TYPE (field)))
6058 return true;
6059 }
6060 break;
6061 }
6062
6063 case ARRAY_TYPE:
6064 /* Just in case some language passes arrays by value. */
6065 if (contains_aligned_value_p (TREE_TYPE (type)))
6066 return true;
6067 break;
6068
6069 default:
6070 gcc_unreachable ();
6071 }
6072 }
6073 return false;
6074 }
6075
6076 /* Gives the alignment boundary, in bits, of an argument with the
6077 specified mode and type. */
6078
6079 int
6080 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6081 {
6082 int align;
6083 if (type)
6084 {
6085 /* Since the canonical type is used for the call, convert the type
6086 to its canonical form if needed. */
6087 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6088 type = TYPE_CANONICAL (type);
6089 align = TYPE_ALIGN (type);
6090 }
6091 else
6092 align = GET_MODE_ALIGNMENT (mode);
6093 if (align < PARM_BOUNDARY)
6094 align = PARM_BOUNDARY;
6095 /* In 32-bit mode, only _Decimal128 and __float128 are aligned to their
6096 natural boundaries. */
6097 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6098 {
6099 /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
6100 make an exception for SSE modes since these require 128-bit
6101 alignment.
6102
6103 The handling here differs from field_alignment. ICC aligns MMX
6104 arguments to 4 byte boundaries, while structure fields are aligned
6105 to 8 byte boundaries. */
6106 if (!type)
6107 {
6108 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6109 align = PARM_BOUNDARY;
6110 }
6111 else
6112 {
6113 if (!contains_aligned_value_p (type))
6114 align = PARM_BOUNDARY;
6115 }
6116 }
6117 if (align > BIGGEST_ALIGNMENT)
6118 align = BIGGEST_ALIGNMENT;
6119 return align;
6120 }
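
/* For instance, on 32-bit targets a plain double argument ends up with
   PARM_BOUNDARY (32-bit) alignment, while a __m128 argument (with SSE
   enabled) or a _Decimal128 argument keeps its 128-bit alignment; on 64-bit
   targets the result is simply the larger of PARM_BOUNDARY and the type's
   natural alignment, capped at BIGGEST_ALIGNMENT.  */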
6121
6122 /* Return true if REGNO is a possible register number for a function value. */
6123
6124 bool
6125 ix86_function_value_regno_p (int regno)
6126 {
6127 switch (regno)
6128 {
6129 case 0:
6130 return true;
6131
6132 case FIRST_FLOAT_REG:
6133 /* TODO: The function should depend on current function ABI but
6134 builtins.c would need updating then. Therefore we use the
6135 default ABI. */
6136 if (TARGET_64BIT && ix86_abi == MS_ABI)
6137 return false;
6138 return TARGET_FLOAT_RETURNS_IN_80387;
6139
6140 case FIRST_SSE_REG:
6141 return TARGET_SSE;
6142
6143 case FIRST_MMX_REG:
6144 if (TARGET_MACHO || TARGET_64BIT)
6145 return false;
6146 return TARGET_MMX;
6147 }
6148
6149 return false;
6150 }
6151
6152 /* Define how to find the value returned by a function.
6153 VALTYPE is the data type of the value (as a tree).
6154 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6155 otherwise, FUNC is 0. */
6156
6157 static rtx
6158 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6159 const_tree fntype, const_tree fn)
6160 {
6161 unsigned int regno;
6162
6163 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6164 we normally prevent this case when mmx is not available. However
6165 some ABIs may require the result to be returned like DImode. */
6166 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6167 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6168
6169 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6170 we prevent this case when sse is not available. However some ABIs
6171 may require the result to be returned like integer TImode. */
6172 else if (mode == TImode
6173 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6174 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6175
6176 /* 32-byte vector modes in %ymm0. */
6177 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6178 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6179
6180 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6181 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6182 regno = FIRST_FLOAT_REG;
6183 else
6184 /* Most things go in %eax. */
6185 regno = AX_REG;
6186
6187 /* Override FP return register with %xmm0 for local functions when
6188 SSE math is enabled or for functions with sseregparm attribute. */
6189 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6190 {
6191 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6192 if ((sse_level >= 1 && mode == SFmode)
6193 || (sse_level == 2 && mode == DFmode))
6194 regno = FIRST_SSE_REG;
6195 }
6196
6197 /* OImode shouldn't be used directly. */
6198 gcc_assert (mode != OImode);
6199
6200 return gen_rtx_REG (orig_mode, regno);
6201 }
6202
6203 static rtx
6204 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6205 const_tree valtype)
6206 {
6207 rtx ret;
6208
6209 /* Handle libcalls, which don't provide a type node. */
6210 if (valtype == NULL)
6211 {
6212 switch (mode)
6213 {
6214 case SFmode:
6215 case SCmode:
6216 case DFmode:
6217 case DCmode:
6218 case TFmode:
6219 case SDmode:
6220 case DDmode:
6221 case TDmode:
6222 return gen_rtx_REG (mode, FIRST_SSE_REG);
6223 case XFmode:
6224 case XCmode:
6225 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6226 case TCmode:
6227 return NULL;
6228 default:
6229 return gen_rtx_REG (mode, AX_REG);
6230 }
6231 }
6232
6233 ret = construct_container (mode, orig_mode, valtype, 1,
6234 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6235 x86_64_int_return_registers, 0);
6236
6237 /* For zero-sized structures, construct_container returns NULL, but we
6238 need to keep the rest of the compiler happy by returning a meaningful value. */
6239 if (!ret)
6240 ret = gen_rtx_REG (orig_mode, AX_REG);
6241
6242 return ret;
6243 }
6244
6245 static rtx
6246 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6247 {
6248 unsigned int regno = AX_REG;
6249
6250 if (TARGET_SSE)
6251 {
6252 switch (GET_MODE_SIZE (mode))
6253 {
6254 case 16:
6255 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6256 && !COMPLEX_MODE_P (mode))
6257 regno = FIRST_SSE_REG;
6258 break;
6259 case 8:
6260 case 4:
6261 if (mode == SFmode || mode == DFmode)
6262 regno = FIRST_SSE_REG;
6263 break;
6264 default:
6265 break;
6266 }
6267 }
6268 return gen_rtx_REG (orig_mode, regno);
6269 }
6270
6271 static rtx
6272 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6273 enum machine_mode orig_mode, enum machine_mode mode)
6274 {
6275 const_tree fn, fntype;
6276
6277 fn = NULL_TREE;
6278 if (fntype_or_decl && DECL_P (fntype_or_decl))
6279 fn = fntype_or_decl;
6280 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6281
6282 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6283 return function_value_ms_64 (orig_mode, mode);
6284 else if (TARGET_64BIT)
6285 return function_value_64 (orig_mode, mode, valtype);
6286 else
6287 return function_value_32 (orig_mode, mode, fntype, fn);
6288 }
6289
6290 static rtx
6291 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6292 bool outgoing ATTRIBUTE_UNUSED)
6293 {
6294 enum machine_mode mode, orig_mode;
6295
6296 orig_mode = TYPE_MODE (valtype);
6297 mode = type_natural_mode (valtype, NULL);
6298 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6299 }
6300
6301 rtx
6302 ix86_libcall_value (enum machine_mode mode)
6303 {
6304 return ix86_function_value_1 (NULL, NULL, mode, mode);
6305 }
6306
6307 /* Return true iff type is returned in memory. */
6308
6309 static int ATTRIBUTE_UNUSED
6310 return_in_memory_32 (const_tree type, enum machine_mode mode)
6311 {
6312 HOST_WIDE_INT size;
6313
6314 if (mode == BLKmode)
6315 return 1;
6316
6317 size = int_size_in_bytes (type);
6318
6319 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6320 return 0;
6321
6322 if (VECTOR_MODE_P (mode) || mode == TImode)
6323 {
6324 /* User-created vectors small enough to fit in EAX. */
6325 if (size < 8)
6326 return 0;
6327
6328 /* MMX/3dNow values are returned in MM0,
6329 except when it doesn't exist. */
6330 if (size == 8)
6331 return (TARGET_MMX ? 0 : 1);
6332
6333 /* SSE values are returned in XMM0, except when it doesn't exist. */
6334 if (size == 16)
6335 return (TARGET_SSE ? 0 : 1);
6336
6337 /* AVX values are returned in YMM0, except when it doesn't exist. */
6338 if (size == 32)
6339 return TARGET_AVX ? 0 : 1;
6340 }
6341
6342 if (mode == XFmode)
6343 return 0;
6344
6345 if (size > 12)
6346 return 1;
6347
6348 /* OImode shouldn't be used directly. */
6349 gcc_assert (mode != OImode);
6350
6351 return 0;
6352 }
6353
6354 static int ATTRIBUTE_UNUSED
6355 return_in_memory_64 (const_tree type, enum machine_mode mode)
6356 {
6357 int needed_intregs, needed_sseregs;
6358 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6359 }
6360
6361 static int ATTRIBUTE_UNUSED
6362 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6363 {
6364 HOST_WIDE_INT size = int_size_in_bytes (type);
6365
6366 /* __m128 is returned in xmm0. */
6367 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6368 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6369 return 0;
6370
6371 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
6372 return (size != 1 && size != 2 && size != 4 && size != 8);
6373 }
6374
6375 static bool
6376 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6377 {
6378 #ifdef SUBTARGET_RETURN_IN_MEMORY
6379 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6380 #else
6381 const enum machine_mode mode = type_natural_mode (type, NULL);
6382
6383 if (TARGET_64BIT)
6384 {
6385 if (ix86_function_type_abi (fntype) == MS_ABI)
6386 return return_in_memory_ms_64 (type, mode);
6387 else
6388 return return_in_memory_64 (type, mode);
6389 }
6390 else
6391 return return_in_memory_32 (type, mode);
6392 #endif
6393 }
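
/* By way of example: in 32-bit mode a 16-byte vector is returned in XMM0
   only when SSE is enabled and in memory otherwise; on Win64 aggregates
   whose size is not 1, 2, 4 or 8 bytes go to memory, 16-byte vector types
   such as __m128 being the exception; on 64-bit SysV the decision simply
   follows the register classification above.  */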
6394
6395 /* Return false iff TYPE is returned in memory. This version is used
6396 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6397 but differs notably in that when MMX is available, 8-byte vectors
6398 are returned in memory, rather than in MMX registers. */
6399
6400 bool
6401 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6402 {
6403 int size;
6404 enum machine_mode mode = type_natural_mode (type, NULL);
6405
6406 if (TARGET_64BIT)
6407 return return_in_memory_64 (type, mode);
6408
6409 if (mode == BLKmode)
6410 return 1;
6411
6412 size = int_size_in_bytes (type);
6413
6414 if (VECTOR_MODE_P (mode))
6415 {
6416 /* Return in memory only if MMX registers *are* available. This
6417 seems backwards, but it is consistent with the existing
6418 Solaris x86 ABI. */
6419 if (size == 8)
6420 return TARGET_MMX;
6421 if (size == 16)
6422 return !TARGET_SSE;
6423 }
6424 else if (mode == TImode)
6425 return !TARGET_SSE;
6426 else if (mode == XFmode)
6427 return 0;
6428
6429 return size > 12;
6430 }
6431
6432 /* When returning SSE vector types, we have a choice of either
6433 (1) being abi incompatible with a -march switch, or
6434 (2) generating an error.
6435 Given no good solution, I think the safest thing is one warning.
6436 The user won't be able to use -Werror, but....
6437
6438 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6439 called in response to actually generating a caller or callee that
6440 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6441 via aggregate_value_p for general type probing from tree-ssa. */
6442
6443 static rtx
6444 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6445 {
6446 static bool warnedsse, warnedmmx;
6447
6448 if (!TARGET_64BIT && type)
6449 {
6450 /* Look at the return type of the function, not the function type. */
6451 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6452
6453 if (!TARGET_SSE && !warnedsse)
6454 {
6455 if (mode == TImode
6456 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6457 {
6458 warnedsse = true;
6459 warning (0, "SSE vector return without SSE enabled "
6460 "changes the ABI");
6461 }
6462 }
6463
6464 if (!TARGET_MMX && !warnedmmx)
6465 {
6466 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6467 {
6468 warnedmmx = true;
6469 warning (0, "MMX vector return without MMX enabled "
6470 "changes the ABI");
6471 }
6472 }
6473 }
6474
6475 return NULL;
6476 }
6477
6478 \f
6479 /* Create the va_list data type. */
6480
6481 /* Returns the calling-convention-specific va_list data type.
6482 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6483
6484 static tree
6485 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6486 {
6487 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6488
6489 /* For i386 we use plain pointer to argument area. */
6490 if (!TARGET_64BIT || abi == MS_ABI)
6491 return build_pointer_type (char_type_node);
6492
6493 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6494 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6495
6496 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6497 unsigned_type_node);
6498 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6499 unsigned_type_node);
6500 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6501 ptr_type_node);
6502 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6503 ptr_type_node);
6504
6505 va_list_gpr_counter_field = f_gpr;
6506 va_list_fpr_counter_field = f_fpr;
6507
6508 DECL_FIELD_CONTEXT (f_gpr) = record;
6509 DECL_FIELD_CONTEXT (f_fpr) = record;
6510 DECL_FIELD_CONTEXT (f_ovf) = record;
6511 DECL_FIELD_CONTEXT (f_sav) = record;
6512
6513 TREE_CHAIN (record) = type_decl;
6514 TYPE_NAME (record) = type_decl;
6515 TYPE_FIELDS (record) = f_gpr;
6516 TREE_CHAIN (f_gpr) = f_fpr;
6517 TREE_CHAIN (f_fpr) = f_ovf;
6518 TREE_CHAIN (f_ovf) = f_sav;
6519
6520 layout_type (record);
6521
6522 /* The correct type is an array type of one element. */
6523 return build_array_type (record, build_index_type (size_zero_node));
6524 }
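
/* The 64-bit SysV record built above corresponds roughly to the following C
   (a sketch of the layout only; the real type nodes are the ones built by
   this function):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];
*/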
6525
6526 /* Set up the builtin va_list data type and, for 64-bit, the additional
6527 calling-convention-specific va_list data types. */
6528
6529 static tree
6530 ix86_build_builtin_va_list (void)
6531 {
6532 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6533
6534 /* Initialize abi specific va_list builtin types. */
6535 if (TARGET_64BIT)
6536 {
6537 tree t;
6538 if (ix86_abi == MS_ABI)
6539 {
6540 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6541 if (TREE_CODE (t) != RECORD_TYPE)
6542 t = build_variant_type_copy (t);
6543 sysv_va_list_type_node = t;
6544 }
6545 else
6546 {
6547 t = ret;
6548 if (TREE_CODE (t) != RECORD_TYPE)
6549 t = build_variant_type_copy (t);
6550 sysv_va_list_type_node = t;
6551 }
6552 if (ix86_abi != MS_ABI)
6553 {
6554 t = ix86_build_builtin_va_list_abi (MS_ABI);
6555 if (TREE_CODE (t) != RECORD_TYPE)
6556 t = build_variant_type_copy (t);
6557 ms_va_list_type_node = t;
6558 }
6559 else
6560 {
6561 t = ret;
6562 if (TREE_CODE (t) != RECORD_TYPE)
6563 t = build_variant_type_copy (t);
6564 ms_va_list_type_node = t;
6565 }
6566 }
6567
6568 return ret;
6569 }
6570
6571 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6572
6573 static void
6574 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6575 {
6576 rtx save_area, mem;
6577 rtx label;
6578 rtx label_ref;
6579 rtx tmp_reg;
6580 rtx nsse_reg;
6581 alias_set_type set;
6582 int i;
6583 int regparm = ix86_regparm;
6584
6585 if (cum->call_abi != ix86_abi)
6586 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6587
6588 /* GPR size of varargs save area. */
6589 if (cfun->va_list_gpr_size)
6590 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6591 else
6592 ix86_varargs_gpr_size = 0;
6593
6594 /* FPR size of varargs save area. We don't need it if we don't pass
6595 anything in SSE registers. */
6596 if (cum->sse_nregs && cfun->va_list_fpr_size)
6597 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6598 else
6599 ix86_varargs_fpr_size = 0;
6600
6601 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6602 return;
6603
6604 save_area = frame_pointer_rtx;
6605 set = get_varargs_alias_set ();
6606
6607 for (i = cum->regno;
6608 i < regparm
6609 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6610 i++)
6611 {
6612 mem = gen_rtx_MEM (Pmode,
6613 plus_constant (save_area, i * UNITS_PER_WORD));
6614 MEM_NOTRAP_P (mem) = 1;
6615 set_mem_alias_set (mem, set);
6616 emit_move_insn (mem, gen_rtx_REG (Pmode,
6617 x86_64_int_parameter_registers[i]));
6618 }
6619
6620 if (ix86_varargs_fpr_size)
6621 {
6622 /* Now emit code to save SSE registers. The AX parameter contains the
6623 number of SSE parameter registers used to call this function. We use
6624 the sse_prologue_save insn template that produces a computed jump across
6625 the SSE saves. We need some preparation work to get this working. */
6626
6627 label = gen_label_rtx ();
6628 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6629
6630 /* Compute the address to jump to:
6631 label - eax*4 + nnamed_sse_arguments*4, or
6632 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6633 tmp_reg = gen_reg_rtx (Pmode);
6634 nsse_reg = gen_reg_rtx (Pmode);
6635 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6636 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6637 gen_rtx_MULT (Pmode, nsse_reg,
6638 GEN_INT (4))));
6639
6640 /* vmovaps is one byte longer than movaps. */
6641 if (TARGET_AVX)
6642 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6643 gen_rtx_PLUS (Pmode, tmp_reg,
6644 nsse_reg)));
6645
6646 if (cum->sse_regno)
6647 emit_move_insn
6648 (nsse_reg,
6649 gen_rtx_CONST (DImode,
6650 gen_rtx_PLUS (DImode,
6651 label_ref,
6652 GEN_INT (cum->sse_regno
6653 * (TARGET_AVX ? 5 : 4)))));
6654 else
6655 emit_move_insn (nsse_reg, label_ref);
6656 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6657
6658 /* Compute the address of the memory block we save into. We always use a
6659 pointer pointing 127 bytes past the first byte to store - this is needed
6660 to keep the instruction size limited to 4 bytes (5 bytes for AVX) with a
6661 one-byte displacement. */
6662 tmp_reg = gen_reg_rtx (Pmode);
6663 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6664 plus_constant (save_area,
6665 ix86_varargs_gpr_size + 127)));
6666 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6667 MEM_NOTRAP_P (mem) = 1;
6668 set_mem_alias_set (mem, set);
6669 set_mem_align (mem, BITS_PER_WORD);
6670
6671 /* And finally do the dirty job! */
6672 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6673 GEN_INT (cum->sse_regno), label));
6674 }
6675 }
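
/* Layout of the resulting register save area (a sketch): the first
   X86_64_REGPARM_MAX * 8 = 48 bytes hold the integer argument registers,
   followed by up to X86_64_SSE_REGPARM_MAX 16-byte slots for the XMM
   registers saved by the sse_prologue_save pattern; gp_offset and fp_offset
   in the va_list index into this block.  */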
6676
6677 static void
6678 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6679 {
6680 alias_set_type set = get_varargs_alias_set ();
6681 int i;
6682
6683 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6684 {
6685 rtx reg, mem;
6686
6687 mem = gen_rtx_MEM (Pmode,
6688 plus_constant (virtual_incoming_args_rtx,
6689 i * UNITS_PER_WORD));
6690 MEM_NOTRAP_P (mem) = 1;
6691 set_mem_alias_set (mem, set);
6692
6693 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6694 emit_move_insn (mem, reg);
6695 }
6696 }
6697
6698 static void
6699 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6700 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6701 int no_rtl)
6702 {
6703 CUMULATIVE_ARGS next_cum;
6704 tree fntype;
6705
6706 /* This argument doesn't appear to be used anymore. Which is good,
6707 because the old code here didn't suppress rtl generation. */
6708 gcc_assert (!no_rtl);
6709
6710 if (!TARGET_64BIT)
6711 return;
6712
6713 fntype = TREE_TYPE (current_function_decl);
6714
6715 /* For varargs, we do not want to skip the dummy va_dcl argument.
6716 For stdargs, we do want to skip the last named argument. */
6717 next_cum = *cum;
6718 if (stdarg_p (fntype))
6719 function_arg_advance (&next_cum, mode, type, 1);
6720
6721 if (cum->call_abi == MS_ABI)
6722 setup_incoming_varargs_ms_64 (&next_cum);
6723 else
6724 setup_incoming_varargs_64 (&next_cum);
6725 }
6726
6727 /* Check whether TYPE is a plain char * style va_list. */
6728
6729 static bool
6730 is_va_list_char_pointer (tree type)
6731 {
6732 tree canonic;
6733
6734 /* For 32-bit it is always true. */
6735 if (!TARGET_64BIT)
6736 return true;
6737 canonic = ix86_canonical_va_list_type (type);
6738 return (canonic == ms_va_list_type_node
6739 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6740 }
6741
6742 /* Implement va_start. */
6743
6744 static void
6745 ix86_va_start (tree valist, rtx nextarg)
6746 {
6747 HOST_WIDE_INT words, n_gpr, n_fpr;
6748 tree f_gpr, f_fpr, f_ovf, f_sav;
6749 tree gpr, fpr, ovf, sav, t;
6750 tree type;
6751
6752 /* Only 64-bit targets need something special. */
6753 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6754 {
6755 std_expand_builtin_va_start (valist, nextarg);
6756 return;
6757 }
6758
6759 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6760 f_fpr = TREE_CHAIN (f_gpr);
6761 f_ovf = TREE_CHAIN (f_fpr);
6762 f_sav = TREE_CHAIN (f_ovf);
6763
6764 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6765 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6766 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6767 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6768 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6769
6770 /* Count number of gp and fp argument registers used. */
6771 words = crtl->args.info.words;
6772 n_gpr = crtl->args.info.regno;
6773 n_fpr = crtl->args.info.sse_regno;
6774
6775 if (cfun->va_list_gpr_size)
6776 {
6777 type = TREE_TYPE (gpr);
6778 t = build2 (MODIFY_EXPR, type,
6779 gpr, build_int_cst (type, n_gpr * 8));
6780 TREE_SIDE_EFFECTS (t) = 1;
6781 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6782 }
6783
6784 if (TARGET_SSE && cfun->va_list_fpr_size)
6785 {
6786 type = TREE_TYPE (fpr);
6787 t = build2 (MODIFY_EXPR, type, fpr,
6788 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6789 TREE_SIDE_EFFECTS (t) = 1;
6790 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6791 }
6792
6793 /* Find the overflow area. */
6794 type = TREE_TYPE (ovf);
6795 t = make_tree (type, crtl->args.internal_arg_pointer);
6796 if (words != 0)
6797 t = build2 (POINTER_PLUS_EXPR, type, t,
6798 size_int (words * UNITS_PER_WORD));
6799 t = build2 (MODIFY_EXPR, type, ovf, t);
6800 TREE_SIDE_EFFECTS (t) = 1;
6801 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6802
6803 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6804 {
6805 /* Find the register save area.
6806 The function prologue saves it right above the stack frame. */
6807 type = TREE_TYPE (sav);
6808 t = make_tree (type, frame_pointer_rtx);
6809 if (!ix86_varargs_gpr_size)
6810 t = build2 (POINTER_PLUS_EXPR, type, t,
6811 size_int (-8 * X86_64_REGPARM_MAX));
6812 t = build2 (MODIFY_EXPR, type, sav, t);
6813 TREE_SIDE_EFFECTS (t) = 1;
6814 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6815 }
6816 }
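
/* For example (a sketch), for int f (int a, ...) on 64-bit SysV one integer
   register and no SSE registers are consumed by the named argument, so
   va_start initializes gp_offset to 8, fp_offset to
   8 * X86_64_REGPARM_MAX = 48, overflow_arg_area to the incoming stack
   arguments, and reg_save_area to the block saved by the prologue code
   above.  */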
6817
6818 /* Implement va_arg. */
6819
6820 static tree
6821 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6822 gimple_seq *post_p)
6823 {
6824 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6825 tree f_gpr, f_fpr, f_ovf, f_sav;
6826 tree gpr, fpr, ovf, sav, t;
6827 int size, rsize;
6828 tree lab_false, lab_over = NULL_TREE;
6829 tree addr, t2;
6830 rtx container;
6831 int indirect_p = 0;
6832 tree ptrtype;
6833 enum machine_mode nat_mode;
6834 int arg_boundary;
6835
6836 /* Only 64-bit targets need something special. */
6837 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6838 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6839
6840 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6841 f_fpr = TREE_CHAIN (f_gpr);
6842 f_ovf = TREE_CHAIN (f_fpr);
6843 f_sav = TREE_CHAIN (f_ovf);
6844
6845 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6846 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6847 valist = build_va_arg_indirect_ref (valist);
6848 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6849 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6850 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6851
6852 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6853 if (indirect_p)
6854 type = build_pointer_type (type);
6855 size = int_size_in_bytes (type);
6856 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6857
6858 nat_mode = type_natural_mode (type, NULL);
6859 switch (nat_mode)
6860 {
6861 case V8SFmode:
6862 case V8SImode:
6863 case V32QImode:
6864 case V16HImode:
6865 case V4DFmode:
6866 case V4DImode:
6867 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6868 if (ix86_cfun_abi () == SYSV_ABI)
6869 {
6870 container = NULL;
6871 break;
6872 }
6873
6874 default:
6875 container = construct_container (nat_mode, TYPE_MODE (type),
6876 type, 0, X86_64_REGPARM_MAX,
6877 X86_64_SSE_REGPARM_MAX, intreg,
6878 0);
6879 break;
6880 }
6881
6882 /* Pull the value out of the saved registers. */
6883
6884 addr = create_tmp_var (ptr_type_node, "addr");
6885 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6886
6887 if (container)
6888 {
6889 int needed_intregs, needed_sseregs;
6890 bool need_temp;
6891 tree int_addr, sse_addr;
6892
6893 lab_false = create_artificial_label ();
6894 lab_over = create_artificial_label ();
6895
6896 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6897
6898 need_temp = (!REG_P (container)
6899 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6900 || TYPE_ALIGN (type) > 128));
6901
6902 /* In case we are passing a structure, verify that it forms a consecutive
6903 block in the register save area. If not, we need to do moves. */
6904 if (!need_temp && !REG_P (container))
6905 {
6906 /* Verify that all registers are strictly consecutive */
6907 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6908 {
6909 int i;
6910
6911 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6912 {
6913 rtx slot = XVECEXP (container, 0, i);
6914 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6915 || INTVAL (XEXP (slot, 1)) != i * 16)
6916 need_temp = 1;
6917 }
6918 }
6919 else
6920 {
6921 int i;
6922
6923 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6924 {
6925 rtx slot = XVECEXP (container, 0, i);
6926 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6927 || INTVAL (XEXP (slot, 1)) != i * 8)
6928 need_temp = 1;
6929 }
6930 }
6931 }
6932 if (!need_temp)
6933 {
6934 int_addr = addr;
6935 sse_addr = addr;
6936 }
6937 else
6938 {
6939 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6940 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6941 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6942 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6943 }
6944
6945 /* First ensure that we fit completely in registers. */
6946 if (needed_intregs)
6947 {
6948 t = build_int_cst (TREE_TYPE (gpr),
6949 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6950 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6951 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6952 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6953 gimplify_and_add (t, pre_p);
6954 }
6955 if (needed_sseregs)
6956 {
6957 t = build_int_cst (TREE_TYPE (fpr),
6958 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6959 + X86_64_REGPARM_MAX * 8);
6960 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6961 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6962 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6963 gimplify_and_add (t, pre_p);
6964 }
6965
6966 /* Compute index to start of area used for integer regs. */
6967 if (needed_intregs)
6968 {
6969 /* int_addr = gpr + sav; */
6970 t = fold_convert (sizetype, gpr);
6971 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6972 gimplify_assign (int_addr, t, pre_p);
6973 }
6974 if (needed_sseregs)
6975 {
6976 /* sse_addr = fpr + sav; */
6977 t = fold_convert (sizetype, fpr);
6978 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6979 gimplify_assign (sse_addr, t, pre_p);
6980 }
6981 if (need_temp)
6982 {
6983 int i;
6984 tree temp = create_tmp_var (type, "va_arg_tmp");
6985
6986 /* addr = &temp; */
6987 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6988 gimplify_assign (addr, t, pre_p);
6989
6990 for (i = 0; i < XVECLEN (container, 0); i++)
6991 {
6992 rtx slot = XVECEXP (container, 0, i);
6993 rtx reg = XEXP (slot, 0);
6994 enum machine_mode mode = GET_MODE (reg);
6995 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6996 tree addr_type = build_pointer_type (piece_type);
6997 tree daddr_type = build_pointer_type_for_mode (piece_type,
6998 ptr_mode, true);
6999 tree src_addr, src;
7000 int src_offset;
7001 tree dest_addr, dest;
7002
7003 if (SSE_REGNO_P (REGNO (reg)))
7004 {
7005 src_addr = sse_addr;
7006 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7007 }
7008 else
7009 {
7010 src_addr = int_addr;
7011 src_offset = REGNO (reg) * 8;
7012 }
7013 src_addr = fold_convert (addr_type, src_addr);
7014 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7015 size_int (src_offset));
7016 src = build_va_arg_indirect_ref (src_addr);
7017
7018 dest_addr = fold_convert (daddr_type, addr);
7019 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7020 size_int (INTVAL (XEXP (slot, 1))));
7021 dest = build_va_arg_indirect_ref (dest_addr);
7022
7023 gimplify_assign (dest, src, pre_p);
7024 }
7025 }
7026
7027 if (needed_intregs)
7028 {
7029 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7030 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7031 gimplify_assign (gpr, t, pre_p);
7032 }
7033
7034 if (needed_sseregs)
7035 {
7036 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7037 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7038 gimplify_assign (fpr, t, pre_p);
7039 }
7040
7041 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7042
7043 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7044 }
7045
7046 /* ... otherwise out of the overflow area. */
7047
7048 /* When the caller aligns a parameter on the stack, a parameter alignment
7049 beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
7050 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
7051 caller. */
7052 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7053 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7054 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7055
7056 /* Care for on-stack alignment if needed. */
7057 if (arg_boundary <= 64
7058 || integer_zerop (TYPE_SIZE (type)))
7059 t = ovf;
7060 else
7061 {
7062 HOST_WIDE_INT align = arg_boundary / 8;
7063 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7064 size_int (align - 1));
7065 t = fold_convert (sizetype, t);
7066 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7067 size_int (-align));
7068 t = fold_convert (TREE_TYPE (ovf), t);
7069 }
7070 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7071 gimplify_assign (addr, t, pre_p);
7072
7073 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7074 size_int (rsize * UNITS_PER_WORD));
7075 gimplify_assign (unshare_expr (ovf), t, pre_p);
7076
7077 if (container)
7078 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7079
7080 ptrtype = build_pointer_type (type);
7081 addr = fold_convert (ptrtype, addr);
7082
7083 if (indirect_p)
7084 addr = build_va_arg_indirect_ref (addr);
7085 return build_va_arg_indirect_ref (addr);
7086 }
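/* A compiled-out sketch (not part of GCC) of the pointer rounding applied to
   the overflow area above: before an argument is fetched from the stack, the
   overflow_arg_area pointer is bumped to the next multiple of the argument
   boundary with the usual (p + align - 1) & -align idiom.  */
#if 0 /* Illustrative sketch only; never compiled as part of GCC.  */
#include <stdint.h>

/* Round ADDR up to the next multiple of ALIGN (a power of two), as done for
   the overflow_arg_area pointer in ix86_gimplify_va_arg above.  */
static uintptr_t
align_overflow_area_sketch (uintptr_t addr, uintptr_t align)
{
  return (addr + align - 1) & -align;
}
#endif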
7087 \f
7088 /* Return nonzero if OPNUM's MEM should be matched
7089 in movabs* patterns. */
7090
7091 int
7092 ix86_check_movabs (rtx insn, int opnum)
7093 {
7094 rtx set, mem;
7095
7096 set = PATTERN (insn);
7097 if (GET_CODE (set) == PARALLEL)
7098 set = XVECEXP (set, 0, 0);
7099 gcc_assert (GET_CODE (set) == SET);
7100 mem = XEXP (set, opnum);
7101 while (GET_CODE (mem) == SUBREG)
7102 mem = SUBREG_REG (mem);
7103 gcc_assert (MEM_P (mem));
7104 return (volatile_ok || !MEM_VOLATILE_P (mem));
7105 }
7106 \f
7107 /* Initialize the table of extra 80387 mathematical constants. */
7108
7109 static void
7110 init_ext_80387_constants (void)
7111 {
7112 static const char * cst[5] =
7113 {
7114 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7115 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7116 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7117 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7118 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7119 };
7120 int i;
7121
7122 for (i = 0; i < 5; i++)
7123 {
7124 real_from_string (&ext_80387_constants_table[i], cst[i]);
7125 /* Ensure each constant is rounded to XFmode precision. */
7126 real_convert (&ext_80387_constants_table[i],
7127 XFmode, &ext_80387_constants_table[i]);
7128 }
7129
7130 ext_80387_constants_init = 1;
7131 }
7132
7133 /* Return true if the constant is something that can be loaded with
7134 a special instruction. */
7135
7136 int
7137 standard_80387_constant_p (rtx x)
7138 {
7139 enum machine_mode mode = GET_MODE (x);
7140
7141 REAL_VALUE_TYPE r;
7142
7143 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7144 return -1;
7145
7146 if (x == CONST0_RTX (mode))
7147 return 1;
7148 if (x == CONST1_RTX (mode))
7149 return 2;
7150
7151 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7152
7153 /* For XFmode constants, try to find a special 80387 instruction when
7154 optimizing for size or on those CPUs that benefit from them. */
7155 if (mode == XFmode
7156 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7157 {
7158 int i;
7159
7160 if (! ext_80387_constants_init)
7161 init_ext_80387_constants ();
7162
7163 for (i = 0; i < 5; i++)
7164 if (real_identical (&r, &ext_80387_constants_table[i]))
7165 return i + 3;
7166 }
7167
7168 /* A load of the constant -0.0 or -1.0 will be split into an
7169 fldz;fchs or fld1;fchs sequence. */
7170 if (real_isnegzero (&r))
7171 return 8;
7172 if (real_identical (&r, &dconstm1))
7173 return 9;
7174
7175 return 0;
7176 }
7177
7178 /* Return the opcode of the special instruction to be used to load
7179 the constant X. */
7180
7181 const char *
7182 standard_80387_constant_opcode (rtx x)
7183 {
7184 switch (standard_80387_constant_p (x))
7185 {
7186 case 1:
7187 return "fldz";
7188 case 2:
7189 return "fld1";
7190 case 3:
7191 return "fldlg2";
7192 case 4:
7193 return "fldln2";
7194 case 5:
7195 return "fldl2e";
7196 case 6:
7197 return "fldl2t";
7198 case 7:
7199 return "fldpi";
7200 case 8:
7201 case 9:
7202 return "#";
7203 default:
7204 gcc_unreachable ();
7205 }
7206 }
7207
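/* For reference, a compiled-out summary of the mapping implemented by the two
   functions above; the table name is hypothetical and exists only to make the
   correspondence between the return codes and the x87 mnemonics explicit.  */
#if 0 /* Illustrative sketch only; never compiled as part of GCC.  */
/* Codes returned by standard_80387_constant_p and the insns they select:
     1 -> fldz      (+0.0)          6 -> fldl2t    (log2 10)
     2 -> fld1      (+1.0)          7 -> fldpi     (pi)
     3 -> fldlg2    (log10 2)       8 -> fldz;fchs (-0.0, split later)
     4 -> fldln2    (ln 2)          9 -> fld1;fchs (-1.0, split later)
     5 -> fldl2e    (log2 e)  */
static const char *const x87_const_opcode_sketch[] =
{
  "fldz", "fld1", "fldlg2", "fldln2", "fldl2e", "fldl2t", "fldpi", "#", "#"
};
#endif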
7208 /* Return the CONST_DOUBLE representing the 80387 constant that is
7209 loaded by the specified special instruction. The argument IDX
7210 matches the return value from standard_80387_constant_p. */
7211
7212 rtx
7213 standard_80387_constant_rtx (int idx)
7214 {
7215 int i;
7216
7217 if (! ext_80387_constants_init)
7218 init_ext_80387_constants ();
7219
7220 switch (idx)
7221 {
7222 case 3:
7223 case 4:
7224 case 5:
7225 case 6:
7226 case 7:
7227 i = idx - 3;
7228 break;
7229
7230 default:
7231 gcc_unreachable ();
7232 }
7233
7234 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7235 XFmode);
7236 }
7237
7238 /* Return 1 if MODE is a valid mode for SSE. */
7239 static int
7240 standard_sse_mode_p (enum machine_mode mode)
7241 {
7242 switch (mode)
7243 {
7244 case V16QImode:
7245 case V8HImode:
7246 case V4SImode:
7247 case V2DImode:
7248 case V4SFmode:
7249 case V2DFmode:
7250 return 1;
7251
7252 default:
7253 return 0;
7254 }
7255 }
7256
7257 /* Return 1 if X is all 0s. For all 1s, return 2 if X is a 128bit
7258 SSE mode and SSE2 is enabled, or 3 if X is a 256bit AVX mode and
7259 AVX is enabled. */
7260
7261 int
7262 standard_sse_constant_p (rtx x)
7263 {
7264 enum machine_mode mode = GET_MODE (x);
7265
7266 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7267 return 1;
7268 if (vector_all_ones_operand (x, mode))
7269 {
7270 if (standard_sse_mode_p (mode))
7271 return TARGET_SSE2 ? 2 : -2;
7272 else if (VALID_AVX256_REG_MODE (mode))
7273 return TARGET_AVX ? 3 : -3;
7274 }
7275
7276 return 0;
7277 }
7278
7279 /* Return the opcode of the special instruction to be used to load
7280 the constant X. */
7281
7282 const char *
7283 standard_sse_constant_opcode (rtx insn, rtx x)
7284 {
7285 switch (standard_sse_constant_p (x))
7286 {
7287 case 1:
7288 switch (get_attr_mode (insn))
7289 {
7290 case MODE_V4SF:
7291 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7292 case MODE_V2DF:
7293 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7294 case MODE_TI:
7295 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7296 case MODE_V8SF:
7297 return "vxorps\t%x0, %x0, %x0";
7298 case MODE_V4DF:
7299 return "vxorpd\t%x0, %x0, %x0";
7300 case MODE_OI:
7301 return "vpxor\t%x0, %x0, %x0";
7302 default:
7303 gcc_unreachable ();
7304 }
7305 case 2:
7306 if (TARGET_AVX)
7307 switch (get_attr_mode (insn))
7308 {
7309 case MODE_V4SF:
7310 case MODE_V2DF:
7311 case MODE_TI:
7312 return "vpcmpeqd\t%0, %0, %0";
7313 break;
7314 default:
7315 gcc_unreachable ();
7316 }
7317 else
7318 return "pcmpeqd\t%0, %0";
7319 }
7320 gcc_unreachable ();
7321 }
7322
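/* A compiled-out sketch (not part of GCC) of the trick encoded above: an
   all-zeros vector is materialized by XORing a register with itself and an
   all-ones vector by comparing a register for equality with itself, so
   neither constant needs a memory load.  The intrinsic-level code below is
   illustrative only and assumes SSE2.  */
#if 0 /* Illustrative sketch only; never compiled as part of GCC.  */
#include <emmintrin.h>

static __m128i
sse_all_zeros_sketch (void)
{
  return _mm_setzero_si128 ();          /* Compiles to pxor/xorps.  */
}

static __m128i
sse_all_ones_sketch (__m128i x)
{
  return _mm_cmpeq_epi32 (x, x);        /* Compiles to pcmpeqd.  */
}
#endif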
7323 /* Return 1 if OP contains a symbol reference. */
7324
7325 int
7326 symbolic_reference_mentioned_p (rtx op)
7327 {
7328 const char *fmt;
7329 int i;
7330
7331 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7332 return 1;
7333
7334 fmt = GET_RTX_FORMAT (GET_CODE (op));
7335 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7336 {
7337 if (fmt[i] == 'E')
7338 {
7339 int j;
7340
7341 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7342 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7343 return 1;
7344 }
7345
7346 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7347 return 1;
7348 }
7349
7350 return 0;
7351 }
7352
7353 /* Return 1 if it is appropriate to emit `ret' instructions in the
7354 body of a function. Do this only if the epilogue is simple, needing a
7355 couple of insns. Prior to reloading, we can't tell how many registers
7356 must be saved, so return 0 then. Return 0 if there is no frame
7357 marker to de-allocate. */
7358
7359 int
7360 ix86_can_use_return_insn_p (void)
7361 {
7362 struct ix86_frame frame;
7363
7364 if (! reload_completed || frame_pointer_needed)
7365 return 0;
7366
7367 /* Don't allow more than 32768 bytes of pops, since that's all we
7368 can do with one instruction. */
7369 if (crtl->args.pops_args
7370 && crtl->args.size >= 32768)
7371 return 0;
7372
7373 ix86_compute_frame_layout (&frame);
7374 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7375 }
7376 \f
7377 /* Value should be nonzero if functions must have frame pointers.
7378 Zero means the frame pointer need not be set up (and parms may
7379 be accessed via the stack pointer) in functions that seem suitable. */
7380
7381 int
7382 ix86_frame_pointer_required (void)
7383 {
7384 /* If we accessed previous frames, then the generated code expects
7385 to be able to access the saved ebp value in our frame. */
7386 if (cfun->machine->accesses_prev_frame)
7387 return 1;
7388
7389 /* Several x86 OSes need a frame pointer for other reasons,
7390 usually pertaining to setjmp. */
7391 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7392 return 1;
7393
7394 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7395 the frame pointer by default. Turn it back on now if we've not
7396 got a leaf function. */
7397 if (TARGET_OMIT_LEAF_FRAME_POINTER
7398 && (!current_function_is_leaf
7399 || ix86_current_function_calls_tls_descriptor))
7400 return 1;
7401
7402 if (crtl->profile)
7403 return 1;
7404
7405 return 0;
7406 }
7407
7408 /* Record that the current function accesses previous call frames. */
7409
7410 void
7411 ix86_setup_frame_addresses (void)
7412 {
7413 cfun->machine->accesses_prev_frame = 1;
7414 }
7415 \f
7416 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7417 # define USE_HIDDEN_LINKONCE 1
7418 #else
7419 # define USE_HIDDEN_LINKONCE 0
7420 #endif
7421
7422 static int pic_labels_used;
7423
7424 /* Fills in the label name that should be used for a pc thunk for
7425 the given register. */
7426
7427 static void
7428 get_pc_thunk_name (char name[32], unsigned int regno)
7429 {
7430 gcc_assert (!TARGET_64BIT);
7431
7432 if (USE_HIDDEN_LINKONCE)
7433 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7434 else
7435 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7436 }
7437
7438
7439 /* Output the per-register pc thunks used by -fpic code; each thunk
7440 loads its register with the caller's return address and then returns. */
7441
7442 void
7443 ix86_file_end (void)
7444 {
7445 rtx xops[2];
7446 int regno;
7447
7448 for (regno = 0; regno < 8; ++regno)
7449 {
7450 char name[32];
7451
7452 if (! ((pic_labels_used >> regno) & 1))
7453 continue;
7454
7455 get_pc_thunk_name (name, regno);
7456
7457 #if TARGET_MACHO
7458 if (TARGET_MACHO)
7459 {
7460 switch_to_section (darwin_sections[text_coal_section]);
7461 fputs ("\t.weak_definition\t", asm_out_file);
7462 assemble_name (asm_out_file, name);
7463 fputs ("\n\t.private_extern\t", asm_out_file);
7464 assemble_name (asm_out_file, name);
7465 fputs ("\n", asm_out_file);
7466 ASM_OUTPUT_LABEL (asm_out_file, name);
7467 }
7468 else
7469 #endif
7470 if (USE_HIDDEN_LINKONCE)
7471 {
7472 tree decl;
7473
7474 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7475 error_mark_node);
7476 TREE_PUBLIC (decl) = 1;
7477 TREE_STATIC (decl) = 1;
7478 DECL_ONE_ONLY (decl) = 1;
7479
7480 (*targetm.asm_out.unique_section) (decl, 0);
7481 switch_to_section (get_named_section (decl, NULL, 0));
7482
7483 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7484 fputs ("\t.hidden\t", asm_out_file);
7485 assemble_name (asm_out_file, name);
7486 fputc ('\n', asm_out_file);
7487 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7488 }
7489 else
7490 {
7491 switch_to_section (text_section);
7492 ASM_OUTPUT_LABEL (asm_out_file, name);
7493 }
7494
7495 xops[0] = gen_rtx_REG (Pmode, regno);
7496 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7497 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7498 output_asm_insn ("ret", xops);
7499 }
7500
7501 if (NEED_INDICATE_EXEC_STACK)
7502 file_end_indicate_exec_stack ();
7503 }
7504
7505 /* Emit code for the SET_GOT patterns. */
7506
7507 const char *
7508 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7509 {
7510 rtx xops[3];
7511
7512 xops[0] = dest;
7513
7514 if (TARGET_VXWORKS_RTP && flag_pic)
7515 {
7516 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7517 xops[2] = gen_rtx_MEM (Pmode,
7518 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7519 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7520
7521 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7522 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7523 an unadorned address. */
7524 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7525 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7526 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7527 return "";
7528 }
7529
7530 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7531
7532 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7533 {
7534 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7535
7536 if (!flag_pic)
7537 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7538 else
7539 output_asm_insn ("call\t%a2", xops);
7540
7541 #if TARGET_MACHO
7542 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7543 is what will be referenced by the Mach-O PIC subsystem. */
7544 if (!label)
7545 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7546 #endif
7547
7548 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7549 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7550
7551 if (flag_pic)
7552 output_asm_insn ("pop%z0\t%0", xops);
7553 }
7554 else
7555 {
7556 char name[32];
7557 get_pc_thunk_name (name, REGNO (dest));
7558 pic_labels_used |= 1 << REGNO (dest);
7559
7560 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7561 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7562 output_asm_insn ("call\t%X2", xops);
7563 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7564 is what will be referenced by the Mach-O PIC subsystem. */
7565 #if TARGET_MACHO
7566 if (!label)
7567 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7568 else
7569 targetm.asm_out.internal_label (asm_out_file, "L",
7570 CODE_LABEL_NUMBER (label));
7571 #endif
7572 }
7573
7574 if (TARGET_MACHO)
7575 return "";
7576
7577 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7578 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7579 else
7580 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7581
7582 return "";
7583 }
7584
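/* For reference, with deep branch prediction the two routines above combine
   into the typical ia32 -fpic sequence (shown here for %ebx; the thunk body
   is emitted by ix86_file_end, the call and add by output_set_got):

        call  __i686.get_pc_thunk.bx
        addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   __i686.get_pc_thunk.bx:
        movl  (%esp), %ebx
        ret

   This is a descriptive sketch of typical output, not an exhaustive list of
   the emitted forms.  */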
7585 /* Generate a "push" pattern for input ARG. */
7586
7587 static rtx
7588 gen_push (rtx arg)
7589 {
7590 return gen_rtx_SET (VOIDmode,
7591 gen_rtx_MEM (Pmode,
7592 gen_rtx_PRE_DEC (Pmode,
7593 stack_pointer_rtx)),
7594 arg);
7595 }
7596
7597 /* Return the number of an unused call-clobbered register if one is
7598 available for the entire function, otherwise return INVALID_REGNUM. */
7599
7600 static unsigned int
7601 ix86_select_alt_pic_regnum (void)
7602 {
7603 if (current_function_is_leaf && !crtl->profile
7604 && !ix86_current_function_calls_tls_descriptor)
7605 {
7606 int i, drap;
7607 /* Can't use the same register for both PIC and DRAP. */
7608 if (crtl->drap_reg)
7609 drap = REGNO (crtl->drap_reg);
7610 else
7611 drap = -1;
7612 for (i = 2; i >= 0; --i)
7613 if (i != drap && !df_regs_ever_live_p (i))
7614 return i;
7615 }
7616
7617 return INVALID_REGNUM;
7618 }
7619
7620 /* Return 1 if we need to save REGNO. */
7621 static int
7622 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7623 {
7624 if (pic_offset_table_rtx
7625 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7626 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7627 || crtl->profile
7628 || crtl->calls_eh_return
7629 || crtl->uses_const_pool))
7630 {
7631 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7632 return 0;
7633 return 1;
7634 }
7635
7636 if (crtl->calls_eh_return && maybe_eh_return)
7637 {
7638 unsigned i;
7639 for (i = 0; ; i++)
7640 {
7641 unsigned test = EH_RETURN_DATA_REGNO (i);
7642 if (test == INVALID_REGNUM)
7643 break;
7644 if (test == regno)
7645 return 1;
7646 }
7647 }
7648
7649 if (crtl->drap_reg
7650 && regno == REGNO (crtl->drap_reg))
7651 return 1;
7652
7653 return (df_regs_ever_live_p (regno)
7654 && !call_used_regs[regno]
7655 && !fixed_regs[regno]
7656 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7657 }
7658
7659 /* Return the number of saved general purpose registers. */
7660
7661 static int
7662 ix86_nsaved_regs (void)
7663 {
7664 int nregs = 0;
7665 int regno;
7666
7667 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7668 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7669 nregs ++;
7670 return nregs;
7671 }
7672
7673 /* Return the number of saved SSE registers. */
7674
7675 static int
7676 ix86_nsaved_sseregs (void)
7677 {
7678 int nregs = 0;
7679 int regno;
7680
7681 if (ix86_cfun_abi () != MS_ABI)
7682 return 0;
7683 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7684 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7685 nregs ++;
7686 return nregs;
7687 }
7688
7689 /* Given FROM and TO register numbers, say whether this elimination is
7690 allowed. If stack alignment is needed, we can only replace argument
7691 pointer with hard frame pointer, or replace frame pointer with stack
7692 pointer. Otherwise, frame pointer elimination is automatically
7693 handled and all other eliminations are valid. */
7694
7695 int
7696 ix86_can_eliminate (int from, int to)
7697 {
7698 if (stack_realign_fp)
7699 return ((from == ARG_POINTER_REGNUM
7700 && to == HARD_FRAME_POINTER_REGNUM)
7701 || (from == FRAME_POINTER_REGNUM
7702 && to == STACK_POINTER_REGNUM));
7703 else
7704 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7705 }
7706
7707 /* Return the offset between two registers, one to be eliminated, and the other
7708 its replacement, at the start of a routine. */
7709
7710 HOST_WIDE_INT
7711 ix86_initial_elimination_offset (int from, int to)
7712 {
7713 struct ix86_frame frame;
7714 ix86_compute_frame_layout (&frame);
7715
7716 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7717 return frame.hard_frame_pointer_offset;
7718 else if (from == FRAME_POINTER_REGNUM
7719 && to == HARD_FRAME_POINTER_REGNUM)
7720 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7721 else
7722 {
7723 gcc_assert (to == STACK_POINTER_REGNUM);
7724
7725 if (from == ARG_POINTER_REGNUM)
7726 return frame.stack_pointer_offset;
7727
7728 gcc_assert (from == FRAME_POINTER_REGNUM);
7729 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7730 }
7731 }
7732
7733 /* In a dynamically-aligned function, we can't know the offset from
7734 stack pointer to frame pointer, so we must ensure that setjmp
7735 eliminates fp against the hard fp (%ebp) rather than trying to
7736 index from %esp up to the top of the frame across a gap that is
7737 of unknown (at compile-time) size. */
7738 static rtx
7739 ix86_builtin_setjmp_frame_value (void)
7740 {
7741 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7742 }
7743
7744 /* Fill structure ix86_frame about frame of currently computed function. */
7745
7746 static void
7747 ix86_compute_frame_layout (struct ix86_frame *frame)
7748 {
7749 HOST_WIDE_INT total_size;
7750 unsigned int stack_alignment_needed;
7751 HOST_WIDE_INT offset;
7752 unsigned int preferred_alignment;
7753 HOST_WIDE_INT size = get_frame_size ();
7754
7755 frame->nregs = ix86_nsaved_regs ();
7756 frame->nsseregs = ix86_nsaved_sseregs ();
7757 total_size = size;
7758
7759 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7760 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7761
7762 /* The MS ABI seems to require stack alignment to always be 16, except
7763 within function prologues. */
7764 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7765 {
7766 preferred_alignment = 16;
7767 stack_alignment_needed = 16;
7768 crtl->preferred_stack_boundary = 128;
7769 crtl->stack_alignment_needed = 128;
7770 }
7771
7772 gcc_assert (!size || stack_alignment_needed);
7773 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7774 gcc_assert (preferred_alignment <= stack_alignment_needed);
7775
7776 /* During reload iteration the number of registers saved can change.
7777 Recompute the value as needed. Do not recompute when the number of
7778 registers didn't change, as reload calls this function multiple times
7779 and does not expect the decision to change within a single iteration. */
7780 if (!optimize_function_for_size_p (cfun)
7781 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7782 {
7783 int count = frame->nregs;
7784
7785 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7786 /* The fast prologue uses move instead of push to save registers. This
7787 is significantly longer, but also executes faster as modern hardware
7788 can execute the moves in parallel, but can't do that for push/pop.
7789
7790 Be careful about choosing which prologue to emit: when the function
7791 takes many instructions to execute, we may as well use the slow
7792 version, likewise when the function is known to be outside a hot spot
7793 (this is known with feedback only). Weight the size of the function
7794 by the number of registers to save, as it is cheap to use one or two
7795 push instructions but very slow to use many of them. */
7796 if (count)
7797 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7798 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7799 || (flag_branch_probabilities
7800 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7801 cfun->machine->use_fast_prologue_epilogue = false;
7802 else
7803 cfun->machine->use_fast_prologue_epilogue
7804 = !expensive_function_p (count);
7805 }
7806 if (TARGET_PROLOGUE_USING_MOVE
7807 && cfun->machine->use_fast_prologue_epilogue)
7808 frame->save_regs_using_mov = true;
7809 else
7810 frame->save_regs_using_mov = false;
7811
7812
7813 /* Skip return address and saved base pointer. */
7814 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7815
7816 frame->hard_frame_pointer_offset = offset;
7817
7818 /* Align the offset, because the realigned frame starts from
7819 here. */
7820 if (stack_realign_fp)
7821 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7822
7823 /* Register save area */
7824 offset += frame->nregs * UNITS_PER_WORD;
7825
7826 /* Align SSE reg save area. */
7827 if (frame->nsseregs)
7828 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7829 else
7830 frame->padding0 = 0;
7831
7832 /* SSE register save area. */
7833 offset += frame->padding0 + frame->nsseregs * 16;
7834
7835 /* Va-arg area */
7836 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7837 offset += frame->va_arg_size;
7838
7839 /* Align start of frame for local function. */
7840 frame->padding1 = ((offset + stack_alignment_needed - 1)
7841 & -stack_alignment_needed) - offset;
7842
7843 offset += frame->padding1;
7844
7845 /* Frame pointer points here. */
7846 frame->frame_pointer_offset = offset;
7847
7848 offset += size;
7849
7850 /* Add the outgoing arguments area. It can be skipped if we eliminated
7851 all the function calls as dead code.
7852 Skipping is however impossible when the function calls alloca, as the
7853 alloca expander assumes that the last crtl->outgoing_args_size bytes
7854 of the stack frame are unused. */
7855 if (ACCUMULATE_OUTGOING_ARGS
7856 && (!current_function_is_leaf || cfun->calls_alloca
7857 || ix86_current_function_calls_tls_descriptor))
7858 {
7859 offset += crtl->outgoing_args_size;
7860 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7861 }
7862 else
7863 frame->outgoing_arguments_size = 0;
7864
7865 /* Align stack boundary. Only needed if we're calling another function
7866 or using alloca. */
7867 if (!current_function_is_leaf || cfun->calls_alloca
7868 || ix86_current_function_calls_tls_descriptor)
7869 frame->padding2 = ((offset + preferred_alignment - 1)
7870 & -preferred_alignment) - offset;
7871 else
7872 frame->padding2 = 0;
7873
7874 offset += frame->padding2;
7875
7876 /* We've reached end of stack frame. */
7877 frame->stack_pointer_offset = offset;
7878
7879 /* Size prologue needs to allocate. */
7880 frame->to_allocate =
7881 (size + frame->padding1 + frame->padding2
7882 + frame->outgoing_arguments_size + frame->va_arg_size);
7883
7884 if ((!frame->to_allocate && frame->nregs <= 1)
7885 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7886 frame->save_regs_using_mov = false;
7887
7888 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7889 && current_function_is_leaf
7890 && !ix86_current_function_calls_tls_descriptor)
7891 {
7892 frame->red_zone_size = frame->to_allocate;
7893 if (frame->save_regs_using_mov)
7894 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7895 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7896 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7897 }
7898 else
7899 frame->red_zone_size = 0;
7900 frame->to_allocate -= frame->red_zone_size;
7901 frame->stack_pointer_offset -= frame->red_zone_size;
7902 #if 0
7903 fprintf (stderr, "\n");
7904 fprintf (stderr, "size: %ld\n", (long)size);
7905 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7906 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7907 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7908 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7909 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7910 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7911 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7912 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7913 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7914 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7915 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7916 (long)frame->hard_frame_pointer_offset);
7917 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7918 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7919 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7920 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7921 #endif
7922 }
7923
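/* For orientation, the layout computed above (starting at the incoming
   return address and growing toward lower addresses) is roughly:

        return address
        saved %ebp                 (if frame_pointer_needed)
        saved GP registers         (nregs * UNITS_PER_WORD)
        padding0                   (aligns the SSE save area to 16)
        saved SSE registers        (nsseregs * 16)
        va-arg register save area  (va_arg_size)
        padding1                   (aligns the locals)
        local variables            (get_frame_size ())
        outgoing arguments
        padding2                   (aligns to the preferred boundary)

   with each padding field produced by the (offset + align - 1) & -align
   rounding used above.  This is a descriptive sketch of the code above, not
   a normative ABI statement.  */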
7924 /* Emit code to save registers in the prologue. */
7925
7926 static void
7927 ix86_emit_save_regs (void)
7928 {
7929 unsigned int regno;
7930 rtx insn;
7931
7932 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7933 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7934 {
7935 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7936 RTX_FRAME_RELATED_P (insn) = 1;
7937 }
7938 }
7939
7940 /* Emit code to save registers using MOV insns. The first register
7941 is saved at POINTER + OFFSET. */
7942 static void
7943 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7944 {
7945 unsigned int regno;
7946 rtx insn;
7947
7948 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7949 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7950 {
7951 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7952 Pmode, offset),
7953 gen_rtx_REG (Pmode, regno));
7954 RTX_FRAME_RELATED_P (insn) = 1;
7955 offset += UNITS_PER_WORD;
7956 }
7957 }
7958
7959 /* Emit code to save SSE registers using MOV insns. The first register
7960 is saved at POINTER + OFFSET. */
7961 static void
7962 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7963 {
7964 unsigned int regno;
7965 rtx insn;
7966 rtx mem;
7967
7968 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7969 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7970 {
7971 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7972 set_mem_align (mem, 128);
7973 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7974 RTX_FRAME_RELATED_P (insn) = 1;
7975 offset += 16;
7976 }
7977 }
7978
7979 /* Expand prologue or epilogue stack adjustment.
7980 The pattern exists to put a dependency on all ebp-based memory accesses.
7981 STYLE should be negative if instructions should be marked as frame related,
7982 zero if %r11 register is live and cannot be freely used and positive
7983 otherwise. */
7984
7985 static void
7986 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7987 {
7988 rtx insn;
7989
7990 if (! TARGET_64BIT)
7991 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7992 else if (x86_64_immediate_operand (offset, DImode))
7993 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7994 else
7995 {
7996 rtx r11;
7997 /* r11 is used by indirect sibcall return as well, set before the
7998 epilogue and used after the epilogue. ATM indirect sibcall
7999 shouldn't be used together with huge frame sizes in one
8000 function because of the frame_size check in sibcall.c. */
8001 gcc_assert (style);
8002 r11 = gen_rtx_REG (DImode, R11_REG);
8003 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8004 if (style < 0)
8005 RTX_FRAME_RELATED_P (insn) = 1;
8006 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8007 offset));
8008 }
8009 if (style < 0)
8010 RTX_FRAME_RELATED_P (insn) = 1;
8011 }
8012
8013 /* Find an available register to be used as the dynamic realign argument
8014 pointer register. Such a register will be written in the prologue and
8015 used at the beginning of the body, so it must not be
8016 1. a parameter passing register.
8017 2. the GOT pointer.
8018 We reuse the static-chain register if it is available. Otherwise, we
8019 use DI for i386 and R13 for x86-64. We chose R13 since it has a
8020 shorter encoding.
8021
8022 Return: the regno of the chosen register. */
8023
8024 static unsigned int
8025 find_drap_reg (void)
8026 {
8027 tree decl = cfun->decl;
8028
8029 if (TARGET_64BIT)
8030 {
8031 /* Use R13 for a nested function or a function that needs a static
8032 chain. Since a function with a tail call may use any caller-saved
8033 register in the epilogue, DRAP must not use a caller-saved
8034 register in that case. */
8035 if ((decl_function_context (decl)
8036 && !DECL_NO_STATIC_CHAIN (decl))
8037 || crtl->tail_call_emit)
8038 return R13_REG;
8039
8040 return R10_REG;
8041 }
8042 else
8043 {
8044 /* Use DI for a nested function or a function that needs a static
8045 chain. Since a function with a tail call may use any caller-saved
8046 register in the epilogue, DRAP must not use a caller-saved
8047 register in that case. */
8048 if ((decl_function_context (decl)
8049 && !DECL_NO_STATIC_CHAIN (decl))
8050 || crtl->tail_call_emit)
8051 return DI_REG;
8052
8053 /* Reuse static chain register if it isn't used for parameter
8054 passing. */
8055 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8056 && !lookup_attribute ("fastcall",
8057 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8058 return CX_REG;
8059 else
8060 return DI_REG;
8061 }
8062 }
8063
8064 /* Update incoming stack boundary and estimated stack alignment. */
8065
8066 static void
8067 ix86_update_stack_boundary (void)
8068 {
8069 /* Prefer the one specified at command line. */
8070 ix86_incoming_stack_boundary
8071 = (ix86_user_incoming_stack_boundary
8072 ? ix86_user_incoming_stack_boundary
8073 : ix86_default_incoming_stack_boundary);
8074
8075 /* Incoming stack alignment can be changed on individual functions
8076 via force_align_arg_pointer attribute. We use the smallest
8077 incoming stack boundary. */
8078 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8079 && lookup_attribute (ix86_force_align_arg_pointer_string,
8080 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8081 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8082
8083 /* The incoming stack frame has to be aligned at least at
8084 parm_stack_boundary. */
8085 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8086 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8087
8088 /* The stack at the entrance of main is aligned by the runtime. We use
8089 the smallest incoming stack boundary. */
8090 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8091 && DECL_NAME (current_function_decl)
8092 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8093 && DECL_FILE_SCOPE_P (current_function_decl))
8094 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8095
8096 /* x86_64 varargs need 16-byte stack alignment for the register save
8097 area. */
8098 if (TARGET_64BIT
8099 && cfun->stdarg
8100 && crtl->stack_alignment_estimated < 128)
8101 crtl->stack_alignment_estimated = 128;
8102 }
8103
8104 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8105 needed or an rtx for DRAP otherwise. */
8106
8107 static rtx
8108 ix86_get_drap_rtx (void)
8109 {
8110 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8111 crtl->need_drap = true;
8112
8113 if (stack_realign_drap)
8114 {
8115 /* Assign DRAP to vDRAP and return vDRAP. */
8116 unsigned int regno = find_drap_reg ();
8117 rtx drap_vreg;
8118 rtx arg_ptr;
8119 rtx seq, insn;
8120
8121 arg_ptr = gen_rtx_REG (Pmode, regno);
8122 crtl->drap_reg = arg_ptr;
8123
8124 start_sequence ();
8125 drap_vreg = copy_to_reg (arg_ptr);
8126 seq = get_insns ();
8127 end_sequence ();
8128
8129 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8130 RTX_FRAME_RELATED_P (insn) = 1;
8131 return drap_vreg;
8132 }
8133 else
8134 return NULL;
8135 }
8136
8137 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8138
8139 static rtx
8140 ix86_internal_arg_pointer (void)
8141 {
8142 return virtual_incoming_args_rtx;
8143 }
8144
8145 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8146 This is called from dwarf2out.c to emit call frame instructions
8147 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8148 static void
8149 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8150 {
8151 rtx unspec = SET_SRC (pattern);
8152 gcc_assert (GET_CODE (unspec) == UNSPEC);
8153
8154 switch (index)
8155 {
8156 case UNSPEC_REG_SAVE:
8157 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8158 SET_DEST (pattern));
8159 break;
8160 case UNSPEC_DEF_CFA:
8161 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8162 INTVAL (XVECEXP (unspec, 0, 0)));
8163 break;
8164 default:
8165 gcc_unreachable ();
8166 }
8167 }
8168
8169 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8170 to be generated in correct form. */
8171 static void
8172 ix86_finalize_stack_realign_flags (void)
8173 {
8174 /* Check whether stack realignment is really needed after reload, and
8175 store the result in cfun. */
8176 unsigned int incoming_stack_boundary
8177 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8178 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8179 unsigned int stack_realign = (incoming_stack_boundary
8180 < (current_function_is_leaf
8181 ? crtl->max_used_stack_slot_alignment
8182 : crtl->stack_alignment_needed));
8183
8184 if (crtl->stack_realign_finalized)
8185 {
8186 /* After stack_realign_needed is finalized, we can no longer
8187 change it. */
8188 gcc_assert (crtl->stack_realign_needed == stack_realign);
8189 }
8190 else
8191 {
8192 crtl->stack_realign_needed = stack_realign;
8193 crtl->stack_realign_finalized = true;
8194 }
8195 }
8196
8197 /* Expand the prologue into a bunch of separate insns. */
8198
8199 void
8200 ix86_expand_prologue (void)
8201 {
8202 rtx insn;
8203 bool pic_reg_used;
8204 struct ix86_frame frame;
8205 HOST_WIDE_INT allocate;
8206
8207 ix86_finalize_stack_realign_flags ();
8208
8209 /* DRAP should not coexist with stack_realign_fp */
8210 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8211
8212 ix86_compute_frame_layout (&frame);
8213
8214 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8215 DRAP is needed and stack realignment is really needed after reload. */
8216 if (crtl->drap_reg && crtl->stack_realign_needed)
8217 {
8218 rtx x, y;
8219 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8220 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8221 ? 0 : UNITS_PER_WORD);
8222
8223 gcc_assert (stack_realign_drap);
8224
8225 /* Grab the argument pointer. */
8226 x = plus_constant (stack_pointer_rtx,
8227 (UNITS_PER_WORD + param_ptr_offset));
8228 y = crtl->drap_reg;
8229
8230 /* Only need to push the parameter pointer reg if it is a caller-saved
8231 reg. */
8232 if (!call_used_regs[REGNO (crtl->drap_reg)])
8233 {
8234 /* Push arg pointer reg */
8235 insn = emit_insn (gen_push (y));
8236 RTX_FRAME_RELATED_P (insn) = 1;
8237 }
8238
8239 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8240 RTX_FRAME_RELATED_P (insn) = 1;
8241
8242 /* Align the stack. */
8243 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8244 stack_pointer_rtx,
8245 GEN_INT (-align_bytes)));
8246 RTX_FRAME_RELATED_P (insn) = 1;
8247
8248 /* Replicate the return address on the stack so that the return
8249 address can be reached via the (argp - 1) slot. This is needed
8250 to implement the RETURN_ADDR_RTX macro and the intrinsic function
8251 expand_builtin_return_addr etc. */
8252 x = crtl->drap_reg;
8253 x = gen_frame_mem (Pmode,
8254 plus_constant (x, -UNITS_PER_WORD));
8255 insn = emit_insn (gen_push (x));
8256 RTX_FRAME_RELATED_P (insn) = 1;
8257 }
8258
8259 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8260 slower on all targets. Also sdb doesn't like it. */
8261
8262 if (frame_pointer_needed)
8263 {
8264 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8265 RTX_FRAME_RELATED_P (insn) = 1;
8266
8267 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8268 RTX_FRAME_RELATED_P (insn) = 1;
8269 }
8270
8271 if (stack_realign_fp)
8272 {
8273 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8274 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8275
8276 /* Align the stack. */
8277 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8278 stack_pointer_rtx,
8279 GEN_INT (-align_bytes)));
8280 RTX_FRAME_RELATED_P (insn) = 1;
8281 }
8282
8283 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8284
8285 if (!frame.save_regs_using_mov)
8286 ix86_emit_save_regs ();
8287 else
8288 allocate += frame.nregs * UNITS_PER_WORD;
8289
8290 /* When using the red zone we may start saving registers before
8291 allocating the stack frame, saving one cycle of the prologue. However,
8292 we avoid doing this if we are going to have to probe the stack, since
8293 at least on x86_64 the stack probe can turn into a call that clobbers
8294 a red zone location. */
8295 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8296 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8297 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8298 && !crtl->stack_realign_needed)
8299 ? hard_frame_pointer_rtx
8300 : stack_pointer_rtx,
8301 -frame.nregs * UNITS_PER_WORD);
8302
8303 if (allocate == 0)
8304 ;
8305 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8306 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8307 GEN_INT (-allocate), -1);
8308 else
8309 {
8310 /* Only valid for Win32. */
8311 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8312 bool eax_live;
8313 rtx t;
8314
8315 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8316
8317 if (cfun->machine->call_abi == MS_ABI)
8318 eax_live = false;
8319 else
8320 eax_live = ix86_eax_live_at_start_p ();
8321
8322 if (eax_live)
8323 {
8324 emit_insn (gen_push (eax));
8325 allocate -= UNITS_PER_WORD;
8326 }
8327
8328 emit_move_insn (eax, GEN_INT (allocate));
8329
8330 if (TARGET_64BIT)
8331 insn = gen_allocate_stack_worker_64 (eax, eax);
8332 else
8333 insn = gen_allocate_stack_worker_32 (eax, eax);
8334 insn = emit_insn (insn);
8335 RTX_FRAME_RELATED_P (insn) = 1;
8336 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8337 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8338 add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
8339
8340 if (eax_live)
8341 {
8342 if (frame_pointer_needed)
8343 t = plus_constant (hard_frame_pointer_rtx,
8344 allocate
8345 - frame.to_allocate
8346 - frame.nregs * UNITS_PER_WORD);
8347 else
8348 t = plus_constant (stack_pointer_rtx, allocate);
8349 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8350 }
8351 }
8352
8353 if (frame.save_regs_using_mov
8354 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8355 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8356 {
8357 if (!frame_pointer_needed
8358 || !frame.to_allocate
8359 || crtl->stack_realign_needed)
8360 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8361 frame.to_allocate
8362 + frame.nsseregs * 16 + frame.padding0);
8363 else
8364 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8365 -frame.nregs * UNITS_PER_WORD);
8366 }
8367 if (!frame_pointer_needed
8368 || !frame.to_allocate
8369 || crtl->stack_realign_needed)
8370 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8371 frame.to_allocate);
8372 else
8373 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8374 - frame.nregs * UNITS_PER_WORD
8375 - frame.nsseregs * 16
8376 - frame.padding0);
8377
8378 pic_reg_used = false;
8379 if (pic_offset_table_rtx
8380 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8381 || crtl->profile))
8382 {
8383 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8384
8385 if (alt_pic_reg_used != INVALID_REGNUM)
8386 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8387
8388 pic_reg_used = true;
8389 }
8390
8391 if (pic_reg_used)
8392 {
8393 if (TARGET_64BIT)
8394 {
8395 if (ix86_cmodel == CM_LARGE_PIC)
8396 {
8397 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8398 rtx label = gen_label_rtx ();
8399 emit_label (label);
8400 LABEL_PRESERVE_P (label) = 1;
8401 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8402 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8403 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8404 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8405 pic_offset_table_rtx, tmp_reg));
8406 }
8407 else
8408 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8409 }
8410 else
8411 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8412 }
8413
8414 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
8415 when mcount needs it. The blockage to avoid call movement across the
8416 mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8417 note. */
8418 if (crtl->profile && pic_reg_used)
8419 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8420
8421 if (crtl->drap_reg && !crtl->stack_realign_needed)
8422 {
8423 /* vDRAP is set up, but after reload it turns out that stack
8424 realignment isn't necessary; emit prologue code here to set up DRAP
8425 without the stack realignment adjustment. */
8426 int drap_bp_offset = UNITS_PER_WORD * 2;
8427 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8428 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8429 }
8430
8431 /* Prevent instructions from being scheduled into the register save push
8432 sequence when access to the red zone area is done through the frame
8433 pointer. The offset between the frame pointer and the stack pointer is
8434 calculated relative to the value of the stack pointer at the end of the
8435 function prologue, and moving instructions that access the red zone area
8436 via the frame pointer inside the push sequence violates this assumption. */
8437 if (frame_pointer_needed && frame.red_zone_size)
8438 emit_insn (gen_memory_blockage ());
8439
8440 /* Emit cld instruction if stringops are used in the function. */
8441 if (TARGET_CLD && ix86_current_function_needs_cld)
8442 emit_insn (gen_cld ());
8443 }
8444
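/* For reference, for a simple ia32 function with a frame pointer and no
   stack realignment the expansion above typically produces something like:

        push  %ebp
        mov   %esp, %ebp
        sub   $allocate, %esp
        ...register saves via push or mov...

   with the realignment case inserting an 'and $-align, %esp' and the DRAP
   case materializing the argument pointer first.  This is a descriptive
   sketch of typical output, not an exhaustive list of the emitted forms.  */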
8445 /* Emit code to restore saved registers using MOV insns. First register
8446 is restored from POINTER + OFFSET. */
8447 static void
8448 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8449 int maybe_eh_return)
8450 {
8451 int regno;
8452 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8453
8454 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8455 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8456 {
8457 /* Ensure that adjust_address won't be forced to produce pointer
8458 out of range allowed by x86-64 instruction set. */
8459 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8460 {
8461 rtx r11;
8462
8463 r11 = gen_rtx_REG (DImode, R11_REG);
8464 emit_move_insn (r11, GEN_INT (offset));
8465 emit_insn (gen_adddi3 (r11, r11, pointer));
8466 base_address = gen_rtx_MEM (Pmode, r11);
8467 offset = 0;
8468 }
8469 emit_move_insn (gen_rtx_REG (Pmode, regno),
8470 adjust_address (base_address, Pmode, offset));
8471 offset += UNITS_PER_WORD;
8472 }
8473 }
8474
8475 /* Emit code to restore saved SSE registers using MOV insns. The first
8476 register is restored from POINTER + OFFSET. */
8477 static void
8478 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8479 int maybe_eh_return)
8480 {
8481 int regno;
8482 rtx base_address = gen_rtx_MEM (TImode, pointer);
8483 rtx mem;
8484
8485 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8486 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8487 {
8488 /* Ensure that adjust_address won't be forced to produce pointer
8489 out of range allowed by x86-64 instruction set. */
8490 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8491 {
8492 rtx r11;
8493
8494 r11 = gen_rtx_REG (DImode, R11_REG);
8495 emit_move_insn (r11, GEN_INT (offset));
8496 emit_insn (gen_adddi3 (r11, r11, pointer));
8497 base_address = gen_rtx_MEM (TImode, r11);
8498 offset = 0;
8499 }
8500 mem = adjust_address (base_address, TImode, offset);
8501 set_mem_align (mem, 128);
8502 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8503 offset += 16;
8504 }
8505 }
8506
8507 /* Restore function stack, frame, and registers. */
8508
8509 void
8510 ix86_expand_epilogue (int style)
8511 {
8512 int regno;
8513 int sp_valid;
8514 struct ix86_frame frame;
8515 HOST_WIDE_INT offset;
8516
8517 ix86_finalize_stack_realign_flags ();
8518
8519 /* When stack is realigned, SP must be valid. */
8520 sp_valid = (!frame_pointer_needed
8521 || current_function_sp_is_unchanging
8522 || stack_realign_fp);
8523
8524 ix86_compute_frame_layout (&frame);
8525
8526 /* See the comment about red zone and frame
8527 pointer usage in ix86_expand_prologue. */
8528 if (frame_pointer_needed && frame.red_zone_size)
8529 emit_insn (gen_memory_blockage ());
8530
8531 /* Calculate start of saved registers relative to ebp. Special care
8532 must be taken for the normal return case of a function using
8533 eh_return: the eax and edx registers are marked as saved, but not
8534 restored along this path. */
8535 offset = frame.nregs;
8536 if (crtl->calls_eh_return && style != 2)
8537 offset -= 2;
8538 offset *= -UNITS_PER_WORD;
8539 offset -= frame.nsseregs * 16 + frame.padding0;
8540
8541 /* If we're only restoring one register and sp is not valid then
8542 restore the register using a move instruction, since it's less work
8543 than reloading sp and popping the register.
8544
8545 The default code results in a stack adjustment using an add/lea
8546 instruction, while this code results in a LEAVE instruction (or discrete
8547 equivalent), so it is profitable in some other cases as well, especially
8548 when there are no registers to restore. We also use this code when
8549 TARGET_USE_LEAVE and there is exactly one register to pop. This
8550 heuristic may need some tuning in future. */
8551 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8552 || (TARGET_EPILOGUE_USING_MOVE
8553 && cfun->machine->use_fast_prologue_epilogue
8554 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8555 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8556 || (frame_pointer_needed && TARGET_USE_LEAVE
8557 && cfun->machine->use_fast_prologue_epilogue
8558 && (frame.nregs + frame.nsseregs) == 1)
8559 || crtl->calls_eh_return)
8560 {
8561 /* Restore registers. We can use ebp or esp to address the memory
8562 locations. If both are available, default to ebp, since offsets
8563 are known to be small. The only exception is esp pointing directly
8564 to the end of the block of saved registers, where we may simplify
8565 the addressing mode.
8566
8567 If we are realigning the stack with bp and sp, the register restores
8568 can't be addressed by bp; sp must be used instead. */
8569
8570 if (!frame_pointer_needed
8571 || (sp_valid && !frame.to_allocate)
8572 || stack_realign_fp)
8573 {
8574 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8575 frame.to_allocate, style == 2);
8576 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8577 frame.to_allocate
8578 + frame.nsseregs * 16
8579 + frame.padding0, style == 2);
8580 }
8581 else
8582 {
8583 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8584 offset, style == 2);
8585 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8586 offset
8587 + frame.nsseregs * 16
8588 + frame.padding0, style == 2);
8589 }
8590
8591 /* eh_return epilogues need %ecx added to the stack pointer. */
8592 if (style == 2)
8593 {
8594 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8595
8596 /* Stack align doesn't work with eh_return. */
8597 gcc_assert (!crtl->stack_realign_needed);
8598
8599 if (frame_pointer_needed)
8600 {
8601 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8602 tmp = plus_constant (tmp, UNITS_PER_WORD);
8603 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8604
8605 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8606 emit_move_insn (hard_frame_pointer_rtx, tmp);
8607
8608 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8609 const0_rtx, style);
8610 }
8611 else
8612 {
8613 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8614 tmp = plus_constant (tmp, (frame.to_allocate
8615 + frame.nregs * UNITS_PER_WORD
8616 + frame.nsseregs * 16
8617 + frame.padding0));
8618 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8619 }
8620 }
8621 else if (!frame_pointer_needed)
8622 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8623 GEN_INT (frame.to_allocate
8624 + frame.nregs * UNITS_PER_WORD
8625 + frame.nsseregs * 16
8626 + frame.padding0),
8627 style);
8628 /* If not an i386, mov & pop is faster than "leave". */
8629 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8630 || !cfun->machine->use_fast_prologue_epilogue)
8631 emit_insn ((*ix86_gen_leave) ());
8632 else
8633 {
8634 pro_epilogue_adjust_stack (stack_pointer_rtx,
8635 hard_frame_pointer_rtx,
8636 const0_rtx, style);
8637
8638 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8639 }
8640 }
8641 else
8642 {
8643 /* The first step is to deallocate the stack frame so that we can
8644 pop the registers.
8645
8646 If we realign the stack with the frame pointer, then the stack pointer
8647 can't be recovered via "lea $offset(%bp), %sp", because
8648 there is a padding area between bp and sp for the realignment.
8649 "add $to_allocate, %sp" must be used instead. */
8650 if (!sp_valid)
8651 {
8652 gcc_assert (frame_pointer_needed);
8653 gcc_assert (!stack_realign_fp);
8654 pro_epilogue_adjust_stack (stack_pointer_rtx,
8655 hard_frame_pointer_rtx,
8656 GEN_INT (offset), style);
8657 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8658 frame.to_allocate, style == 2);
8659 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8660 GEN_INT (frame.nsseregs * 16), style);
8661 }
8662 else if (frame.to_allocate || frame.nsseregs)
8663 {
8664 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8665 frame.to_allocate,
8666 style == 2);
8667 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8668 GEN_INT (frame.to_allocate
8669 + frame.nsseregs * 16
8670 + frame.padding0), style);
8671 }
8672
8673 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8674 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8675 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8676 if (frame_pointer_needed)
8677 {
8678 /* Leave results in shorter dependency chains on CPUs that are
8679 able to grok it fast. */
8680 if (TARGET_USE_LEAVE)
8681 emit_insn ((*ix86_gen_leave) ());
8682 else
8683 {
8684 /* If stack realignment really happened, restoring the stack
8685 pointer from the hard frame pointer is a must, if not using
8686 leave. */
8687 if (stack_realign_fp)
8688 pro_epilogue_adjust_stack (stack_pointer_rtx,
8689 hard_frame_pointer_rtx,
8690 const0_rtx, style);
8691 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8692 }
8693 }
8694 }
8695
8696 if (crtl->drap_reg && crtl->stack_realign_needed)
8697 {
8698 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8699 ? 0 : UNITS_PER_WORD);
8700 gcc_assert (stack_realign_drap);
8701 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8702 crtl->drap_reg,
8703 GEN_INT (-(UNITS_PER_WORD
8704 + param_ptr_offset))));
8705 if (!call_used_regs[REGNO (crtl->drap_reg)])
8706 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8707
8708 }
8709
8710 /* Sibcall epilogues don't want a return instruction. */
8711 if (style == 0)
8712 return;
8713
8714 if (crtl->args.pops_args && crtl->args.size)
8715 {
8716 rtx popc = GEN_INT (crtl->args.pops_args);
8717
8718 /* i386 can only pop 64K bytes. If asked to pop more, pop the
8719 return address, do an explicit add, and jump indirectly to the
8720 caller. */
8721
8722 if (crtl->args.pops_args >= 65536)
8723 {
8724 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8725
8726 /* There is no "pascal" calling convention in any 64bit ABI. */
8727 gcc_assert (!TARGET_64BIT);
8728
8729 emit_insn (gen_popsi1 (ecx));
8730 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8731 emit_jump_insn (gen_return_indirect_internal (ecx));
8732 }
8733 else
8734 emit_jump_insn (gen_return_pop_internal (popc));
8735 }
8736 else
8737 emit_jump_insn (gen_return_internal ());
8738 }
8739
8740 /* Reset from the function's potential modifications. */
8741
8742 static void
8743 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8744 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8745 {
8746 if (pic_offset_table_rtx)
8747 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8748 #if TARGET_MACHO
8749 /* Mach-O doesn't support labels at the end of objects, so if
8750 it looks like we might want one, insert a NOP. */
8751 {
8752 rtx insn = get_last_insn ();
8753 while (insn
8754 && NOTE_P (insn)
8755 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8756 insn = PREV_INSN (insn);
8757 if (insn
8758 && (LABEL_P (insn)
8759 || (NOTE_P (insn)
8760 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8761 fputs ("\tnop\n", file);
8762 }
8763 #endif
8764
8765 }
8766 \f
8767 /* Extract the parts of an RTL expression that is a valid memory address
8768 for an instruction. Return 0 if the structure of the address is
8769 grossly off. Return -1 if the address contains ASHIFT, so it is not
8770 strictly valid, but is still used for computing the length of an lea instruction. */
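/* For illustration (hypothetical register numbers): the canonical
   SIB-style address

	(plus:SI (plus:SI (mult:SI (reg:SI 38) (const_int 4))
			  (reg:SI 37))
		 (const_int 8))

   decomposes into out->base = (reg 37), out->index = (reg 38),
   out->scale = 4 and out->disp = (const_int 8), i.e. the operand
   8(%base,%index,4) in AT&T syntax.  */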
8771
8772 int
8773 ix86_decompose_address (rtx addr, struct ix86_address *out)
8774 {
8775 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8776 rtx base_reg, index_reg;
8777 HOST_WIDE_INT scale = 1;
8778 rtx scale_rtx = NULL_RTX;
8779 int retval = 1;
8780 enum ix86_address_seg seg = SEG_DEFAULT;
8781
8782 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8783 base = addr;
8784 else if (GET_CODE (addr) == PLUS)
8785 {
8786 rtx addends[4], op;
8787 int n = 0, i;
8788
8789 op = addr;
8790 do
8791 {
8792 if (n >= 4)
8793 return 0;
8794 addends[n++] = XEXP (op, 1);
8795 op = XEXP (op, 0);
8796 }
8797 while (GET_CODE (op) == PLUS);
8798 if (n >= 4)
8799 return 0;
8800 addends[n] = op;
8801
8802 for (i = n; i >= 0; --i)
8803 {
8804 op = addends[i];
8805 switch (GET_CODE (op))
8806 {
8807 case MULT:
8808 if (index)
8809 return 0;
8810 index = XEXP (op, 0);
8811 scale_rtx = XEXP (op, 1);
8812 break;
8813
8814 case UNSPEC:
8815 if (XINT (op, 1) == UNSPEC_TP
8816 && TARGET_TLS_DIRECT_SEG_REFS
8817 && seg == SEG_DEFAULT)
8818 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8819 else
8820 return 0;
8821 break;
8822
8823 case REG:
8824 case SUBREG:
8825 if (!base)
8826 base = op;
8827 else if (!index)
8828 index = op;
8829 else
8830 return 0;
8831 break;
8832
8833 case CONST:
8834 case CONST_INT:
8835 case SYMBOL_REF:
8836 case LABEL_REF:
8837 if (disp)
8838 return 0;
8839 disp = op;
8840 break;
8841
8842 default:
8843 return 0;
8844 }
8845 }
8846 }
8847 else if (GET_CODE (addr) == MULT)
8848 {
8849 index = XEXP (addr, 0); /* index*scale */
8850 scale_rtx = XEXP (addr, 1);
8851 }
8852 else if (GET_CODE (addr) == ASHIFT)
8853 {
8854 rtx tmp;
8855
8856 /* We're called for lea too, which implements ashift on occasion. */
8857 index = XEXP (addr, 0);
8858 tmp = XEXP (addr, 1);
8859 if (!CONST_INT_P (tmp))
8860 return 0;
8861 scale = INTVAL (tmp);
8862 if ((unsigned HOST_WIDE_INT) scale > 3)
8863 return 0;
8864 scale = 1 << scale;
8865 retval = -1;
8866 }
8867 else
8868 disp = addr; /* displacement */
8869
8870 /* Extract the integral value of scale. */
8871 if (scale_rtx)
8872 {
8873 if (!CONST_INT_P (scale_rtx))
8874 return 0;
8875 scale = INTVAL (scale_rtx);
8876 }
8877
8878 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8879 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8880
8881 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
8882 if (base_reg && index_reg && scale == 1
8883 && (index_reg == arg_pointer_rtx
8884 || index_reg == frame_pointer_rtx
8885 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8886 {
8887 rtx tmp;
8888 tmp = base, base = index, index = tmp;
8889 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8890 }
8891
8892 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8893 if ((base_reg == hard_frame_pointer_rtx
8894 || base_reg == frame_pointer_rtx
8895 || base_reg == arg_pointer_rtx) && !disp)
8896 disp = const0_rtx;
8897
8898 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
8899 Avoid this by transforming to [%esi+0].
8900 Reload calls address legitimization without cfun defined, so we need
8901 to test cfun for being non-NULL. */
8902 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8903 && base_reg && !index_reg && !disp
8904 && REG_P (base_reg)
8905 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8906 disp = const0_rtx;
8907
8908 /* Special case: encode reg+reg instead of reg*2. */
8909 if (!base && index && scale && scale == 2)
8910 base = index, base_reg = index_reg, scale = 1;
8911
8912 /* Special case: scaling cannot be encoded without base or displacement. */
8913 if (!base && !disp && index && scale != 1)
8914 disp = const0_rtx;
8915
8916 out->base = base;
8917 out->index = index;
8918 out->disp = disp;
8919 out->scale = scale;
8920 out->seg = seg;
8921
8922 return retval;
8923 }
8924 \f
8925 /* Return the cost of the memory address X.
8926 For i386, it is better to use a complex address than to let gcc copy
8927 the address into a reg and make a new pseudo. But not if the address
8928 requires two regs - that would mean more pseudos with longer
8929 lifetimes. */
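/* A rough illustration of the heuristic below: an address built from
   pseudo registers still awaiting hard registers is charged extra, and
   more again when it needs two such registers, so e.g.
   disp(pseudo1,pseudo2,4) costs more than disp(pseudo1), nudging gcc
   toward addresses that keep fewer pseudos live.  The K6-specific
   penalty further below is a separate issue.  */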
8930 static int
8931 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8932 {
8933 struct ix86_address parts;
8934 int cost = 1;
8935 int ok = ix86_decompose_address (x, &parts);
8936
8937 gcc_assert (ok);
8938
8939 if (parts.base && GET_CODE (parts.base) == SUBREG)
8940 parts.base = SUBREG_REG (parts.base);
8941 if (parts.index && GET_CODE (parts.index) == SUBREG)
8942 parts.index = SUBREG_REG (parts.index);
8943
8944 /* Attempt to minimize number of registers in the address. */
8945 if ((parts.base
8946 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8947 || (parts.index
8948 && (!REG_P (parts.index)
8949 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8950 cost++;
8951
8952 if (parts.base
8953 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8954 && parts.index
8955 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8956 && parts.base != parts.index)
8957 cost++;
8958
8959 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
8960 since its predecode logic can't detect the length of such instructions
8961 and decoding degenerates to vector decoding. Increase the cost of such
8962 addresses here. The penalty is at least 2 cycles. It may be worthwhile
8963 to split such addresses or even to refuse such addresses at all.
8964
8965 The following addressing modes are affected:
8966 [base+scale*index]
8967 [scale*index+disp]
8968 [base+index]
8969
8970 The first and last case may be avoidable by explicitly coding the zero in
8971 the memory address, but I don't have an AMD-K6 machine handy to check this
8972 theory. */
8973
8974 if (TARGET_K6
8975 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8976 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8977 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8978 cost += 10;
8979
8980 return cost;
8981 }
8982 \f
8983 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
8984 this is used to form addresses to local data when -fPIC is in
8985 use. */
8986
8987 static bool
8988 darwin_local_data_pic (rtx disp)
8989 {
8990 return (GET_CODE (disp) == UNSPEC
8991 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8992 }
8993
8994 /* Determine if a given RTX is a valid constant. We already know this
8995 satisfies CONSTANT_P. */
8996
8997 bool
8998 legitimate_constant_p (rtx x)
8999 {
9000 switch (GET_CODE (x))
9001 {
9002 case CONST:
9003 x = XEXP (x, 0);
9004
9005 if (GET_CODE (x) == PLUS)
9006 {
9007 if (!CONST_INT_P (XEXP (x, 1)))
9008 return false;
9009 x = XEXP (x, 0);
9010 }
9011
9012 if (TARGET_MACHO && darwin_local_data_pic (x))
9013 return true;
9014
9015 /* Only some unspecs are valid as "constants". */
9016 if (GET_CODE (x) == UNSPEC)
9017 switch (XINT (x, 1))
9018 {
9019 case UNSPEC_GOT:
9020 case UNSPEC_GOTOFF:
9021 case UNSPEC_PLTOFF:
9022 return TARGET_64BIT;
9023 case UNSPEC_TPOFF:
9024 case UNSPEC_NTPOFF:
9025 x = XVECEXP (x, 0, 0);
9026 return (GET_CODE (x) == SYMBOL_REF
9027 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9028 case UNSPEC_DTPOFF:
9029 x = XVECEXP (x, 0, 0);
9030 return (GET_CODE (x) == SYMBOL_REF
9031 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9032 default:
9033 return false;
9034 }
9035
9036 /* We must have drilled down to a symbol. */
9037 if (GET_CODE (x) == LABEL_REF)
9038 return true;
9039 if (GET_CODE (x) != SYMBOL_REF)
9040 return false;
9041 /* FALLTHRU */
9042
9043 case SYMBOL_REF:
9044 /* TLS symbols are never valid. */
9045 if (SYMBOL_REF_TLS_MODEL (x))
9046 return false;
9047
9048 /* DLLIMPORT symbols are never valid. */
9049 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9050 && SYMBOL_REF_DLLIMPORT_P (x))
9051 return false;
9052 break;
9053
9054 case CONST_DOUBLE:
9055 if (GET_MODE (x) == TImode
9056 && x != CONST0_RTX (TImode)
9057 && !TARGET_64BIT)
9058 return false;
9059 break;
9060
9061 case CONST_VECTOR:
9062 if (!standard_sse_constant_p (x))
9063 return false;
9064
9065 default:
9066 break;
9067 }
9068
9069 /* Otherwise we handle everything else in the move patterns. */
9070 return true;
9071 }
9072
9073 /* Determine if it's legal to put X into the constant pool. This
9074 is not possible for the address of thread-local symbols, which
9075 is checked above. */
9076
9077 static bool
9078 ix86_cannot_force_const_mem (rtx x)
9079 {
9080 /* We can always put integral constants and vectors in memory. */
9081 switch (GET_CODE (x))
9082 {
9083 case CONST_INT:
9084 case CONST_DOUBLE:
9085 case CONST_VECTOR:
9086 return false;
9087
9088 default:
9089 break;
9090 }
9091 return !legitimate_constant_p (x);
9092 }
9093
9094 /* Determine if a given RTX is a valid constant address. */
9095
9096 bool
9097 constant_address_p (rtx x)
9098 {
9099 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
9100 }
9101
9102 /* Nonzero if the constant value X is a legitimate general operand
9103 when generating PIC code. It is given that flag_pic is on and
9104 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9105
9106 bool
9107 legitimate_pic_operand_p (rtx x)
9108 {
9109 rtx inner;
9110
9111 switch (GET_CODE (x))
9112 {
9113 case CONST:
9114 inner = XEXP (x, 0);
9115 if (GET_CODE (inner) == PLUS
9116 && CONST_INT_P (XEXP (inner, 1)))
9117 inner = XEXP (inner, 0);
9118
9119 /* Only some unspecs are valid as "constants". */
9120 if (GET_CODE (inner) == UNSPEC)
9121 switch (XINT (inner, 1))
9122 {
9123 case UNSPEC_GOT:
9124 case UNSPEC_GOTOFF:
9125 case UNSPEC_PLTOFF:
9126 return TARGET_64BIT;
9127 case UNSPEC_TPOFF:
9128 x = XVECEXP (inner, 0, 0);
9129 return (GET_CODE (x) == SYMBOL_REF
9130 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9131 case UNSPEC_MACHOPIC_OFFSET:
9132 return legitimate_pic_address_disp_p (x);
9133 default:
9134 return false;
9135 }
9136 /* FALLTHRU */
9137
9138 case SYMBOL_REF:
9139 case LABEL_REF:
9140 return legitimate_pic_address_disp_p (x);
9141
9142 default:
9143 return true;
9144 }
9145 }
9146
9147 /* Determine if a given CONST RTX is a valid memory displacement
9148 in PIC mode. */
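/* Illustrative examples of displacements accepted here in 32bit PIC
   code (assuming the usual GOT-register setup): CONST wrappers around
   (unspec [foo] UNSPEC_GOTOFF) or (unspec [foo] UNSPEC_GOT), which
   print as foo@GOTOFF and foo@GOT, plus the TLS forms such as
   foo@NTPOFF.  A bare (symbol_ref "foo") is not a valid 32bit PIC
   displacement.  */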
9149
9150 int
9151 legitimate_pic_address_disp_p (rtx disp)
9152 {
9153 bool saw_plus;
9154
9155 /* In 64bit mode we can allow direct addresses of symbols and labels
9156 when they are not dynamic symbols. */
9157 if (TARGET_64BIT)
9158 {
9159 rtx op0 = disp, op1;
9160
9161 switch (GET_CODE (disp))
9162 {
9163 case LABEL_REF:
9164 return true;
9165
9166 case CONST:
9167 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9168 break;
9169 op0 = XEXP (XEXP (disp, 0), 0);
9170 op1 = XEXP (XEXP (disp, 0), 1);
9171 if (!CONST_INT_P (op1)
9172 || INTVAL (op1) >= 16*1024*1024
9173 || INTVAL (op1) < -16*1024*1024)
9174 break;
9175 if (GET_CODE (op0) == LABEL_REF)
9176 return true;
9177 if (GET_CODE (op0) != SYMBOL_REF)
9178 break;
9179 /* FALLTHRU */
9180
9181 case SYMBOL_REF:
9182 /* TLS references should always be enclosed in UNSPEC. */
9183 if (SYMBOL_REF_TLS_MODEL (op0))
9184 return false;
9185 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9186 && ix86_cmodel != CM_LARGE_PIC)
9187 return true;
9188 break;
9189
9190 default:
9191 break;
9192 }
9193 }
9194 if (GET_CODE (disp) != CONST)
9195 return 0;
9196 disp = XEXP (disp, 0);
9197
9198 if (TARGET_64BIT)
9199 {
9200 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
9201 of GOT-based references. We should not need these anyway. */
9202 if (GET_CODE (disp) != UNSPEC
9203 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9204 && XINT (disp, 1) != UNSPEC_GOTOFF
9205 && XINT (disp, 1) != UNSPEC_PLTOFF))
9206 return 0;
9207
9208 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9209 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9210 return 0;
9211 return 1;
9212 }
9213
9214 saw_plus = false;
9215 if (GET_CODE (disp) == PLUS)
9216 {
9217 if (!CONST_INT_P (XEXP (disp, 1)))
9218 return 0;
9219 disp = XEXP (disp, 0);
9220 saw_plus = true;
9221 }
9222
9223 if (TARGET_MACHO && darwin_local_data_pic (disp))
9224 return 1;
9225
9226 if (GET_CODE (disp) != UNSPEC)
9227 return 0;
9228
9229 switch (XINT (disp, 1))
9230 {
9231 case UNSPEC_GOT:
9232 if (saw_plus)
9233 return false;
9234 /* We need to check for both symbols and labels because VxWorks loads
9235 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9236 details. */
9237 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9238 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9239 case UNSPEC_GOTOFF:
9240 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9241 While the ABI also specifies a 32bit relocation, we don't produce it in
9242 the small PIC model at all. */
9243 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9244 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9245 && !TARGET_64BIT)
9246 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9247 return false;
9248 case UNSPEC_GOTTPOFF:
9249 case UNSPEC_GOTNTPOFF:
9250 case UNSPEC_INDNTPOFF:
9251 if (saw_plus)
9252 return false;
9253 disp = XVECEXP (disp, 0, 0);
9254 return (GET_CODE (disp) == SYMBOL_REF
9255 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9256 case UNSPEC_NTPOFF:
9257 disp = XVECEXP (disp, 0, 0);
9258 return (GET_CODE (disp) == SYMBOL_REF
9259 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9260 case UNSPEC_DTPOFF:
9261 disp = XVECEXP (disp, 0, 0);
9262 return (GET_CODE (disp) == SYMBOL_REF
9263 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9264 }
9265
9266 return 0;
9267 }
9268
9269 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9270 memory address for an instruction. The MODE argument is the machine mode
9271 for the MEM expression that wants to use this address.
9272
9273 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
9274 convert common non-canonical forms to canonical form so that they will
9275 be recognized. */
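/* In concrete terms the canonical form accepted here is, at most,

	base + index * scale + disp

   with scale in {1, 2, 4, 8}, e.g. 12(%ebx,%esi,4), possibly with the
   %fs/%gs segment produced by the TLS UNSPEC_TP decomposition.  Forms
   such as (plus (reg) (ashift (reg) (const_int 2))) are expected to be
   canonicalized by LEGITIMIZE_ADDRESS (ix86_legitimize_address below)
   before they get here.  */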
9276
9277 int
9278 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9279 rtx addr, int strict)
9280 {
9281 struct ix86_address parts;
9282 rtx base, index, disp;
9283 HOST_WIDE_INT scale;
9284 const char *reason = NULL;
9285 rtx reason_rtx = NULL_RTX;
9286
9287 if (ix86_decompose_address (addr, &parts) <= 0)
9288 {
9289 reason = "decomposition failed";
9290 goto report_error;
9291 }
9292
9293 base = parts.base;
9294 index = parts.index;
9295 disp = parts.disp;
9296 scale = parts.scale;
9297
9298 /* Validate base register.
9299
9300 Don't allow SUBREGs that span more than a word here. They can lead to spill
9301 failures when the base is one word out of a two word structure, which is
9302 represented internally as a DImode int. */
9303
9304 if (base)
9305 {
9306 rtx reg;
9307 reason_rtx = base;
9308
9309 if (REG_P (base))
9310 reg = base;
9311 else if (GET_CODE (base) == SUBREG
9312 && REG_P (SUBREG_REG (base))
9313 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9314 <= UNITS_PER_WORD)
9315 reg = SUBREG_REG (base);
9316 else
9317 {
9318 reason = "base is not a register";
9319 goto report_error;
9320 }
9321
9322 if (GET_MODE (base) != Pmode)
9323 {
9324 reason = "base is not in Pmode";
9325 goto report_error;
9326 }
9327
9328 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9329 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9330 {
9331 reason = "base is not valid";
9332 goto report_error;
9333 }
9334 }
9335
9336 /* Validate index register.
9337
9338 Don't allow SUBREGs that span more than a word here -- same as above. */
9339
9340 if (index)
9341 {
9342 rtx reg;
9343 reason_rtx = index;
9344
9345 if (REG_P (index))
9346 reg = index;
9347 else if (GET_CODE (index) == SUBREG
9348 && REG_P (SUBREG_REG (index))
9349 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9350 <= UNITS_PER_WORD)
9351 reg = SUBREG_REG (index);
9352 else
9353 {
9354 reason = "index is not a register";
9355 goto report_error;
9356 }
9357
9358 if (GET_MODE (index) != Pmode)
9359 {
9360 reason = "index is not in Pmode";
9361 goto report_error;
9362 }
9363
9364 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9365 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9366 {
9367 reason = "index is not valid";
9368 goto report_error;
9369 }
9370 }
9371
9372 /* Validate scale factor. */
9373 if (scale != 1)
9374 {
9375 reason_rtx = GEN_INT (scale);
9376 if (!index)
9377 {
9378 reason = "scale without index";
9379 goto report_error;
9380 }
9381
9382 if (scale != 2 && scale != 4 && scale != 8)
9383 {
9384 reason = "scale is not a valid multiplier";
9385 goto report_error;
9386 }
9387 }
9388
9389 /* Validate displacement. */
9390 if (disp)
9391 {
9392 reason_rtx = disp;
9393
9394 if (GET_CODE (disp) == CONST
9395 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9396 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9397 switch (XINT (XEXP (disp, 0), 1))
9398 {
9399 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9400 used. While the ABI also specifies 32bit relocations, we don't produce
9401 them at all and use IP-relative addressing instead. */
9402 case UNSPEC_GOT:
9403 case UNSPEC_GOTOFF:
9404 gcc_assert (flag_pic);
9405 if (!TARGET_64BIT)
9406 goto is_legitimate_pic;
9407 reason = "64bit address unspec";
9408 goto report_error;
9409
9410 case UNSPEC_GOTPCREL:
9411 gcc_assert (flag_pic);
9412 goto is_legitimate_pic;
9413
9414 case UNSPEC_GOTTPOFF:
9415 case UNSPEC_GOTNTPOFF:
9416 case UNSPEC_INDNTPOFF:
9417 case UNSPEC_NTPOFF:
9418 case UNSPEC_DTPOFF:
9419 break;
9420
9421 default:
9422 reason = "invalid address unspec";
9423 goto report_error;
9424 }
9425
9426 else if (SYMBOLIC_CONST (disp)
9427 && (flag_pic
9428 || (TARGET_MACHO
9429 #if TARGET_MACHO
9430 && MACHOPIC_INDIRECT
9431 && !machopic_operand_p (disp)
9432 #endif
9433 )))
9434 {
9435
9436 is_legitimate_pic:
9437 if (TARGET_64BIT && (index || base))
9438 {
9439 /* foo@dtpoff(%rX) is ok. */
9440 if (GET_CODE (disp) != CONST
9441 || GET_CODE (XEXP (disp, 0)) != PLUS
9442 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9443 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9444 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9445 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9446 {
9447 reason = "non-constant pic memory reference";
9448 goto report_error;
9449 }
9450 }
9451 else if (! legitimate_pic_address_disp_p (disp))
9452 {
9453 reason = "displacement is an invalid pic construct";
9454 goto report_error;
9455 }
9456
9457 /* This code used to verify that a symbolic pic displacement
9458 includes the pic_offset_table_rtx register.
9459
9460 While this is a good idea, unfortunately these constructs may
9461 be created by the "adds using lea" optimization for incorrect
9462 code like:
9463
9464 int a;
9465 int foo(int i)
9466 {
9467 return *(&a+i);
9468 }
9469
9470 This code is nonsensical, but it results in addressing the
9471 GOT table with a pic_offset_table_rtx base. We can't
9472 just refuse it easily, since it gets matched by the
9473 "addsi3" pattern, which later gets split to lea when the
9474 output register differs from the input. While this
9475 could be handled by a separate addsi pattern for this case
9476 that never results in lea, disabling this test seems to be
9477 the easier and correct fix for the crash. */
9478 }
9479 else if (GET_CODE (disp) != LABEL_REF
9480 && !CONST_INT_P (disp)
9481 && (GET_CODE (disp) != CONST
9482 || !legitimate_constant_p (disp))
9483 && (GET_CODE (disp) != SYMBOL_REF
9484 || !legitimate_constant_p (disp)))
9485 {
9486 reason = "displacement is not constant";
9487 goto report_error;
9488 }
9489 else if (TARGET_64BIT
9490 && !x86_64_immediate_operand (disp, VOIDmode))
9491 {
9492 reason = "displacement is out of range";
9493 goto report_error;
9494 }
9495 }
9496
9497 /* Everything looks valid. */
9498 return TRUE;
9499
9500 report_error:
9501 return FALSE;
9502 }
9503 \f
9504 /* Return a unique alias set for the GOT. */
9505
9506 static alias_set_type
9507 ix86_GOT_alias_set (void)
9508 {
9509 static alias_set_type set = -1;
9510 if (set == -1)
9511 set = new_alias_set ();
9512 return set;
9513 }
9514
9515 /* Return a legitimate reference for ORIG (an address) using the
9516 register REG. If REG is 0, a new pseudo is generated.
9517
9518 There are two types of references that must be handled:
9519
9520 1. Global data references must load the address from the GOT, via
9521 the PIC reg. An insn is emitted to do this load, and the reg is
9522 returned.
9523
9524 2. Static data references, constant pool addresses, and code labels
9525 compute the address as an offset from the GOT, whose base is in
9526 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9527 differentiate them from global data objects. The returned
9528 address is the PIC reg + an unspec constant.
9529
9530 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9531 reg also appears in the address. */
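/* For example (a sketch of the usual 32bit ELF sequences, assuming
   %ebx holds the GOT pointer): a global symbol is rewritten to the
   memory reference foo@GOT(%ebx), whose load

	movl foo@GOT(%ebx), %reg

   yields foo's address, while a local symbol or label becomes the
   constant foo@GOTOFF added to the PIC register, typically
   materialized with

	leal foo@GOTOFF(%ebx), %reg  */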
9532
9533 static rtx
9534 legitimize_pic_address (rtx orig, rtx reg)
9535 {
9536 rtx addr = orig;
9537 rtx new_rtx = orig;
9538 rtx base;
9539
9540 #if TARGET_MACHO
9541 if (TARGET_MACHO && !TARGET_64BIT)
9542 {
9543 if (reg == 0)
9544 reg = gen_reg_rtx (Pmode);
9545 /* Use the generic Mach-O PIC machinery. */
9546 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9547 }
9548 #endif
9549
9550 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9551 new_rtx = addr;
9552 else if (TARGET_64BIT
9553 && ix86_cmodel != CM_SMALL_PIC
9554 && gotoff_operand (addr, Pmode))
9555 {
9556 rtx tmpreg;
9557 /* This symbol may be referenced via a displacement from the PIC
9558 base address (@GOTOFF). */
9559
9560 if (reload_in_progress)
9561 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9562 if (GET_CODE (addr) == CONST)
9563 addr = XEXP (addr, 0);
9564 if (GET_CODE (addr) == PLUS)
9565 {
9566 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9567 UNSPEC_GOTOFF);
9568 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9569 }
9570 else
9571 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9572 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9573 if (!reg)
9574 tmpreg = gen_reg_rtx (Pmode);
9575 else
9576 tmpreg = reg;
9577 emit_move_insn (tmpreg, new_rtx);
9578
9579 if (reg != 0)
9580 {
9581 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9582 tmpreg, 1, OPTAB_DIRECT);
9583 new_rtx = reg;
9584 }
9585 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9586 }
9587 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9588 {
9589 /* This symbol may be referenced via a displacement from the PIC
9590 base address (@GOTOFF). */
9591
9592 if (reload_in_progress)
9593 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9594 if (GET_CODE (addr) == CONST)
9595 addr = XEXP (addr, 0);
9596 if (GET_CODE (addr) == PLUS)
9597 {
9598 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9599 UNSPEC_GOTOFF);
9600 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9601 }
9602 else
9603 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9604 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9605 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9606
9607 if (reg != 0)
9608 {
9609 emit_move_insn (reg, new_rtx);
9610 new_rtx = reg;
9611 }
9612 }
9613 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9614 /* We can't use @GOTOFF for text labels on VxWorks;
9615 see gotoff_operand. */
9616 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9617 {
9618 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9619 {
9620 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9621 return legitimize_dllimport_symbol (addr, true);
9622 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9623 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9624 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9625 {
9626 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9627 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9628 }
9629 }
9630
9631 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9632 {
9633 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9634 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9635 new_rtx = gen_const_mem (Pmode, new_rtx);
9636 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9637
9638 if (reg == 0)
9639 reg = gen_reg_rtx (Pmode);
9640 /* Use gen_movsi directly, otherwise the address is loaded
9641 into a register for CSE. We don't want to CSE these addresses;
9642 instead we CSE addresses from the GOT table, so skip this. */
9643 emit_insn (gen_movsi (reg, new_rtx));
9644 new_rtx = reg;
9645 }
9646 else
9647 {
9648 /* This symbol must be referenced via a load from the
9649 Global Offset Table (@GOT). */
9650
9651 if (reload_in_progress)
9652 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9653 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9654 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9655 if (TARGET_64BIT)
9656 new_rtx = force_reg (Pmode, new_rtx);
9657 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9658 new_rtx = gen_const_mem (Pmode, new_rtx);
9659 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9660
9661 if (reg == 0)
9662 reg = gen_reg_rtx (Pmode);
9663 emit_move_insn (reg, new_rtx);
9664 new_rtx = reg;
9665 }
9666 }
9667 else
9668 {
9669 if (CONST_INT_P (addr)
9670 && !x86_64_immediate_operand (addr, VOIDmode))
9671 {
9672 if (reg)
9673 {
9674 emit_move_insn (reg, addr);
9675 new_rtx = reg;
9676 }
9677 else
9678 new_rtx = force_reg (Pmode, addr);
9679 }
9680 else if (GET_CODE (addr) == CONST)
9681 {
9682 addr = XEXP (addr, 0);
9683
9684 /* We must match stuff we generate before. Assume the only
9685 unspecs that can get here are ours. Not that we could do
9686 anything with them anyway.... */
9687 if (GET_CODE (addr) == UNSPEC
9688 || (GET_CODE (addr) == PLUS
9689 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9690 return orig;
9691 gcc_assert (GET_CODE (addr) == PLUS);
9692 }
9693 if (GET_CODE (addr) == PLUS)
9694 {
9695 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9696
9697 /* Check first to see if this is a constant offset from a @GOTOFF
9698 symbol reference. */
9699 if (gotoff_operand (op0, Pmode)
9700 && CONST_INT_P (op1))
9701 {
9702 if (!TARGET_64BIT)
9703 {
9704 if (reload_in_progress)
9705 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9706 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9707 UNSPEC_GOTOFF);
9708 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9709 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9710 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9711
9712 if (reg != 0)
9713 {
9714 emit_move_insn (reg, new_rtx);
9715 new_rtx = reg;
9716 }
9717 }
9718 else
9719 {
9720 if (INTVAL (op1) < -16*1024*1024
9721 || INTVAL (op1) >= 16*1024*1024)
9722 {
9723 if (!x86_64_immediate_operand (op1, Pmode))
9724 op1 = force_reg (Pmode, op1);
9725 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
9726 }
9727 }
9728 }
9729 else
9730 {
9731 base = legitimize_pic_address (XEXP (addr, 0), reg);
9732 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9733 base == reg ? NULL_RTX : reg);
9734
9735 if (CONST_INT_P (new_rtx))
9736 new_rtx = plus_constant (base, INTVAL (new_rtx));
9737 else
9738 {
9739 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9740 {
9741 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9742 new_rtx = XEXP (new_rtx, 1);
9743 }
9744 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9745 }
9746 }
9747 }
9748 }
9749 return new_rtx;
9750 }
9751 \f
9752 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9753
9754 static rtx
9755 get_thread_pointer (int to_reg)
9756 {
9757 rtx tp, reg, insn;
9758
9759 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9760 if (!to_reg)
9761 return tp;
9762
9763 reg = gen_reg_rtx (Pmode);
9764 insn = gen_rtx_SET (VOIDmode, reg, tp);
9765 insn = emit_insn (insn);
9766
9767 return reg;
9768 }
9769
9770 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
9771 false if we expect this to be used for a memory address and true if
9772 we expect to load the address into a register. */
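/* As a rough illustration of the models handled below (32bit GNU TLS,
   shown only for exposition): local-exec accesses become the thread
   pointer plus a constant offset, e.g.

	movl %gs:0, %reg
	movl x@ntpoff(%reg), %reg

   (or %gs:x@ntpoff directly with TARGET_TLS_DIRECT_SEG_REFS), while
   initial-exec first loads the offset from the GOT and the dynamic
   models call the tls_get_addr helper.  */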
9773
9774 static rtx
9775 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9776 {
9777 rtx dest, base, off, pic, tp;
9778 int type;
9779
9780 switch (model)
9781 {
9782 case TLS_MODEL_GLOBAL_DYNAMIC:
9783 dest = gen_reg_rtx (Pmode);
9784 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9785
9786 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9787 {
9788 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9789
9790 start_sequence ();
9791 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9792 insns = get_insns ();
9793 end_sequence ();
9794
9795 RTL_CONST_CALL_P (insns) = 1;
9796 emit_libcall_block (insns, dest, rax, x);
9797 }
9798 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9799 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9800 else
9801 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9802
9803 if (TARGET_GNU2_TLS)
9804 {
9805 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9806
9807 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9808 }
9809 break;
9810
9811 case TLS_MODEL_LOCAL_DYNAMIC:
9812 base = gen_reg_rtx (Pmode);
9813 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9814
9815 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9816 {
9817 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9818
9819 start_sequence ();
9820 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9821 insns = get_insns ();
9822 end_sequence ();
9823
9824 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9825 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9826 RTL_CONST_CALL_P (insns) = 1;
9827 emit_libcall_block (insns, base, rax, note);
9828 }
9829 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9830 emit_insn (gen_tls_local_dynamic_base_64 (base));
9831 else
9832 emit_insn (gen_tls_local_dynamic_base_32 (base));
9833
9834 if (TARGET_GNU2_TLS)
9835 {
9836 rtx x = ix86_tls_module_base ();
9837
9838 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9839 gen_rtx_MINUS (Pmode, x, tp));
9840 }
9841
9842 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9843 off = gen_rtx_CONST (Pmode, off);
9844
9845 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9846
9847 if (TARGET_GNU2_TLS)
9848 {
9849 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9850
9851 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9852 }
9853
9854 break;
9855
9856 case TLS_MODEL_INITIAL_EXEC:
9857 if (TARGET_64BIT)
9858 {
9859 pic = NULL;
9860 type = UNSPEC_GOTNTPOFF;
9861 }
9862 else if (flag_pic)
9863 {
9864 if (reload_in_progress)
9865 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9866 pic = pic_offset_table_rtx;
9867 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9868 }
9869 else if (!TARGET_ANY_GNU_TLS)
9870 {
9871 pic = gen_reg_rtx (Pmode);
9872 emit_insn (gen_set_got (pic));
9873 type = UNSPEC_GOTTPOFF;
9874 }
9875 else
9876 {
9877 pic = NULL;
9878 type = UNSPEC_INDNTPOFF;
9879 }
9880
9881 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9882 off = gen_rtx_CONST (Pmode, off);
9883 if (pic)
9884 off = gen_rtx_PLUS (Pmode, pic, off);
9885 off = gen_const_mem (Pmode, off);
9886 set_mem_alias_set (off, ix86_GOT_alias_set ());
9887
9888 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9889 {
9890 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9891 off = force_reg (Pmode, off);
9892 return gen_rtx_PLUS (Pmode, base, off);
9893 }
9894 else
9895 {
9896 base = get_thread_pointer (true);
9897 dest = gen_reg_rtx (Pmode);
9898 emit_insn (gen_subsi3 (dest, base, off));
9899 }
9900 break;
9901
9902 case TLS_MODEL_LOCAL_EXEC:
9903 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9904 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9905 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9906 off = gen_rtx_CONST (Pmode, off);
9907
9908 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9909 {
9910 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9911 return gen_rtx_PLUS (Pmode, base, off);
9912 }
9913 else
9914 {
9915 base = get_thread_pointer (true);
9916 dest = gen_reg_rtx (Pmode);
9917 emit_insn (gen_subsi3 (dest, base, off));
9918 }
9919 break;
9920
9921 default:
9922 gcc_unreachable ();
9923 }
9924
9925 return dest;
9926 }
9927
9928 /* Create or return the unique __imp_DECL dllimport symbol corresponding
9929 to symbol DECL. */
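/* For a dllimport'ed symbol "foo" this builds an artificial read-only
   declaration whose RTL is a memory reference through "__imp__foo"
   (or "__imp_foo", depending on the prefixing rules below), i.e. the
   import-table slot that the runtime loader fills in with foo's real
   address.  */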
9930
9931 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9932 htab_t dllimport_map;
9933
9934 static tree
9935 get_dllimport_decl (tree decl)
9936 {
9937 struct tree_map *h, in;
9938 void **loc;
9939 const char *name;
9940 const char *prefix;
9941 size_t namelen, prefixlen;
9942 char *imp_name;
9943 tree to;
9944 rtx rtl;
9945
9946 if (!dllimport_map)
9947 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9948
9949 in.hash = htab_hash_pointer (decl);
9950 in.base.from = decl;
9951 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9952 h = (struct tree_map *) *loc;
9953 if (h)
9954 return h->to;
9955
9956 *loc = h = GGC_NEW (struct tree_map);
9957 h->hash = in.hash;
9958 h->base.from = decl;
9959 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9960 DECL_ARTIFICIAL (to) = 1;
9961 DECL_IGNORED_P (to) = 1;
9962 DECL_EXTERNAL (to) = 1;
9963 TREE_READONLY (to) = 1;
9964
9965 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9966 name = targetm.strip_name_encoding (name);
9967 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9968 ? "*__imp_" : "*__imp__";
9969 namelen = strlen (name);
9970 prefixlen = strlen (prefix);
9971 imp_name = (char *) alloca (namelen + prefixlen + 1);
9972 memcpy (imp_name, prefix, prefixlen);
9973 memcpy (imp_name + prefixlen, name, namelen + 1);
9974
9975 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9976 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9977 SET_SYMBOL_REF_DECL (rtl, to);
9978 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9979
9980 rtl = gen_const_mem (Pmode, rtl);
9981 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9982
9983 SET_DECL_RTL (to, rtl);
9984 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9985
9986 return to;
9987 }
9988
9989 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9990 true if we require the result be a register. */
9991
9992 static rtx
9993 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9994 {
9995 tree imp_decl;
9996 rtx x;
9997
9998 gcc_assert (SYMBOL_REF_DECL (symbol));
9999 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10000
10001 x = DECL_RTL (imp_decl);
10002 if (want_reg)
10003 x = force_reg (Pmode, x);
10004 return x;
10005 }
10006
10007 /* Try machine-dependent ways of modifying an illegitimate address
10008 to be legitimate. If we find one, return the new, valid address.
10009 This macro is used in only one place: `memory_address' in explow.c.
10010
10011 OLDX is the address as it was before break_out_memory_refs was called.
10012 In some cases it is useful to look at this to decide what needs to be done.
10013
10014 MODE and WIN are passed so that this macro can use
10015 GO_IF_LEGITIMATE_ADDRESS.
10016
10017 It is always safe for this macro to do nothing. It exists to recognize
10018 opportunities to optimize the output.
10019
10020 For the 80386, we handle X+REG by loading X into a register R and
10021 using R+REG. R will go in a general reg and indexing will be used.
10022 However, if REG is a broken-out memory address or multiplication,
10023 nothing needs to be done because REG can certainly go in a general reg.
10024
10025 When -fpic is used, special handling is needed for symbolic references.
10026 See comments by legitimize_pic_address in i386.c for details. */
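/* Illustrative canonicalizations performed below (hypothetical RTL,
   register numbers invented): a shifted index such as

	(plus (reg 37) (ashift (reg 38) (const_int 2)))

   is rewritten as (plus (mult (reg 38) (const_int 4)) (reg 37)), and
   constant terms are pulled out to the outermost PLUS, so the result
   matches the base + index*scale + disp form checked by
   legitimate_address_p.  */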
10027
10028 static rtx
10029 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10030 enum machine_mode mode)
10031 {
10032 int changed = 0;
10033 unsigned log;
10034
10035 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10036 if (log)
10037 return legitimize_tls_address (x, (enum tls_model) log, false);
10038 if (GET_CODE (x) == CONST
10039 && GET_CODE (XEXP (x, 0)) == PLUS
10040 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10041 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10042 {
10043 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10044 (enum tls_model) log, false);
10045 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10046 }
10047
10048 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10049 {
10050 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10051 return legitimize_dllimport_symbol (x, true);
10052 if (GET_CODE (x) == CONST
10053 && GET_CODE (XEXP (x, 0)) == PLUS
10054 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10055 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10056 {
10057 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10058 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10059 }
10060 }
10061
10062 if (flag_pic && SYMBOLIC_CONST (x))
10063 return legitimize_pic_address (x, 0);
10064
10065 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10066 if (GET_CODE (x) == ASHIFT
10067 && CONST_INT_P (XEXP (x, 1))
10068 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10069 {
10070 changed = 1;
10071 log = INTVAL (XEXP (x, 1));
10072 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10073 GEN_INT (1 << log));
10074 }
10075
10076 if (GET_CODE (x) == PLUS)
10077 {
10078 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10079
10080 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10081 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10082 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10083 {
10084 changed = 1;
10085 log = INTVAL (XEXP (XEXP (x, 0), 1));
10086 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10087 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10088 GEN_INT (1 << log));
10089 }
10090
10091 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10092 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10093 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10094 {
10095 changed = 1;
10096 log = INTVAL (XEXP (XEXP (x, 1), 1));
10097 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10098 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10099 GEN_INT (1 << log));
10100 }
10101
10102 /* Put multiply first if it isn't already. */
10103 if (GET_CODE (XEXP (x, 1)) == MULT)
10104 {
10105 rtx tmp = XEXP (x, 0);
10106 XEXP (x, 0) = XEXP (x, 1);
10107 XEXP (x, 1) = tmp;
10108 changed = 1;
10109 }
10110
10111 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10112 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10113 created by virtual register instantiation, register elimination, and
10114 similar optimizations. */
10115 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10116 {
10117 changed = 1;
10118 x = gen_rtx_PLUS (Pmode,
10119 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10120 XEXP (XEXP (x, 1), 0)),
10121 XEXP (XEXP (x, 1), 1));
10122 }
10123
10124 /* Canonicalize
10125 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10126 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10127 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10128 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10129 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10130 && CONSTANT_P (XEXP (x, 1)))
10131 {
10132 rtx constant;
10133 rtx other = NULL_RTX;
10134
10135 if (CONST_INT_P (XEXP (x, 1)))
10136 {
10137 constant = XEXP (x, 1);
10138 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10139 }
10140 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10141 {
10142 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10143 other = XEXP (x, 1);
10144 }
10145 else
10146 constant = 0;
10147
10148 if (constant)
10149 {
10150 changed = 1;
10151 x = gen_rtx_PLUS (Pmode,
10152 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10153 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10154 plus_constant (other, INTVAL (constant)));
10155 }
10156 }
10157
10158 if (changed && legitimate_address_p (mode, x, FALSE))
10159 return x;
10160
10161 if (GET_CODE (XEXP (x, 0)) == MULT)
10162 {
10163 changed = 1;
10164 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10165 }
10166
10167 if (GET_CODE (XEXP (x, 1)) == MULT)
10168 {
10169 changed = 1;
10170 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10171 }
10172
10173 if (changed
10174 && REG_P (XEXP (x, 1))
10175 && REG_P (XEXP (x, 0)))
10176 return x;
10177
10178 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10179 {
10180 changed = 1;
10181 x = legitimize_pic_address (x, 0);
10182 }
10183
10184 if (changed && legitimate_address_p (mode, x, FALSE))
10185 return x;
10186
10187 if (REG_P (XEXP (x, 0)))
10188 {
10189 rtx temp = gen_reg_rtx (Pmode);
10190 rtx val = force_operand (XEXP (x, 1), temp);
10191 if (val != temp)
10192 emit_move_insn (temp, val);
10193
10194 XEXP (x, 1) = temp;
10195 return x;
10196 }
10197
10198 else if (REG_P (XEXP (x, 1)))
10199 {
10200 rtx temp = gen_reg_rtx (Pmode);
10201 rtx val = force_operand (XEXP (x, 0), temp);
10202 if (val != temp)
10203 emit_move_insn (temp, val);
10204
10205 XEXP (x, 0) = temp;
10206 return x;
10207 }
10208 }
10209
10210 return x;
10211 }
10212 \f
10213 /* Print an integer constant expression in assembler syntax. Addition
10214 and subtraction are the only arithmetic that may appear in these
10215 expressions. FILE is the stdio stream to write to, X is the rtx, and
10216 CODE is the operand print code from the output string. */
10217
10218 static void
10219 output_pic_addr_const (FILE *file, rtx x, int code)
10220 {
10221 char buf[256];
10222
10223 switch (GET_CODE (x))
10224 {
10225 case PC:
10226 gcc_assert (flag_pic);
10227 putc ('.', file);
10228 break;
10229
10230 case SYMBOL_REF:
10231 if (! TARGET_MACHO || TARGET_64BIT)
10232 output_addr_const (file, x);
10233 else
10234 {
10235 const char *name = XSTR (x, 0);
10236
10237 /* Mark the decl as referenced so that cgraph will
10238 output the function. */
10239 if (SYMBOL_REF_DECL (x))
10240 mark_decl_referenced (SYMBOL_REF_DECL (x));
10241
10242 #if TARGET_MACHO
10243 if (MACHOPIC_INDIRECT
10244 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10245 name = machopic_indirection_name (x, /*stub_p=*/true);
10246 #endif
10247 assemble_name (file, name);
10248 }
10249 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10250 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10251 fputs ("@PLT", file);
10252 break;
10253
10254 case LABEL_REF:
10255 x = XEXP (x, 0);
10256 /* FALLTHRU */
10257 case CODE_LABEL:
10258 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10259 assemble_name (asm_out_file, buf);
10260 break;
10261
10262 case CONST_INT:
10263 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10264 break;
10265
10266 case CONST:
10267 /* This used to output parentheses around the expression,
10268 but that does not work on the 386 (either ATT or BSD assembler). */
10269 output_pic_addr_const (file, XEXP (x, 0), code);
10270 break;
10271
10272 case CONST_DOUBLE:
10273 if (GET_MODE (x) == VOIDmode)
10274 {
10275 /* We can use %d if the number is <32 bits and positive. */
10276 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10277 fprintf (file, "0x%lx%08lx",
10278 (unsigned long) CONST_DOUBLE_HIGH (x),
10279 (unsigned long) CONST_DOUBLE_LOW (x));
10280 else
10281 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10282 }
10283 else
10284 /* We can't handle floating point constants;
10285 PRINT_OPERAND must handle them. */
10286 output_operand_lossage ("floating constant misused");
10287 break;
10288
10289 case PLUS:
10290 /* Some assemblers need integer constants to appear first. */
10291 if (CONST_INT_P (XEXP (x, 0)))
10292 {
10293 output_pic_addr_const (file, XEXP (x, 0), code);
10294 putc ('+', file);
10295 output_pic_addr_const (file, XEXP (x, 1), code);
10296 }
10297 else
10298 {
10299 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10300 output_pic_addr_const (file, XEXP (x, 1), code);
10301 putc ('+', file);
10302 output_pic_addr_const (file, XEXP (x, 0), code);
10303 }
10304 break;
10305
10306 case MINUS:
10307 if (!TARGET_MACHO)
10308 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10309 output_pic_addr_const (file, XEXP (x, 0), code);
10310 putc ('-', file);
10311 output_pic_addr_const (file, XEXP (x, 1), code);
10312 if (!TARGET_MACHO)
10313 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10314 break;
10315
10316 case UNSPEC:
10317 gcc_assert (XVECLEN (x, 0) == 1);
10318 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10319 switch (XINT (x, 1))
10320 {
10321 case UNSPEC_GOT:
10322 fputs ("@GOT", file);
10323 break;
10324 case UNSPEC_GOTOFF:
10325 fputs ("@GOTOFF", file);
10326 break;
10327 case UNSPEC_PLTOFF:
10328 fputs ("@PLTOFF", file);
10329 break;
10330 case UNSPEC_GOTPCREL:
10331 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10332 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10333 break;
10334 case UNSPEC_GOTTPOFF:
10335 /* FIXME: This might be @TPOFF in Sun ld too. */
10336 fputs ("@GOTTPOFF", file);
10337 break;
10338 case UNSPEC_TPOFF:
10339 fputs ("@TPOFF", file);
10340 break;
10341 case UNSPEC_NTPOFF:
10342 if (TARGET_64BIT)
10343 fputs ("@TPOFF", file);
10344 else
10345 fputs ("@NTPOFF", file);
10346 break;
10347 case UNSPEC_DTPOFF:
10348 fputs ("@DTPOFF", file);
10349 break;
10350 case UNSPEC_GOTNTPOFF:
10351 if (TARGET_64BIT)
10352 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10353 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10354 else
10355 fputs ("@GOTNTPOFF", file);
10356 break;
10357 case UNSPEC_INDNTPOFF:
10358 fputs ("@INDNTPOFF", file);
10359 break;
10360 #if TARGET_MACHO
10361 case UNSPEC_MACHOPIC_OFFSET:
10362 putc ('-', file);
10363 machopic_output_function_base_name (file);
10364 break;
10365 #endif
10366 default:
10367 output_operand_lossage ("invalid UNSPEC as operand");
10368 break;
10369 }
10370 break;
10371
10372 default:
10373 output_operand_lossage ("invalid expression as operand");
10374 }
10375 }
10376
10377 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10378 We need to emit DTP-relative relocations. */
10379
10380 static void ATTRIBUTE_UNUSED
10381 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10382 {
10383 fputs (ASM_LONG, file);
10384 output_addr_const (file, x);
10385 fputs ("@DTPOFF", file);
10386 switch (size)
10387 {
10388 case 4:
10389 break;
10390 case 8:
10391 fputs (", 0", file);
10392 break;
10393 default:
10394 gcc_unreachable ();
10395 }
10396 }
10397
10398 /* Return true if X is a representation of the PIC register. This copes
10399 with calls from ix86_find_base_term, where the register might have
10400 been replaced by a cselib value. */
10401
10402 static bool
10403 ix86_pic_register_p (rtx x)
10404 {
10405 if (GET_CODE (x) == VALUE)
10406 return (pic_offset_table_rtx
10407 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10408 else
10409 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10410 }
10411
10412 /* In the name of slightly smaller debug output, and to cater to
10413 general assembler lossage, recognize PIC+GOTOFF and turn it back
10414 into a direct symbol reference.
10415
10416 On Darwin, this is necessary to avoid a crash, because Darwin
10417 has a different PIC label for each routine but the DWARF debugging
10418 information is not associated with any particular routine, so it's
10419 necessary to remove references to the PIC label from RTL stored by
10420 the DWARF output code. */
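/* A sketch of the 32bit case handled below: given something like

	(plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   for a non-MEM operand, the function returns (symbol_ref "foo"),
   re-attaching any register or constant addend it peeled off along
   the way.  */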
10421
10422 static rtx
10423 ix86_delegitimize_address (rtx orig_x)
10424 {
10425 rtx x = orig_x;
10426 /* reg_addend is NULL or a multiple of some register. */
10427 rtx reg_addend = NULL_RTX;
10428 /* const_addend is NULL or a const_int. */
10429 rtx const_addend = NULL_RTX;
10430 /* This is the result, or NULL. */
10431 rtx result = NULL_RTX;
10432
10433 if (MEM_P (x))
10434 x = XEXP (x, 0);
10435
10436 if (TARGET_64BIT)
10437 {
10438 if (GET_CODE (x) != CONST
10439 || GET_CODE (XEXP (x, 0)) != UNSPEC
10440 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10441 || !MEM_P (orig_x))
10442 return orig_x;
10443 return XVECEXP (XEXP (x, 0), 0, 0);
10444 }
10445
10446 if (GET_CODE (x) != PLUS
10447 || GET_CODE (XEXP (x, 1)) != CONST)
10448 return orig_x;
10449
10450 if (ix86_pic_register_p (XEXP (x, 0)))
10451 /* %ebx + GOT/GOTOFF */
10452 ;
10453 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10454 {
10455 /* %ebx + %reg * scale + GOT/GOTOFF */
10456 reg_addend = XEXP (x, 0);
10457 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10458 reg_addend = XEXP (reg_addend, 1);
10459 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10460 reg_addend = XEXP (reg_addend, 0);
10461 else
10462 return orig_x;
10463 if (!REG_P (reg_addend)
10464 && GET_CODE (reg_addend) != MULT
10465 && GET_CODE (reg_addend) != ASHIFT)
10466 return orig_x;
10467 }
10468 else
10469 return orig_x;
10470
10471 x = XEXP (XEXP (x, 1), 0);
10472 if (GET_CODE (x) == PLUS
10473 && CONST_INT_P (XEXP (x, 1)))
10474 {
10475 const_addend = XEXP (x, 1);
10476 x = XEXP (x, 0);
10477 }
10478
10479 if (GET_CODE (x) == UNSPEC
10480 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10481 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10482 result = XVECEXP (x, 0, 0);
10483
10484 if (TARGET_MACHO && darwin_local_data_pic (x)
10485 && !MEM_P (orig_x))
10486 result = XVECEXP (x, 0, 0);
10487
10488 if (! result)
10489 return orig_x;
10490
10491 if (const_addend)
10492 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10493 if (reg_addend)
10494 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10495 return result;
10496 }
10497
10498 /* If X is a machine specific address (i.e. a symbol or label being
10499 referenced as a displacement from the GOT implemented using an
10500 UNSPEC), then return the base term. Otherwise return X. */
10501
10502 rtx
10503 ix86_find_base_term (rtx x)
10504 {
10505 rtx term;
10506
10507 if (TARGET_64BIT)
10508 {
10509 if (GET_CODE (x) != CONST)
10510 return x;
10511 term = XEXP (x, 0);
10512 if (GET_CODE (term) == PLUS
10513 && (CONST_INT_P (XEXP (term, 1))
10514 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10515 term = XEXP (term, 0);
10516 if (GET_CODE (term) != UNSPEC
10517 || XINT (term, 1) != UNSPEC_GOTPCREL)
10518 return x;
10519
10520 return XVECEXP (term, 0, 0);
10521 }
10522
10523 return ix86_delegitimize_address (x);
10524 }
10525 \f
10526 static void
10527 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10528 int fp, FILE *file)
10529 {
10530 const char *suffix;
10531
10532 if (mode == CCFPmode || mode == CCFPUmode)
10533 {
10534 enum rtx_code second_code, bypass_code;
10535 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10536 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10537 code = ix86_fp_compare_code_to_integer (code);
10538 mode = CCmode;
10539 }
10540 if (reverse)
10541 code = reverse_condition (code);
10542
10543 switch (code)
10544 {
10545 case EQ:
10546 switch (mode)
10547 {
10548 case CCAmode:
10549 suffix = "a";
10550 break;
10551
10552 case CCCmode:
10553 suffix = "c";
10554 break;
10555
10556 case CCOmode:
10557 suffix = "o";
10558 break;
10559
10560 case CCSmode:
10561 suffix = "s";
10562 break;
10563
10564 default:
10565 suffix = "e";
10566 }
10567 break;
10568 case NE:
10569 switch (mode)
10570 {
10571 case CCAmode:
10572 suffix = "na";
10573 break;
10574
10575 case CCCmode:
10576 suffix = "nc";
10577 break;
10578
10579 case CCOmode:
10580 suffix = "no";
10581 break;
10582
10583 case CCSmode:
10584 suffix = "ns";
10585 break;
10586
10587 default:
10588 suffix = "ne";
10589 }
10590 break;
10591 case GT:
10592 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10593 suffix = "g";
10594 break;
10595 case GTU:
10596 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10597 Those same assemblers have the same but opposite lossage on cmov. */
10598 if (mode == CCmode)
10599 suffix = fp ? "nbe" : "a";
10600 else if (mode == CCCmode)
10601 suffix = "b";
10602 else
10603 gcc_unreachable ();
10604 break;
10605 case LT:
10606 switch (mode)
10607 {
10608 case CCNOmode:
10609 case CCGOCmode:
10610 suffix = "s";
10611 break;
10612
10613 case CCmode:
10614 case CCGCmode:
10615 suffix = "l";
10616 break;
10617
10618 default:
10619 gcc_unreachable ();
10620 }
10621 break;
10622 case LTU:
10623 gcc_assert (mode == CCmode || mode == CCCmode);
10624 suffix = "b";
10625 break;
10626 case GE:
10627 switch (mode)
10628 {
10629 case CCNOmode:
10630 case CCGOCmode:
10631 suffix = "ns";
10632 break;
10633
10634 case CCmode:
10635 case CCGCmode:
10636 suffix = "ge";
10637 break;
10638
10639 default:
10640 gcc_unreachable ();
10641 }
10642 break;
10643 case GEU:
10644 /* ??? As above. */
10645 gcc_assert (mode == CCmode || mode == CCCmode);
10646 suffix = fp ? "nb" : "ae";
10647 break;
10648 case LE:
10649 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10650 suffix = "le";
10651 break;
10652 case LEU:
10653 /* ??? As above. */
10654 if (mode == CCmode)
10655 suffix = "be";
10656 else if (mode == CCCmode)
10657 suffix = fp ? "nb" : "ae";
10658 else
10659 gcc_unreachable ();
10660 break;
10661 case UNORDERED:
10662 suffix = fp ? "u" : "p";
10663 break;
10664 case ORDERED:
10665 suffix = fp ? "nu" : "np";
10666 break;
10667 default:
10668 gcc_unreachable ();
10669 }
10670 fputs (suffix, file);
10671 }
10672
10673 /* Print the name of register X to FILE based on its machine mode and number.
10674 If CODE is 'w', pretend the mode is HImode.
10675 If CODE is 'b', pretend the mode is QImode.
10676 If CODE is 'k', pretend the mode is SImode.
10677 If CODE is 'q', pretend the mode is DImode.
10678 If CODE is 'x', pretend the mode is V4SFmode.
10679 If CODE is 't', pretend the mode is V8SFmode.
10680 If CODE is 'h', pretend the reg is the 'high' byte register.
10681    If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack register.
10682 If CODE is 'd', duplicate the operand for AVX instruction.
10683 */
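/* As an illustration, in AT&T syntax hard register 0 (the "ax" family)
   prints as "%al" for 'b', "%ax" for 'w', "%eax" for 'k' and, in 64-bit
   mode, "%rax" for 'q'.  */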
10684
10685 void
10686 print_reg (rtx x, int code, FILE *file)
10687 {
10688 const char *reg;
10689 bool duplicated = code == 'd' && TARGET_AVX;
10690
10691 gcc_assert (x == pc_rtx
10692 || (REGNO (x) != ARG_POINTER_REGNUM
10693 && REGNO (x) != FRAME_POINTER_REGNUM
10694 && REGNO (x) != FLAGS_REG
10695 && REGNO (x) != FPSR_REG
10696 && REGNO (x) != FPCR_REG));
10697
10698 if (ASSEMBLER_DIALECT == ASM_ATT)
10699 putc ('%', file);
10700
10701 if (x == pc_rtx)
10702 {
10703 gcc_assert (TARGET_64BIT);
10704 fputs ("rip", file);
10705 return;
10706 }
10707
10708 if (code == 'w' || MMX_REG_P (x))
10709 code = 2;
10710 else if (code == 'b')
10711 code = 1;
10712 else if (code == 'k')
10713 code = 4;
10714 else if (code == 'q')
10715 code = 8;
10716 else if (code == 'y')
10717 code = 3;
10718 else if (code == 'h')
10719 code = 0;
10720 else if (code == 'x')
10721 code = 16;
10722 else if (code == 't')
10723 code = 32;
10724 else
10725 code = GET_MODE_SIZE (GET_MODE (x));
10726
10727   /* Irritatingly, the AMD extended registers use a different naming
10728      convention from the normal registers.  */
10729 if (REX_INT_REG_P (x))
10730 {
10731 gcc_assert (TARGET_64BIT);
10732 switch (code)
10733 {
10734 case 0:
10735 error ("extended registers have no high halves");
10736 break;
10737 case 1:
10738 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10739 break;
10740 case 2:
10741 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10742 break;
10743 case 4:
10744 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10745 break;
10746 case 8:
10747 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10748 break;
10749 default:
10750 error ("unsupported operand size for extended register");
10751 break;
10752 }
10753 return;
10754 }
10755
10756 reg = NULL;
10757 switch (code)
10758 {
10759 case 3:
10760 if (STACK_TOP_P (x))
10761 {
10762 reg = "st(0)";
10763 break;
10764 }
10765 /* FALLTHRU */
10766 case 8:
10767 case 4:
10768 case 12:
10769 if (! ANY_FP_REG_P (x))
10770 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10771 /* FALLTHRU */
10772 case 16:
10773 case 2:
10774 normal:
10775 reg = hi_reg_name[REGNO (x)];
10776 break;
10777 case 1:
10778 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10779 goto normal;
10780 reg = qi_reg_name[REGNO (x)];
10781 break;
10782 case 0:
10783 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10784 goto normal;
10785 reg = qi_high_reg_name[REGNO (x)];
10786 break;
10787 case 32:
10788 if (SSE_REG_P (x))
10789 {
10790 gcc_assert (!duplicated);
10791 putc ('y', file);
10792 fputs (hi_reg_name[REGNO (x)] + 1, file);
10793 return;
10794 }
10795 break;
10796 default:
10797 gcc_unreachable ();
10798 }
10799
10800 fputs (reg, file);
10801 if (duplicated)
10802 {
10803 if (ASSEMBLER_DIALECT == ASM_ATT)
10804 fprintf (file, ", %%%s", reg);
10805 else
10806 fprintf (file, ", %s", reg);
10807 }
10808 }
10809
10810 /* Locate some local-dynamic symbol still in use by this function
10811 so that we can print its name in some tls_local_dynamic_base
10812 pattern. */
10813
10814 static int
10815 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10816 {
10817 rtx x = *px;
10818
10819 if (GET_CODE (x) == SYMBOL_REF
10820 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10821 {
10822 cfun->machine->some_ld_name = XSTR (x, 0);
10823 return 1;
10824 }
10825
10826 return 0;
10827 }
10828
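/* Return the name of some local-dynamic TLS symbol referenced by the
   current function, scanning its insns on first use and caching the
   result in cfun->machine->some_ld_name.  */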
10829 static const char *
10830 get_some_local_dynamic_name (void)
10831 {
10832 rtx insn;
10833
10834 if (cfun->machine->some_ld_name)
10835 return cfun->machine->some_ld_name;
10836
10837 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10838 if (INSN_P (insn)
10839 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10840 return cfun->machine->some_ld_name;
10841
10842 gcc_unreachable ();
10843 }
10844
10845 /* Meaning of CODE:
10846 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10847 C -- print opcode suffix for set/cmov insn.
10848 c -- like C, but print reversed condition
10849 E,e -- likewise, but for compare-and-branch fused insn.
10850 F,f -- likewise, but for floating-point.
10851 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10852 otherwise nothing
10853 R -- print the prefix for register names.
10854 z -- print the opcode suffix for the size of the current operand.
10855 Z -- likewise, with special suffixes for x87 instructions.
10856 * -- print a star (in certain assembler syntax)
10857 A -- print an absolute memory reference.
10858 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10859    s -- print a shift double count, followed by the assembler's argument
10860 	delimiter.
10861 b -- print the QImode name of the register for the indicated operand.
10862 %b0 would print %al if operands[0] is reg 0.
10863 w -- likewise, print the HImode name of the register.
10864 k -- likewise, print the SImode name of the register.
10865 q -- likewise, print the DImode name of the register.
10866 x -- likewise, print the V4SFmode name of the register.
10867 t -- likewise, print the V8SFmode name of the register.
10868 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10869 y -- print "st(0)" instead of "st" as a register.
10870 d -- print duplicated register operand for AVX instruction.
10871 D -- print condition for SSE cmp instruction.
10872 P -- if PIC, print an @PLT suffix.
10873 X -- don't print any sort of PIC '@' suffix for a symbol.
10874 & -- print some in-use local-dynamic symbol name.
10875 H -- print a memory address offset by 8; used for sse high-parts
10876 Y -- print condition for SSE5 com* instruction.
10877 + -- print a branch hint as 'cs' or 'ds' prefix
10878    ; -- print a semicolon (after prefixes, due to a bug in older gas).
10879 */
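/* For example, with an SImode register in operands[0] and AT&T output,
   "%z0" emits just the size suffix "l", while "%k0" prints the 32-bit
   register name and "%b0"/"%w0" print the byte and word names.  */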
10880
10881 void
10882 print_operand (FILE *file, rtx x, int code)
10883 {
10884 if (code)
10885 {
10886 switch (code)
10887 {
10888 case '*':
10889 if (ASSEMBLER_DIALECT == ASM_ATT)
10890 putc ('*', file);
10891 return;
10892
10893 case '&':
10894 assemble_name (file, get_some_local_dynamic_name ());
10895 return;
10896
10897 case 'A':
10898 switch (ASSEMBLER_DIALECT)
10899 {
10900 case ASM_ATT:
10901 putc ('*', file);
10902 break;
10903
10904 case ASM_INTEL:
10905 /* Intel syntax. For absolute addresses, registers should not
10906 be surrounded by braces. */
10907 if (!REG_P (x))
10908 {
10909 putc ('[', file);
10910 PRINT_OPERAND (file, x, 0);
10911 putc (']', file);
10912 return;
10913 }
10914 break;
10915
10916 default:
10917 gcc_unreachable ();
10918 }
10919
10920 PRINT_OPERAND (file, x, 0);
10921 return;
10922
10923
10924 case 'L':
10925 if (ASSEMBLER_DIALECT == ASM_ATT)
10926 putc ('l', file);
10927 return;
10928
10929 case 'W':
10930 if (ASSEMBLER_DIALECT == ASM_ATT)
10931 putc ('w', file);
10932 return;
10933
10934 case 'B':
10935 if (ASSEMBLER_DIALECT == ASM_ATT)
10936 putc ('b', file);
10937 return;
10938
10939 case 'Q':
10940 if (ASSEMBLER_DIALECT == ASM_ATT)
10941 putc ('l', file);
10942 return;
10943
10944 case 'S':
10945 if (ASSEMBLER_DIALECT == ASM_ATT)
10946 putc ('s', file);
10947 return;
10948
10949 case 'T':
10950 if (ASSEMBLER_DIALECT == ASM_ATT)
10951 putc ('t', file);
10952 return;
10953
10954 case 'z':
10955 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10956 {
10957 /* Opcodes don't get size suffixes if using Intel opcodes. */
10958 if (ASSEMBLER_DIALECT == ASM_INTEL)
10959 return;
10960
10961 switch (GET_MODE_SIZE (GET_MODE (x)))
10962 {
10963 case 1:
10964 putc ('b', file);
10965 return;
10966
10967 case 2:
10968 putc ('w', file);
10969 return;
10970
10971 case 4:
10972 putc ('l', file);
10973 return;
10974
10975 case 8:
10976 putc ('q', file);
10977 return;
10978
10979 default:
10980 output_operand_lossage
10981 ("invalid operand size for operand code '%c'", code);
10982 return;
10983 }
10984 }
10985
10986 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10987 warning
10988 (0, "non-integer operand used with operand code '%c'", code);
10989 /* FALLTHRU */
10990
10991 case 'Z':
10992 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
10993 if (ASSEMBLER_DIALECT == ASM_INTEL)
10994 return;
10995
10996 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10997 {
10998 switch (GET_MODE_SIZE (GET_MODE (x)))
10999 {
11000 case 2:
11001 #ifdef HAVE_AS_IX86_FILDS
11002 putc ('s', file);
11003 #endif
11004 return;
11005
11006 case 4:
11007 putc ('l', file);
11008 return;
11009
11010 case 8:
11011 #ifdef HAVE_AS_IX86_FILDQ
11012 putc ('q', file);
11013 #else
11014 fputs ("ll", file);
11015 #endif
11016 return;
11017
11018 default:
11019 break;
11020 }
11021 }
11022 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11023 {
11024 /* 387 opcodes don't get size suffixes
11025 if the operands are registers. */
11026 if (STACK_REG_P (x))
11027 return;
11028
11029 switch (GET_MODE_SIZE (GET_MODE (x)))
11030 {
11031 case 4:
11032 putc ('s', file);
11033 return;
11034
11035 case 8:
11036 putc ('l', file);
11037 return;
11038
11039 case 12:
11040 case 16:
11041 putc ('t', file);
11042 return;
11043
11044 default:
11045 break;
11046 }
11047 }
11048 else
11049 {
11050 output_operand_lossage
11051 ("invalid operand type used with operand code '%c'", code);
11052 return;
11053 }
11054
11055 output_operand_lossage
11056 ("invalid operand size for operand code '%c'", code);
11057 return;
11058
11059 case 'd':
11060 case 'b':
11061 case 'w':
11062 case 'k':
11063 case 'q':
11064 case 'h':
11065 case 't':
11066 case 'y':
11067 case 'x':
11068 case 'X':
11069 case 'P':
11070 break;
11071
11072 case 's':
11073 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11074 {
11075 PRINT_OPERAND (file, x, 0);
11076 fputs (", ", file);
11077 }
11078 return;
11079
11080 case 'D':
11081 	  /* Little bit of braindamage here.  The SSE compare instructions
11082 	     use completely different names for the comparisons than the
11083 	     fp conditional moves do.  */
11084 if (TARGET_AVX)
11085 {
11086 switch (GET_CODE (x))
11087 {
11088 case EQ:
11089 fputs ("eq", file);
11090 break;
11091 case UNEQ:
11092 fputs ("eq_us", file);
11093 break;
11094 case LT:
11095 fputs ("lt", file);
11096 break;
11097 case UNLT:
11098 fputs ("nge", file);
11099 break;
11100 case LE:
11101 fputs ("le", file);
11102 break;
11103 case UNLE:
11104 fputs ("ngt", file);
11105 break;
11106 case UNORDERED:
11107 fputs ("unord", file);
11108 break;
11109 case NE:
11110 fputs ("neq", file);
11111 break;
11112 case LTGT:
11113 fputs ("neq_oq", file);
11114 break;
11115 case GE:
11116 fputs ("ge", file);
11117 break;
11118 case UNGE:
11119 fputs ("nlt", file);
11120 break;
11121 case GT:
11122 fputs ("gt", file);
11123 break;
11124 case UNGT:
11125 fputs ("nle", file);
11126 break;
11127 case ORDERED:
11128 fputs ("ord", file);
11129 break;
11130 default:
11131 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11132 return;
11133 }
11134 }
11135 else
11136 {
11137 switch (GET_CODE (x))
11138 {
11139 case EQ:
11140 case UNEQ:
11141 fputs ("eq", file);
11142 break;
11143 case LT:
11144 case UNLT:
11145 fputs ("lt", file);
11146 break;
11147 case LE:
11148 case UNLE:
11149 fputs ("le", file);
11150 break;
11151 case UNORDERED:
11152 fputs ("unord", file);
11153 break;
11154 case NE:
11155 case LTGT:
11156 fputs ("neq", file);
11157 break;
11158 case UNGE:
11159 case GE:
11160 fputs ("nlt", file);
11161 break;
11162 case UNGT:
11163 case GT:
11164 fputs ("nle", file);
11165 break;
11166 case ORDERED:
11167 fputs ("ord", file);
11168 break;
11169 default:
11170 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11171 return;
11172 }
11173 }
11174 return;
11175 case 'O':
11176 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11177 if (ASSEMBLER_DIALECT == ASM_ATT)
11178 {
11179 switch (GET_MODE (x))
11180 {
11181 case HImode: putc ('w', file); break;
11182 case SImode:
11183 case SFmode: putc ('l', file); break;
11184 case DImode:
11185 case DFmode: putc ('q', file); break;
11186 default: gcc_unreachable ();
11187 }
11188 putc ('.', file);
11189 }
11190 #endif
11191 return;
11192 case 'C':
11193 if (!COMPARISON_P (x))
11194 {
11195 output_operand_lossage ("operand is neither a constant nor a "
11196 "condition code, invalid operand code "
11197 "'C'");
11198 return;
11199 }
11200 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11201 return;
11202 case 'F':
11203 if (!COMPARISON_P (x))
11204 {
11205 output_operand_lossage ("operand is neither a constant nor a "
11206 "condition code, invalid operand code "
11207 "'F'");
11208 return;
11209 }
11210 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11211 if (ASSEMBLER_DIALECT == ASM_ATT)
11212 putc ('.', file);
11213 #endif
11214 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11215 return;
11216
11217 /* Like above, but reverse condition */
11218 case 'c':
11219 /* Check to see if argument to %c is really a constant
11220 and not a condition code which needs to be reversed. */
11221 if (!COMPARISON_P (x))
11222 {
11223 output_operand_lossage ("operand is neither a constant nor a "
11224 "condition code, invalid operand "
11225 "code 'c'");
11226 return;
11227 }
11228 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11229 return;
11230 case 'f':
11231 if (!COMPARISON_P (x))
11232 {
11233 output_operand_lossage ("operand is neither a constant nor a "
11234 "condition code, invalid operand "
11235 "code 'f'");
11236 return;
11237 }
11238 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11239 if (ASSEMBLER_DIALECT == ASM_ATT)
11240 putc ('.', file);
11241 #endif
11242 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11243 return;
11244
11245 case 'E':
11246 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11247 return;
11248
11249 case 'e':
11250 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11251 return;
11252
11253 case 'H':
11254 /* It doesn't actually matter what mode we use here, as we're
11255 only going to use this for printing. */
11256 x = adjust_address_nv (x, DImode, 8);
11257 break;
11258
11259 case '+':
11260 {
11261 rtx x;
11262
11263 if (!optimize
11264 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11265 return;
11266
11267 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11268 if (x)
11269 {
11270 int pred_val = INTVAL (XEXP (x, 0));
11271
11272 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11273 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11274 {
11275 int taken = pred_val > REG_BR_PROB_BASE / 2;
11276 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11277
11278 	      /* Emit hints only where the default branch prediction
11279 		 heuristics would fail.  */
11280 if (taken != cputaken)
11281 {
11282 /* We use 3e (DS) prefix for taken branches and
11283 2e (CS) prefix for not taken branches. */
11284 if (taken)
11285 fputs ("ds ; ", file);
11286 else
11287 fputs ("cs ; ", file);
11288 }
11289 }
11290 }
11291 return;
11292 }
11293
11294 case 'Y':
11295 switch (GET_CODE (x))
11296 {
11297 case NE:
11298 fputs ("neq", file);
11299 break;
11300 case EQ:
11301 fputs ("eq", file);
11302 break;
11303 case GE:
11304 case GEU:
11305 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11306 break;
11307 case GT:
11308 case GTU:
11309 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11310 break;
11311 case LE:
11312 case LEU:
11313 fputs ("le", file);
11314 break;
11315 case LT:
11316 case LTU:
11317 fputs ("lt", file);
11318 break;
11319 case UNORDERED:
11320 fputs ("unord", file);
11321 break;
11322 case ORDERED:
11323 fputs ("ord", file);
11324 break;
11325 case UNEQ:
11326 fputs ("ueq", file);
11327 break;
11328 case UNGE:
11329 fputs ("nlt", file);
11330 break;
11331 case UNGT:
11332 fputs ("nle", file);
11333 break;
11334 case UNLE:
11335 fputs ("ule", file);
11336 break;
11337 case UNLT:
11338 fputs ("ult", file);
11339 break;
11340 case LTGT:
11341 fputs ("une", file);
11342 break;
11343 default:
11344 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11345 return;
11346 }
11347 return;
11348
11349 case ';':
11350 #if TARGET_MACHO
11351 fputs (" ; ", file);
11352 #else
11353 fputc (' ', file);
11354 #endif
11355 return;
11356
11357 default:
11358 output_operand_lossage ("invalid operand code '%c'", code);
11359 }
11360 }
11361
11362 if (REG_P (x))
11363 print_reg (x, code, file);
11364
11365 else if (MEM_P (x))
11366 {
11367 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11368 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11369 && GET_MODE (x) != BLKmode)
11370 {
11371 const char * size;
11372 switch (GET_MODE_SIZE (GET_MODE (x)))
11373 {
11374 case 1: size = "BYTE"; break;
11375 case 2: size = "WORD"; break;
11376 case 4: size = "DWORD"; break;
11377 case 8: size = "QWORD"; break;
11378 case 12: size = "XWORD"; break;
11379 case 16:
11380 if (GET_MODE (x) == XFmode)
11381 size = "XWORD";
11382 else
11383 size = "XMMWORD";
11384 break;
11385 default:
11386 gcc_unreachable ();
11387 }
11388
11389 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11390 if (code == 'b')
11391 size = "BYTE";
11392 else if (code == 'w')
11393 size = "WORD";
11394 else if (code == 'k')
11395 size = "DWORD";
11396
11397 fputs (size, file);
11398 fputs (" PTR ", file);
11399 }
11400
11401 x = XEXP (x, 0);
11402 /* Avoid (%rip) for call operands. */
11403 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11404 && !CONST_INT_P (x))
11405 output_addr_const (file, x);
11406 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11407 output_operand_lossage ("invalid constraints for operand");
11408 else
11409 output_address (x);
11410 }
11411
11412 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11413 {
11414 REAL_VALUE_TYPE r;
11415 long l;
11416
11417 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11418 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11419
11420 if (ASSEMBLER_DIALECT == ASM_ATT)
11421 putc ('$', file);
11422 fprintf (file, "0x%08lx", (long unsigned int) l);
11423 }
11424
11425 /* These float cases don't actually occur as immediate operands. */
11426 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11427 {
11428 char dstr[30];
11429
11430 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11431 fprintf (file, "%s", dstr);
11432 }
11433
11434 else if (GET_CODE (x) == CONST_DOUBLE
11435 && GET_MODE (x) == XFmode)
11436 {
11437 char dstr[30];
11438
11439 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11440 fprintf (file, "%s", dstr);
11441 }
11442
11443 else
11444 {
11445 /* We have patterns that allow zero sets of memory, for instance.
11446 In 64-bit mode, we should probably support all 8-byte vectors,
11447 since we can in fact encode that into an immediate. */
11448 if (GET_CODE (x) == CONST_VECTOR)
11449 {
11450 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11451 x = const0_rtx;
11452 }
11453
11454 if (code != 'P')
11455 {
11456 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11457 {
11458 if (ASSEMBLER_DIALECT == ASM_ATT)
11459 putc ('$', file);
11460 }
11461 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11462 || GET_CODE (x) == LABEL_REF)
11463 {
11464 if (ASSEMBLER_DIALECT == ASM_ATT)
11465 putc ('$', file);
11466 else
11467 fputs ("OFFSET FLAT:", file);
11468 }
11469 }
11470 if (CONST_INT_P (x))
11471 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11472 else if (flag_pic)
11473 output_pic_addr_const (file, x, code);
11474 else
11475 output_addr_const (file, x);
11476 }
11477 }
11478 \f
11479 /* Print a memory operand whose address is ADDR. */
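/* The address is printed in the selected assembler dialect: AT&T style
   "disp(base,index,scale)" with an optional segment override, or Intel
   style "seg:[base+index*scale+disp]".  */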
11480
11481 void
11482 print_operand_address (FILE *file, rtx addr)
11483 {
11484 struct ix86_address parts;
11485 rtx base, index, disp;
11486 int scale;
11487 int ok = ix86_decompose_address (addr, &parts);
11488
11489 gcc_assert (ok);
11490
11491 base = parts.base;
11492 index = parts.index;
11493 disp = parts.disp;
11494 scale = parts.scale;
11495
11496 switch (parts.seg)
11497 {
11498 case SEG_DEFAULT:
11499 break;
11500 case SEG_FS:
11501 case SEG_GS:
11502 if (ASSEMBLER_DIALECT == ASM_ATT)
11503 putc ('%', file);
11504 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11505 break;
11506 default:
11507 gcc_unreachable ();
11508 }
11509
11510   /* Use RIP-relative addressing, which is one byte shorter, in 64-bit mode.  */
11511 if (TARGET_64BIT && !base && !index)
11512 {
11513 rtx symbol = disp;
11514
11515 if (GET_CODE (disp) == CONST
11516 && GET_CODE (XEXP (disp, 0)) == PLUS
11517 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11518 symbol = XEXP (XEXP (disp, 0), 0);
11519
11520 if (GET_CODE (symbol) == LABEL_REF
11521 || (GET_CODE (symbol) == SYMBOL_REF
11522 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11523 base = pc_rtx;
11524 }
11525 if (!base && !index)
11526 {
11527       /* A displacement-only address requires special attention.  */
11528
11529 if (CONST_INT_P (disp))
11530 {
11531 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11532 fputs ("ds:", file);
11533 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11534 }
11535 else if (flag_pic)
11536 output_pic_addr_const (file, disp, 0);
11537 else
11538 output_addr_const (file, disp);
11539 }
11540 else
11541 {
11542 if (ASSEMBLER_DIALECT == ASM_ATT)
11543 {
11544 if (disp)
11545 {
11546 if (flag_pic)
11547 output_pic_addr_const (file, disp, 0);
11548 else if (GET_CODE (disp) == LABEL_REF)
11549 output_asm_label (disp);
11550 else
11551 output_addr_const (file, disp);
11552 }
11553
11554 putc ('(', file);
11555 if (base)
11556 print_reg (base, 0, file);
11557 if (index)
11558 {
11559 putc (',', file);
11560 print_reg (index, 0, file);
11561 if (scale != 1)
11562 fprintf (file, ",%d", scale);
11563 }
11564 putc (')', file);
11565 }
11566 else
11567 {
11568 rtx offset = NULL_RTX;
11569
11570 if (disp)
11571 {
11572 /* Pull out the offset of a symbol; print any symbol itself. */
11573 if (GET_CODE (disp) == CONST
11574 && GET_CODE (XEXP (disp, 0)) == PLUS
11575 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11576 {
11577 offset = XEXP (XEXP (disp, 0), 1);
11578 disp = gen_rtx_CONST (VOIDmode,
11579 XEXP (XEXP (disp, 0), 0));
11580 }
11581
11582 if (flag_pic)
11583 output_pic_addr_const (file, disp, 0);
11584 else if (GET_CODE (disp) == LABEL_REF)
11585 output_asm_label (disp);
11586 else if (CONST_INT_P (disp))
11587 offset = disp;
11588 else
11589 output_addr_const (file, disp);
11590 }
11591
11592 putc ('[', file);
11593 if (base)
11594 {
11595 print_reg (base, 0, file);
11596 if (offset)
11597 {
11598 if (INTVAL (offset) >= 0)
11599 putc ('+', file);
11600 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11601 }
11602 }
11603 else if (offset)
11604 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11605 else
11606 putc ('0', file);
11607
11608 if (index)
11609 {
11610 putc ('+', file);
11611 print_reg (index, 0, file);
11612 if (scale != 1)
11613 fprintf (file, "*%d", scale);
11614 }
11615 putc (']', file);
11616 }
11617 }
11618 }
11619
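/* Output a constant address X that is wrapped in an UNSPEC: the TLS
   relocations (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF, ...) and the Mach-O
   PIC offset.  Return true if X was handled, false otherwise.  */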
11620 bool
11621 output_addr_const_extra (FILE *file, rtx x)
11622 {
11623 rtx op;
11624
11625 if (GET_CODE (x) != UNSPEC)
11626 return false;
11627
11628 op = XVECEXP (x, 0, 0);
11629 switch (XINT (x, 1))
11630 {
11631 case UNSPEC_GOTTPOFF:
11632 output_addr_const (file, op);
11633 /* FIXME: This might be @TPOFF in Sun ld. */
11634 fputs ("@GOTTPOFF", file);
11635 break;
11636 case UNSPEC_TPOFF:
11637 output_addr_const (file, op);
11638 fputs ("@TPOFF", file);
11639 break;
11640 case UNSPEC_NTPOFF:
11641 output_addr_const (file, op);
11642 if (TARGET_64BIT)
11643 fputs ("@TPOFF", file);
11644 else
11645 fputs ("@NTPOFF", file);
11646 break;
11647 case UNSPEC_DTPOFF:
11648 output_addr_const (file, op);
11649 fputs ("@DTPOFF", file);
11650 break;
11651 case UNSPEC_GOTNTPOFF:
11652 output_addr_const (file, op);
11653 if (TARGET_64BIT)
11654 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11655 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11656 else
11657 fputs ("@GOTNTPOFF", file);
11658 break;
11659 case UNSPEC_INDNTPOFF:
11660 output_addr_const (file, op);
11661 fputs ("@INDNTPOFF", file);
11662 break;
11663 #if TARGET_MACHO
11664 case UNSPEC_MACHOPIC_OFFSET:
11665 output_addr_const (file, op);
11666 putc ('-', file);
11667 machopic_output_function_base_name (file);
11668 break;
11669 #endif
11670
11671 default:
11672 return false;
11673 }
11674
11675 return true;
11676 }
11677 \f
11678 /* Split one or more DImode RTL references into pairs of SImode
11679 references. The RTL can be REG, offsettable MEM, integer constant, or
11680 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11681 split and "num" is its length. lo_half and hi_half are output arrays
11682 that parallel "operands". */
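/* For example, a DImode MEM at address X is split into SImode MEMs at
   X and X+4, giving the low and the high word respectively on this
   little-endian target.  */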
11683
11684 void
11685 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11686 {
11687 while (num--)
11688 {
11689 rtx op = operands[num];
11690
11691       /* simplify_subreg refuses to split volatile memory addresses,
11692 	 but we still have to handle them.  */
11693 if (MEM_P (op))
11694 {
11695 lo_half[num] = adjust_address (op, SImode, 0);
11696 hi_half[num] = adjust_address (op, SImode, 4);
11697 }
11698 else
11699 {
11700 lo_half[num] = simplify_gen_subreg (SImode, op,
11701 GET_MODE (op) == VOIDmode
11702 ? DImode : GET_MODE (op), 0);
11703 hi_half[num] = simplify_gen_subreg (SImode, op,
11704 GET_MODE (op) == VOIDmode
11705 ? DImode : GET_MODE (op), 4);
11706 }
11707 }
11708 }
11709 /* Split one or more TImode RTL references into pairs of DImode
11710 references. The RTL can be REG, offsettable MEM, integer constant, or
11711    CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
11712 split and "num" is its length. lo_half and hi_half are output arrays
11713 that parallel "operands". */
11714
11715 void
11716 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11717 {
11718 while (num--)
11719 {
11720 rtx op = operands[num];
11721
11722       /* simplify_subreg refuses to split volatile memory addresses, but we
11723 	 still have to handle them.  */
11724 if (MEM_P (op))
11725 {
11726 lo_half[num] = adjust_address (op, DImode, 0);
11727 hi_half[num] = adjust_address (op, DImode, 8);
11728 }
11729 else
11730 {
11731 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11732 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11733 }
11734 }
11735 }
11736 \f
11737 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11738 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11739 is the expression of the binary operation. The output may either be
11740 emitted here, or returned to the caller, like all output_* functions.
11741
11742 There is no guarantee that the operands are the same mode, as they
11743 might be within FLOAT or FLOAT_EXTEND expressions. */
11744
11745 #ifndef SYSV386_COMPAT
11746 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11747 wants to fix the assemblers because that causes incompatibility
11748 with gcc. No-one wants to fix gcc because that causes
11749 incompatibility with assemblers... You can use the option of
11750 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11751 #define SYSV386_COMPAT 1
11752 #endif
11753
11754 const char *
11755 output_387_binary_op (rtx insn, rtx *operands)
11756 {
11757 static char buf[40];
11758 const char *p;
11759 const char *ssep;
11760 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11761
11762 #ifdef ENABLE_CHECKING
11763   /* Even if we do not want to check the inputs, this documents the input
11764      constraints, which helps in understanding the following code.  */
11765 if (STACK_REG_P (operands[0])
11766 && ((REG_P (operands[1])
11767 && REGNO (operands[0]) == REGNO (operands[1])
11768 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11769 || (REG_P (operands[2])
11770 && REGNO (operands[0]) == REGNO (operands[2])
11771 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11772 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11773 ; /* ok */
11774 else
11775 gcc_assert (is_sse);
11776 #endif
11777
11778 switch (GET_CODE (operands[3]))
11779 {
11780 case PLUS:
11781 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11782 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11783 p = "fiadd";
11784 else
11785 p = "fadd";
11786 ssep = "vadd";
11787 break;
11788
11789 case MINUS:
11790 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11791 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11792 p = "fisub";
11793 else
11794 p = "fsub";
11795 ssep = "vsub";
11796 break;
11797
11798 case MULT:
11799 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11800 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11801 p = "fimul";
11802 else
11803 p = "fmul";
11804 ssep = "vmul";
11805 break;
11806
11807 case DIV:
11808 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11809 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11810 p = "fidiv";
11811 else
11812 p = "fdiv";
11813 ssep = "vdiv";
11814 break;
11815
11816 default:
11817 gcc_unreachable ();
11818 }
11819
11820 if (is_sse)
11821 {
11822 if (TARGET_AVX)
11823 {
11824 strcpy (buf, ssep);
11825 if (GET_MODE (operands[0]) == SFmode)
11826 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11827 else
11828 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11829 }
11830 else
11831 {
11832 strcpy (buf, ssep + 1);
11833 if (GET_MODE (operands[0]) == SFmode)
11834 strcat (buf, "ss\t{%2, %0|%0, %2}");
11835 else
11836 strcat (buf, "sd\t{%2, %0|%0, %2}");
11837 }
11838 return buf;
11839 }
11840 strcpy (buf, p);
11841
11842 switch (GET_CODE (operands[3]))
11843 {
11844 case MULT:
11845 case PLUS:
11846 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11847 {
11848 rtx temp = operands[2];
11849 operands[2] = operands[1];
11850 operands[1] = temp;
11851 }
11852
11853       /* We know operands[0] == operands[1].  */
11854
11855 if (MEM_P (operands[2]))
11856 {
11857 p = "%Z2\t%2";
11858 break;
11859 }
11860
11861 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11862 {
11863 if (STACK_TOP_P (operands[0]))
11864 /* How is it that we are storing to a dead operand[2]?
11865 Well, presumably operands[1] is dead too. We can't
11866 store the result to st(0) as st(0) gets popped on this
11867 instruction. Instead store to operands[2] (which I
11868 think has to be st(1)). st(1) will be popped later.
11869 gcc <= 2.8.1 didn't have this check and generated
11870 assembly code that the Unixware assembler rejected. */
11871 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11872 else
11873 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11874 break;
11875 }
11876
11877 if (STACK_TOP_P (operands[0]))
11878 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11879 else
11880 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11881 break;
11882
11883 case MINUS:
11884 case DIV:
11885 if (MEM_P (operands[1]))
11886 {
11887 p = "r%Z1\t%1";
11888 break;
11889 }
11890
11891 if (MEM_P (operands[2]))
11892 {
11893 p = "%Z2\t%2";
11894 break;
11895 }
11896
11897 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11898 {
11899 #if SYSV386_COMPAT
11900 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11901 derived assemblers, confusingly reverse the direction of
11902 the operation for fsub{r} and fdiv{r} when the
11903 destination register is not st(0). The Intel assembler
11904 doesn't have this brain damage. Read !SYSV386_COMPAT to
11905 figure out what the hardware really does. */
11906 if (STACK_TOP_P (operands[0]))
11907 p = "{p\t%0, %2|rp\t%2, %0}";
11908 else
11909 p = "{rp\t%2, %0|p\t%0, %2}";
11910 #else
11911 if (STACK_TOP_P (operands[0]))
11912 /* As above for fmul/fadd, we can't store to st(0). */
11913 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11914 else
11915 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11916 #endif
11917 break;
11918 }
11919
11920 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11921 {
11922 #if SYSV386_COMPAT
11923 if (STACK_TOP_P (operands[0]))
11924 p = "{rp\t%0, %1|p\t%1, %0}";
11925 else
11926 p = "{p\t%1, %0|rp\t%0, %1}";
11927 #else
11928 if (STACK_TOP_P (operands[0]))
11929 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11930 else
11931 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11932 #endif
11933 break;
11934 }
11935
11936 if (STACK_TOP_P (operands[0]))
11937 {
11938 if (STACK_TOP_P (operands[1]))
11939 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11940 else
11941 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11942 break;
11943 }
11944 else if (STACK_TOP_P (operands[1]))
11945 {
11946 #if SYSV386_COMPAT
11947 p = "{\t%1, %0|r\t%0, %1}";
11948 #else
11949 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11950 #endif
11951 }
11952 else
11953 {
11954 #if SYSV386_COMPAT
11955 p = "{r\t%2, %0|\t%0, %2}";
11956 #else
11957 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11958 #endif
11959 }
11960 break;
11961
11962 default:
11963 gcc_unreachable ();
11964 }
11965
11966 strcat (buf, p);
11967 return buf;
11968 }
11969
11970 /* Return needed mode for entity in optimize_mode_switching pass. */
11971
11972 int
11973 ix86_mode_needed (int entity, rtx insn)
11974 {
11975 enum attr_i387_cw mode;
11976
11977   /* The mode UNINITIALIZED is used to store the control word after a
11978      function call or ASM pattern.  The mode ANY specifies that the function
11979      has no requirements on the control word and makes no changes in the
11980      bits we are interested in.  */
11981
11982 if (CALL_P (insn)
11983 || (NONJUMP_INSN_P (insn)
11984 && (asm_noperands (PATTERN (insn)) >= 0
11985 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11986 return I387_CW_UNINITIALIZED;
11987
11988 if (recog_memoized (insn) < 0)
11989 return I387_CW_ANY;
11990
11991 mode = get_attr_i387_cw (insn);
11992
11993 switch (entity)
11994 {
11995 case I387_TRUNC:
11996 if (mode == I387_CW_TRUNC)
11997 return mode;
11998 break;
11999
12000 case I387_FLOOR:
12001 if (mode == I387_CW_FLOOR)
12002 return mode;
12003 break;
12004
12005 case I387_CEIL:
12006 if (mode == I387_CW_CEIL)
12007 return mode;
12008 break;
12009
12010 case I387_MASK_PM:
12011 if (mode == I387_CW_MASK_PM)
12012 return mode;
12013 break;
12014
12015 default:
12016 gcc_unreachable ();
12017 }
12018
12019 return I387_CW_ANY;
12020 }
12021
12022 /* Output code to initialize control word copies used by trunc?f?i and
12023    rounding patterns.  MODE selects the required rounding mode; the current
12024    control word is saved and a suitably modified copy is prepared.  */
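/* In the x87 control word the rounding-control field occupies bits 10-11
   (00 = to nearest, 01 = down, 10 = up, 11 = toward zero), which is why
   the masks 0x0c00, 0x0400 and 0x0800 appear below; bit 5 (0x0020) masks
   the precision exception for nearbyint.  */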
12025
12026 void
12027 emit_i387_cw_initialization (int mode)
12028 {
12029 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12030 rtx new_mode;
12031
12032 enum ix86_stack_slot slot;
12033
12034 rtx reg = gen_reg_rtx (HImode);
12035
12036 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12037 emit_move_insn (reg, copy_rtx (stored_mode));
12038
12039 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12040 || optimize_function_for_size_p (cfun))
12041 {
12042 switch (mode)
12043 {
12044 case I387_CW_TRUNC:
12045 /* round toward zero (truncate) */
12046 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12047 slot = SLOT_CW_TRUNC;
12048 break;
12049
12050 case I387_CW_FLOOR:
12051 /* round down toward -oo */
12052 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12053 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12054 slot = SLOT_CW_FLOOR;
12055 break;
12056
12057 case I387_CW_CEIL:
12058 /* round up toward +oo */
12059 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12060 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12061 slot = SLOT_CW_CEIL;
12062 break;
12063
12064 case I387_CW_MASK_PM:
12065 /* mask precision exception for nearbyint() */
12066 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12067 slot = SLOT_CW_MASK_PM;
12068 break;
12069
12070 default:
12071 gcc_unreachable ();
12072 }
12073 }
12074 else
12075 {
12076 switch (mode)
12077 {
12078 case I387_CW_TRUNC:
12079 /* round toward zero (truncate) */
12080 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12081 slot = SLOT_CW_TRUNC;
12082 break;
12083
12084 case I387_CW_FLOOR:
12085 /* round down toward -oo */
12086 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12087 slot = SLOT_CW_FLOOR;
12088 break;
12089
12090 case I387_CW_CEIL:
12091 /* round up toward +oo */
12092 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12093 slot = SLOT_CW_CEIL;
12094 break;
12095
12096 case I387_CW_MASK_PM:
12097 /* mask precision exception for nearbyint() */
12098 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12099 slot = SLOT_CW_MASK_PM;
12100 break;
12101
12102 default:
12103 gcc_unreachable ();
12104 }
12105 }
12106
12107 gcc_assert (slot < MAX_386_STACK_LOCALS);
12108
12109 new_mode = assign_386_stack_local (HImode, slot);
12110 emit_move_insn (new_mode, reg);
12111 }
12112
12113 /* Output code for INSN to convert a float to a signed int. OPERANDS
12114 are the insn operands. The output may be [HSD]Imode and the input
12115 operand may be [SDX]Fmode. */
12116
12117 const char *
12118 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12119 {
12120 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12121 int dimode_p = GET_MODE (operands[0]) == DImode;
12122 int round_mode = get_attr_i387_cw (insn);
12123
12124 /* Jump through a hoop or two for DImode, since the hardware has no
12125 non-popping instruction. We used to do this a different way, but
12126 that was somewhat fragile and broke with post-reload splitters. */
12127 if ((dimode_p || fisttp) && !stack_top_dies)
12128 output_asm_insn ("fld\t%y1", operands);
12129
12130 gcc_assert (STACK_TOP_P (operands[1]));
12131 gcc_assert (MEM_P (operands[0]));
12132 gcc_assert (GET_MODE (operands[1]) != TFmode);
12133
12134 if (fisttp)
12135 output_asm_insn ("fisttp%Z0\t%0", operands);
12136 else
12137 {
12138 if (round_mode != I387_CW_ANY)
12139 output_asm_insn ("fldcw\t%3", operands);
12140 if (stack_top_dies || dimode_p)
12141 output_asm_insn ("fistp%Z0\t%0", operands);
12142 else
12143 output_asm_insn ("fist%Z0\t%0", operands);
12144 if (round_mode != I387_CW_ANY)
12145 output_asm_insn ("fldcw\t%2", operands);
12146 }
12147
12148 return "";
12149 }
12150
12151 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12152 have the values zero or one, indicates the ffreep insn's operand
12153 from the OPERANDS array. */
12154
12155 static const char *
12156 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12157 {
12158 if (TARGET_USE_FFREEP)
12159 #if HAVE_AS_IX86_FFREEP
12160 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12161 #else
12162 {
12163 static char retval[] = ".word\t0xc_df";
12164 int regno = REGNO (operands[opno]);
12165
12166 gcc_assert (FP_REGNO_P (regno));
12167
12168 retval[9] = '0' + (regno - FIRST_STACK_REG);
12169 return retval;
12170 }
12171 #endif
12172
12173 return opno ? "fstp\t%y1" : "fstp\t%y0";
12174 }
12175
12176
12177 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12178 should be used. UNORDERED_P is true when fucom should be used. */
12179
12180 const char *
12181 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12182 {
12183 int stack_top_dies;
12184 rtx cmp_op0, cmp_op1;
12185 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12186
12187 if (eflags_p)
12188 {
12189 cmp_op0 = operands[0];
12190 cmp_op1 = operands[1];
12191 }
12192 else
12193 {
12194 cmp_op0 = operands[1];
12195 cmp_op1 = operands[2];
12196 }
12197
12198 if (is_sse)
12199 {
12200 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12201 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12202 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12203 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12204
12205 if (GET_MODE (operands[0]) == SFmode)
12206 if (unordered_p)
12207 return &ucomiss[TARGET_AVX ? 0 : 1];
12208 else
12209 return &comiss[TARGET_AVX ? 0 : 1];
12210 else
12211 if (unordered_p)
12212 return &ucomisd[TARGET_AVX ? 0 : 1];
12213 else
12214 return &comisd[TARGET_AVX ? 0 : 1];
12215 }
12216
12217 gcc_assert (STACK_TOP_P (cmp_op0));
12218
12219 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12220
12221 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12222 {
12223 if (stack_top_dies)
12224 {
12225 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12226 return output_387_ffreep (operands, 1);
12227 }
12228 else
12229 return "ftst\n\tfnstsw\t%0";
12230 }
12231
12232 if (STACK_REG_P (cmp_op1)
12233 && stack_top_dies
12234 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12235 && REGNO (cmp_op1) != FIRST_STACK_REG)
12236 {
12237       /* If the top of the 387 stack dies, and the other operand is
12238 	 also a stack register that dies, then this must be a
12239 	 `fcompp' float compare.  */
12240
12241 if (eflags_p)
12242 {
12243 /* There is no double popping fcomi variant. Fortunately,
12244 eflags is immune from the fstp's cc clobbering. */
12245 if (unordered_p)
12246 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12247 else
12248 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12249 return output_387_ffreep (operands, 0);
12250 }
12251 else
12252 {
12253 if (unordered_p)
12254 return "fucompp\n\tfnstsw\t%0";
12255 else
12256 return "fcompp\n\tfnstsw\t%0";
12257 }
12258 }
12259 else
12260 {
12261 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12262
12263 static const char * const alt[16] =
12264 {
12265 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12266 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12267 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12268 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12269
12270 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12271 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12272 NULL,
12273 NULL,
12274
12275 "fcomi\t{%y1, %0|%0, %y1}",
12276 "fcomip\t{%y1, %0|%0, %y1}",
12277 "fucomi\t{%y1, %0|%0, %y1}",
12278 "fucomip\t{%y1, %0|%0, %y1}",
12279
12280 NULL,
12281 NULL,
12282 NULL,
12283 NULL
12284 };
12285
12286 int mask;
12287 const char *ret;
12288
12289 mask = eflags_p << 3;
12290 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12291 mask |= unordered_p << 1;
12292 mask |= stack_top_dies;
12293
12294 gcc_assert (mask < 16);
12295 ret = alt[mask];
12296 gcc_assert (ret);
12297
12298 return ret;
12299 }
12300 }
12301
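/* Output one element of a jump table: a reference to the local label
   numbered VALUE, emitted as a quad in 64-bit mode (when ASM_QUAD is
   available) and as a long otherwise.  */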
12302 void
12303 ix86_output_addr_vec_elt (FILE *file, int value)
12304 {
12305 const char *directive = ASM_LONG;
12306
12307 #ifdef ASM_QUAD
12308 if (TARGET_64BIT)
12309 directive = ASM_QUAD;
12310 #else
12311 gcc_assert (!TARGET_64BIT);
12312 #endif
12313
12314 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12315 }
12316
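/* Output one element of a PC-relative jump table: the difference between
   local labels VALUE and REL in 64-bit mode, and a @GOTOFF (or picbase-
   relative) reference to label VALUE otherwise.  */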
12317 void
12318 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12319 {
12320 const char *directive = ASM_LONG;
12321
12322 #ifdef ASM_QUAD
12323 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12324 directive = ASM_QUAD;
12325 #else
12326 gcc_assert (!TARGET_64BIT);
12327 #endif
12328 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12329 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12330 fprintf (file, "%s%s%d-%s%d\n",
12331 directive, LPREFIX, value, LPREFIX, rel);
12332 else if (HAVE_AS_GOTOFF_IN_DATA)
12333 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12334 #if TARGET_MACHO
12335 else if (TARGET_MACHO)
12336 {
12337 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12338 machopic_output_function_base_name (file);
12339 fprintf(file, "\n");
12340 }
12341 #endif
12342 else
12343 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12344 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12345 }
12346 \f
12347 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12348 for the target. */
12349
12350 void
12351 ix86_expand_clear (rtx dest)
12352 {
12353 rtx tmp;
12354
12355 /* We play register width games, which are only valid after reload. */
12356 gcc_assert (reload_completed);
12357
12358 /* Avoid HImode and its attendant prefix byte. */
12359 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12360 dest = gen_rtx_REG (SImode, REGNO (dest));
12361 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12362
12363 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12364 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12365 {
12366 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12367 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12368 }
12369
12370 emit_insn (tmp);
12371 }
12372
12373 /* X is an unchanging MEM. If it is a constant pool reference, return
12374 the constant pool rtx, else NULL. */
12375
12376 rtx
12377 maybe_get_pool_constant (rtx x)
12378 {
12379 x = ix86_delegitimize_address (XEXP (x, 0));
12380
12381 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12382 return get_pool_constant (x);
12383
12384 return NULL_RTX;
12385 }
12386
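/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS, dllimport and PIC symbol references and copying
   awkward constants and memory operands into registers as needed.  */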
12387 void
12388 ix86_expand_move (enum machine_mode mode, rtx operands[])
12389 {
12390 rtx op0, op1;
12391 enum tls_model model;
12392
12393 op0 = operands[0];
12394 op1 = operands[1];
12395
12396 if (GET_CODE (op1) == SYMBOL_REF)
12397 {
12398 model = SYMBOL_REF_TLS_MODEL (op1);
12399 if (model)
12400 {
12401 op1 = legitimize_tls_address (op1, model, true);
12402 op1 = force_operand (op1, op0);
12403 if (op1 == op0)
12404 return;
12405 }
12406 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12407 && SYMBOL_REF_DLLIMPORT_P (op1))
12408 op1 = legitimize_dllimport_symbol (op1, false);
12409 }
12410 else if (GET_CODE (op1) == CONST
12411 && GET_CODE (XEXP (op1, 0)) == PLUS
12412 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12413 {
12414 rtx addend = XEXP (XEXP (op1, 0), 1);
12415 rtx symbol = XEXP (XEXP (op1, 0), 0);
12416 rtx tmp = NULL;
12417
12418 model = SYMBOL_REF_TLS_MODEL (symbol);
12419 if (model)
12420 tmp = legitimize_tls_address (symbol, model, true);
12421 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12422 && SYMBOL_REF_DLLIMPORT_P (symbol))
12423 tmp = legitimize_dllimport_symbol (symbol, true);
12424
12425 if (tmp)
12426 {
12427 tmp = force_operand (tmp, NULL);
12428 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12429 op0, 1, OPTAB_DIRECT);
12430 if (tmp == op0)
12431 return;
12432 }
12433 }
12434
12435 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12436 {
12437 if (TARGET_MACHO && !TARGET_64BIT)
12438 {
12439 #if TARGET_MACHO
12440 if (MACHOPIC_PURE)
12441 {
12442 rtx temp = ((reload_in_progress
12443 || ((op0 && REG_P (op0))
12444 && mode == Pmode))
12445 ? op0 : gen_reg_rtx (Pmode));
12446 op1 = machopic_indirect_data_reference (op1, temp);
12447 op1 = machopic_legitimize_pic_address (op1, mode,
12448 temp == op1 ? 0 : temp);
12449 }
12450 else if (MACHOPIC_INDIRECT)
12451 op1 = machopic_indirect_data_reference (op1, 0);
12452 if (op0 == op1)
12453 return;
12454 #endif
12455 }
12456 else
12457 {
12458 if (MEM_P (op0))
12459 op1 = force_reg (Pmode, op1);
12460 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12461 {
12462 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12463 op1 = legitimize_pic_address (op1, reg);
12464 if (op0 == op1)
12465 return;
12466 }
12467 }
12468 }
12469 else
12470 {
12471 if (MEM_P (op0)
12472 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12473 || !push_operand (op0, mode))
12474 && MEM_P (op1))
12475 op1 = force_reg (mode, op1);
12476
12477 if (push_operand (op0, mode)
12478 && ! general_no_elim_operand (op1, mode))
12479 op1 = copy_to_mode_reg (mode, op1);
12480
12481       /* Force large constants in 64-bit compilation into a register
12482 	 to get them CSEd.  */
12483 if (can_create_pseudo_p ()
12484 && (mode == DImode) && TARGET_64BIT
12485 && immediate_operand (op1, mode)
12486 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12487 && !register_operand (op0, mode)
12488 && optimize)
12489 op1 = copy_to_mode_reg (mode, op1);
12490
12491 if (can_create_pseudo_p ()
12492 && FLOAT_MODE_P (mode)
12493 && GET_CODE (op1) == CONST_DOUBLE)
12494 {
12495 /* If we are loading a floating point constant to a register,
12496 force the value to memory now, since we'll get better code
12497 out the back end. */
12498
12499 op1 = validize_mem (force_const_mem (mode, op1));
12500 if (!register_operand (op0, mode))
12501 {
12502 rtx temp = gen_reg_rtx (mode);
12503 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12504 emit_move_insn (op0, temp);
12505 return;
12506 }
12507 }
12508 }
12509
12510 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12511 }
12512
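/* Expand a vector move of MODE from operands[1] to operands[0],
   forcing non-trivial constants into the constant pool and routing
   under-aligned SSE operands through the misaligned-move expander.  */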
12513 void
12514 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12515 {
12516 rtx op0 = operands[0], op1 = operands[1];
12517 unsigned int align = GET_MODE_ALIGNMENT (mode);
12518
12519   /* Force constants other than zero into memory.  We do not know how
12520      the instructions used to build constants modify the upper 64 bits
12521      of the register; once we have that information we may be able
12522      to handle some of them more efficiently.  */
12523 if (can_create_pseudo_p ()
12524 && register_operand (op0, mode)
12525 && (CONSTANT_P (op1)
12526 || (GET_CODE (op1) == SUBREG
12527 && CONSTANT_P (SUBREG_REG (op1))))
12528 && standard_sse_constant_p (op1) <= 0)
12529 op1 = validize_mem (force_const_mem (mode, op1));
12530
12531   /* We need to check memory alignment for SSE mode since an attribute
12532      can make operands unaligned.  */
12533 if (can_create_pseudo_p ()
12534 && SSE_REG_MODE_P (mode)
12535 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12536 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12537 {
12538 rtx tmp[2];
12539
12540 /* ix86_expand_vector_move_misalign() does not like constants ... */
12541 if (CONSTANT_P (op1)
12542 || (GET_CODE (op1) == SUBREG
12543 && CONSTANT_P (SUBREG_REG (op1))))
12544 op1 = validize_mem (force_const_mem (mode, op1));
12545
12546 /* ... nor both arguments in memory. */
12547 if (!register_operand (op0, mode)
12548 && !register_operand (op1, mode))
12549 op1 = force_reg (mode, op1);
12550
12551 tmp[0] = op0; tmp[1] = op1;
12552 ix86_expand_vector_move_misalign (mode, tmp);
12553 return;
12554 }
12555
12556 /* Make operand1 a register if it isn't already. */
12557 if (can_create_pseudo_p ()
12558 && !register_operand (op0, mode)
12559 && !register_operand (op1, mode))
12560 {
12561 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12562 return;
12563 }
12564
12565 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12566 }
12567
12568 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12569 straight to ix86_expand_vector_move. */
12570 /* Code generation for scalar reg-reg moves of single and double precision data:
12571 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12572 movaps reg, reg
12573 else
12574 movss reg, reg
12575 if (x86_sse_partial_reg_dependency == true)
12576 movapd reg, reg
12577 else
12578 movsd reg, reg
12579
12580 Code generation for scalar loads of double precision data:
12581 if (x86_sse_split_regs == true)
12582 movlpd mem, reg (gas syntax)
12583 else
12584 movsd mem, reg
12585
12586 Code generation for unaligned packed loads of single precision data
12587 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12588 if (x86_sse_unaligned_move_optimal)
12589 movups mem, reg
12590
12591 if (x86_sse_partial_reg_dependency == true)
12592 {
12593 xorps reg, reg
12594 movlps mem, reg
12595 movhps mem+8, reg
12596 }
12597 else
12598 {
12599 movlps mem, reg
12600 movhps mem+8, reg
12601 }
12602
12603 Code generation for unaligned packed loads of double precision data
12604 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12605 if (x86_sse_unaligned_move_optimal)
12606 movupd mem, reg
12607
12608 if (x86_sse_split_regs == true)
12609 {
12610 movlpd mem, reg
12611 movhpd mem+8, reg
12612 }
12613 else
12614 {
12615 movsd mem, reg
12616 movhpd mem+8, reg
12617 }
12618 */
12619
12620 void
12621 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12622 {
12623 rtx op0, op1, m;
12624
12625 op0 = operands[0];
12626 op1 = operands[1];
12627
12628 if (TARGET_AVX)
12629 {
12630 switch (GET_MODE_CLASS (mode))
12631 {
12632 case MODE_VECTOR_INT:
12633 case MODE_INT:
12634 switch (GET_MODE_SIZE (mode))
12635 {
12636 case 16:
12637 op0 = gen_lowpart (V16QImode, op0);
12638 op1 = gen_lowpart (V16QImode, op1);
12639 emit_insn (gen_avx_movdqu (op0, op1));
12640 break;
12641 case 32:
12642 op0 = gen_lowpart (V32QImode, op0);
12643 op1 = gen_lowpart (V32QImode, op1);
12644 emit_insn (gen_avx_movdqu256 (op0, op1));
12645 break;
12646 default:
12647 gcc_unreachable ();
12648 }
12649 break;
12650 case MODE_VECTOR_FLOAT:
12651 op0 = gen_lowpart (mode, op0);
12652 op1 = gen_lowpart (mode, op1);
12653
12654 switch (mode)
12655 {
12656 case V4SFmode:
12657 emit_insn (gen_avx_movups (op0, op1));
12658 break;
12659 case V8SFmode:
12660 emit_insn (gen_avx_movups256 (op0, op1));
12661 break;
12662 case V2DFmode:
12663 emit_insn (gen_avx_movupd (op0, op1));
12664 break;
12665 case V4DFmode:
12666 emit_insn (gen_avx_movupd256 (op0, op1));
12667 break;
12668 default:
12669 gcc_unreachable ();
12670 }
12671 break;
12672
12673 default:
12674 gcc_unreachable ();
12675 }
12676
12677 return;
12678 }
12679
12680 if (MEM_P (op1))
12681 {
12682 /* If we're optimizing for size, movups is the smallest. */
12683 if (optimize_insn_for_size_p ())
12684 {
12685 op0 = gen_lowpart (V4SFmode, op0);
12686 op1 = gen_lowpart (V4SFmode, op1);
12687 emit_insn (gen_sse_movups (op0, op1));
12688 return;
12689 }
12690
12691 /* ??? If we have typed data, then it would appear that using
12692 movdqu is the only way to get unaligned data loaded with
12693 integer type. */
12694 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12695 {
12696 op0 = gen_lowpart (V16QImode, op0);
12697 op1 = gen_lowpart (V16QImode, op1);
12698 emit_insn (gen_sse2_movdqu (op0, op1));
12699 return;
12700 }
12701
12702 if (TARGET_SSE2 && mode == V2DFmode)
12703 {
12704 rtx zero;
12705
12706 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12707 {
12708 op0 = gen_lowpart (V2DFmode, op0);
12709 op1 = gen_lowpart (V2DFmode, op1);
12710 emit_insn (gen_sse2_movupd (op0, op1));
12711 return;
12712 }
12713
12714 /* When SSE registers are split into halves, we can avoid
12715 writing to the top half twice. */
12716 if (TARGET_SSE_SPLIT_REGS)
12717 {
12718 emit_clobber (op0);
12719 zero = op0;
12720 }
12721 else
12722 {
12723 /* ??? Not sure about the best option for the Intel chips.
12724 The following would seem to satisfy; the register is
12725 entirely cleared, breaking the dependency chain. We
12726 then store to the upper half, with a dependency depth
12727 of one. A rumor has it that Intel recommends two movsd
12728 followed by an unpacklpd, but this is unconfirmed. And
12729 given that the dependency depth of the unpacklpd would
12730 still be one, I'm not sure why this would be better. */
12731 zero = CONST0_RTX (V2DFmode);
12732 }
12733
12734 m = adjust_address (op1, DFmode, 0);
12735 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12736 m = adjust_address (op1, DFmode, 8);
12737 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12738 }
12739 else
12740 {
12741 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12742 {
12743 op0 = gen_lowpart (V4SFmode, op0);
12744 op1 = gen_lowpart (V4SFmode, op1);
12745 emit_insn (gen_sse_movups (op0, op1));
12746 return;
12747 }
12748
12749 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12750 emit_move_insn (op0, CONST0_RTX (mode));
12751 else
12752 emit_clobber (op0);
12753
12754 if (mode != V4SFmode)
12755 op0 = gen_lowpart (V4SFmode, op0);
12756 m = adjust_address (op1, V2SFmode, 0);
12757 emit_insn (gen_sse_loadlps (op0, op0, m));
12758 m = adjust_address (op1, V2SFmode, 8);
12759 emit_insn (gen_sse_loadhps (op0, op0, m));
12760 }
12761 }
12762 else if (MEM_P (op0))
12763 {
12764 /* If we're optimizing for size, movups is the smallest. */
12765 if (optimize_insn_for_size_p ())
12766 {
12767 op0 = gen_lowpart (V4SFmode, op0);
12768 op1 = gen_lowpart (V4SFmode, op1);
12769 emit_insn (gen_sse_movups (op0, op1));
12770 return;
12771 }
12772
12773 /* ??? Similar to above, only less clear because of quote
12774 typeless stores unquote. */
12775 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12776 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12777 {
12778 op0 = gen_lowpart (V16QImode, op0);
12779 op1 = gen_lowpart (V16QImode, op1);
12780 emit_insn (gen_sse2_movdqu (op0, op1));
12781 return;
12782 }
12783
12784 if (TARGET_SSE2 && mode == V2DFmode)
12785 {
12786 m = adjust_address (op0, DFmode, 0);
12787 emit_insn (gen_sse2_storelpd (m, op1));
12788 m = adjust_address (op0, DFmode, 8);
12789 emit_insn (gen_sse2_storehpd (m, op1));
12790 }
12791 else
12792 {
12793 if (mode != V4SFmode)
12794 op1 = gen_lowpart (V4SFmode, op1);
12795 m = adjust_address (op0, V2SFmode, 0);
12796 emit_insn (gen_sse_storelps (m, op1));
12797 m = adjust_address (op0, V2SFmode, 8);
12798 emit_insn (gen_sse_storehps (m, op1));
12799 }
12800 }
12801 else
12802 gcc_unreachable ();
12803 }
12804
12805 /* Expand a push in MODE. This is some mode for which we do not support
12806 proper push instructions, at least from the registers that we expect
12807 the value to live in. */
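/* Illustrative example (not part of the original comments): for a mode with
   no real push instruction this typically expands to an explicit stack
   adjustment followed by an ordinary store, e.g. roughly

	sub	$16, %esp
	movups	%xmm0, (%esp)

   if the value happens to live in an SSE register.  */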
12808
12809 void
12810 ix86_expand_push (enum machine_mode mode, rtx x)
12811 {
12812 rtx tmp;
12813
12814 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12815 GEN_INT (-GET_MODE_SIZE (mode)),
12816 stack_pointer_rtx, 1, OPTAB_DIRECT);
12817 if (tmp != stack_pointer_rtx)
12818 emit_move_insn (stack_pointer_rtx, tmp);
12819
12820 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12821
12822 /* When we push an operand onto the stack, it has to be aligned at least
12823 at the function argument boundary. However, since we don't have
12824 the argument type, we can't determine the actual argument
12825 boundary. */
12826 emit_move_insn (tmp, x);
12827 }
12828
12829 /* Helper function of ix86_fixup_binary_operands to canonicalize
12830 operand order. Returns true if the operands should be swapped. */
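/* Illustrative example (not in the original comments): for a commutative
   (plus (mem) (reg)) where operands[0] is that same reg, we return true so
   that after swapping src1 matches the destination, as the two-address x86
   forms prefer.  */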
12831
12832 static bool
12833 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12834 rtx operands[])
12835 {
12836 rtx dst = operands[0];
12837 rtx src1 = operands[1];
12838 rtx src2 = operands[2];
12839
12840 /* If the operation is not commutative, we can't do anything. */
12841 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12842 return false;
12843
12844 /* Highest priority is that src1 should match dst. */
12845 if (rtx_equal_p (dst, src1))
12846 return false;
12847 if (rtx_equal_p (dst, src2))
12848 return true;
12849
12850 /* Next highest priority is that immediate constants come second. */
12851 if (immediate_operand (src2, mode))
12852 return false;
12853 if (immediate_operand (src1, mode))
12854 return true;
12855
12856 /* Lowest priority is that memory references should come second. */
12857 if (MEM_P (src2))
12858 return false;
12859 if (MEM_P (src1))
12860 return true;
12861
12862 return false;
12863 }
12864
12865
12866 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12867 destination to use for the operation. If different from the true
12868 destination in operands[0], a copy operation will be required. */
12869
12870 rtx
12871 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12872 rtx operands[])
12873 {
12874 rtx dst = operands[0];
12875 rtx src1 = operands[1];
12876 rtx src2 = operands[2];
12877
12878 /* Canonicalize operand order. */
12879 if (ix86_swap_binary_operands_p (code, mode, operands))
12880 {
12881 rtx temp;
12882
12883 /* It is invalid to swap operands of different modes. */
12884 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12885
12886 temp = src1;
12887 src1 = src2;
12888 src2 = temp;
12889 }
12890
12891 /* Both source operands cannot be in memory. */
12892 if (MEM_P (src1) && MEM_P (src2))
12893 {
12894 /* Optimization: Only read from memory once. */
12895 if (rtx_equal_p (src1, src2))
12896 {
12897 src2 = force_reg (mode, src2);
12898 src1 = src2;
12899 }
12900 else
12901 src2 = force_reg (mode, src2);
12902 }
12903
12904 /* If the destination is memory, and we do not have matching source
12905 operands, do things in registers. */
12906 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12907 dst = gen_reg_rtx (mode);
12908
12909 /* Source 1 cannot be a constant. */
12910 if (CONSTANT_P (src1))
12911 src1 = force_reg (mode, src1);
12912
12913 /* Source 1 cannot be a non-matching memory. */
12914 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12915 src1 = force_reg (mode, src1);
12916
12917 operands[1] = src1;
12918 operands[2] = src2;
12919 return dst;
12920 }
12921
12922 /* Similarly, but assume that the destination has already been
12923 set up properly. */
12924
12925 void
12926 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12927 enum machine_mode mode, rtx operands[])
12928 {
12929 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12930 gcc_assert (dst == operands[0]);
12931 }
12932
12933 /* Attempt to expand a binary operator. Make the expansion closer to the
12934 actual machine than just general_operand, which will allow 3 separate
12935 memory references (one output, two input) in a single insn. */
12936
12937 void
12938 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12939 rtx operands[])
12940 {
12941 rtx src1, src2, dst, op, clob;
12942
12943 dst = ix86_fixup_binary_operands (code, mode, operands);
12944 src1 = operands[1];
12945 src2 = operands[2];
12946
12947 /* Emit the instruction. */
12948
12949 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12950 if (reload_in_progress)
12951 {
12952 /* Reload doesn't know about the flags register, and doesn't know that
12953 it doesn't want to clobber it. We can only do this with PLUS. */
12954 gcc_assert (code == PLUS);
12955 emit_insn (op);
12956 }
12957 else
12958 {
12959 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12960 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12961 }
12962
12963 /* Fix up the destination if needed. */
12964 if (dst != operands[0])
12965 emit_move_insn (operands[0], dst);
12966 }
12967
12968 /* Return TRUE or FALSE depending on whether the binary operator meets the
12969 appropriate constraints. */
12970
12971 int
12972 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12973 rtx operands[3])
12974 {
12975 rtx dst = operands[0];
12976 rtx src1 = operands[1];
12977 rtx src2 = operands[2];
12978
12979 /* Both source operands cannot be in memory. */
12980 if (MEM_P (src1) && MEM_P (src2))
12981 return 0;
12982
12983 /* Canonicalize operand order for commutative operators. */
12984 if (ix86_swap_binary_operands_p (code, mode, operands))
12985 {
12986 rtx temp = src1;
12987 src1 = src2;
12988 src2 = temp;
12989 }
12990
12991 /* If the destination is memory, we must have a matching source operand. */
12992 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12993 return 0;
12994
12995 /* Source 1 cannot be a constant. */
12996 if (CONSTANT_P (src1))
12997 return 0;
12998
12999 /* Source 1 cannot be a non-matching memory. */
13000 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13001 return 0;
13002
13003 return 1;
13004 }
13005
13006 /* Attempt to expand a unary operator. Make the expansion closer to the
13007 actual machine than just general_operand, which will allow 2 separate
13008 memory references (one output, one input) in a single insn. */
13009
13010 void
13011 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13012 rtx operands[])
13013 {
13014 int matching_memory;
13015 rtx src, dst, op, clob;
13016
13017 dst = operands[0];
13018 src = operands[1];
13019
13020 /* If the destination is memory, and we do not have matching source
13021 operands, do things in registers. */
13022 matching_memory = 0;
13023 if (MEM_P (dst))
13024 {
13025 if (rtx_equal_p (dst, src))
13026 matching_memory = 1;
13027 else
13028 dst = gen_reg_rtx (mode);
13029 }
13030
13031 /* When source operand is memory, destination must match. */
13032 if (MEM_P (src) && !matching_memory)
13033 src = force_reg (mode, src);
13034
13035 /* Emit the instruction. */
13036
13037 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13038 if (reload_in_progress || code == NOT)
13039 {
13040 /* Reload doesn't know about the flags register, and doesn't know that
13041 it doesn't want to clobber it. */
13042 gcc_assert (code == NOT);
13043 emit_insn (op);
13044 }
13045 else
13046 {
13047 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13048 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13049 }
13050
13051 /* Fix up the destination if needed. */
13052 if (dst != operands[0])
13053 emit_move_insn (operands[0], dst);
13054 }
13055
13056 #define LEA_SEARCH_THRESHOLD 12
13057
13058 /* Search backward for non-agu definition of register number REGNO1
13059 or register number REGNO2 in INSN's basic block until
13060 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13061 2. Reach BB boundary, or
13062 3. Reach agu definition.
13063 Returns the distance between the non-agu definition point and INSN.
13064 If no definition point, returns -1. */
13065
13066 static int
13067 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13068 rtx insn)
13069 {
13070 basic_block bb = BLOCK_FOR_INSN (insn);
13071 int distance = 0;
13072 df_ref *def_rec;
13073 enum attr_type insn_type;
13074
13075 if (insn != BB_HEAD (bb))
13076 {
13077 rtx prev = PREV_INSN (insn);
13078 while (prev && distance < LEA_SEARCH_THRESHOLD)
13079 {
13080 if (INSN_P (prev))
13081 {
13082 distance++;
13083 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13084 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13085 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13086 && (regno1 == DF_REF_REGNO (*def_rec)
13087 || regno2 == DF_REF_REGNO (*def_rec)))
13088 {
13089 insn_type = get_attr_type (prev);
13090 if (insn_type != TYPE_LEA)
13091 goto done;
13092 }
13093 }
13094 if (prev == BB_HEAD (bb))
13095 break;
13096 prev = PREV_INSN (prev);
13097 }
13098 }
13099
13100 if (distance < LEA_SEARCH_THRESHOLD)
13101 {
13102 edge e;
13103 edge_iterator ei;
13104 bool simple_loop = false;
13105
13106 FOR_EACH_EDGE (e, ei, bb->preds)
13107 if (e->src == bb)
13108 {
13109 simple_loop = true;
13110 break;
13111 }
13112
13113 if (simple_loop)
13114 {
13115 rtx prev = BB_END (bb);
13116 while (prev
13117 && prev != insn
13118 && distance < LEA_SEARCH_THRESHOLD)
13119 {
13120 if (INSN_P (prev))
13121 {
13122 distance++;
13123 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13124 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13125 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13126 && (regno1 == DF_REF_REGNO (*def_rec)
13127 || regno2 == DF_REF_REGNO (*def_rec)))
13128 {
13129 insn_type = get_attr_type (prev);
13130 if (insn_type != TYPE_LEA)
13131 goto done;
13132 }
13133 }
13134 prev = PREV_INSN (prev);
13135 }
13136 }
13137 }
13138
13139 distance = -1;
13140
13141 done:
13142 /* get_attr_type may modify recog data. We want to make sure
13143 that recog data is valid for instruction INSN, on which
13144 distance_non_agu_define is called. INSN is unchanged here. */
13145 extract_insn_cached (insn);
13146 return distance;
13147 }
13148
13149 /* Return the distance between INSN and the next insn that uses
13150 register number REGNO0 in a memory address. Return -1 if no such
13151 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
13152
13153 static int
13154 distance_agu_use (unsigned int regno0, rtx insn)
13155 {
13156 basic_block bb = BLOCK_FOR_INSN (insn);
13157 int distance = 0;
13158 df_ref *def_rec;
13159 df_ref *use_rec;
13160
13161 if (insn != BB_END (bb))
13162 {
13163 rtx next = NEXT_INSN (insn);
13164 while (next && distance < LEA_SEARCH_THRESHOLD)
13165 {
13166 if (INSN_P (next))
13167 {
13168 distance++;
13169
13170 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13171 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13172 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13173 && regno0 == DF_REF_REGNO (*use_rec))
13174 {
13175 /* Return DISTANCE if OP0 is used in memory
13176 address in NEXT. */
13177 return distance;
13178 }
13179
13180 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13181 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13182 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13183 && regno0 == DF_REF_REGNO (*def_rec))
13184 {
13185 /* Return -1 if OP0 is set in NEXT. */
13186 return -1;
13187 }
13188 }
13189 if (next == BB_END (bb))
13190 break;
13191 next = NEXT_INSN (next);
13192 }
13193 }
13194
13195 if (distance < LEA_SEARCH_THRESHOLD)
13196 {
13197 edge e;
13198 edge_iterator ei;
13199 bool simple_loop = false;
13200
13201 FOR_EACH_EDGE (e, ei, bb->succs)
13202 if (e->dest == bb)
13203 {
13204 simple_loop = true;
13205 break;
13206 }
13207
13208 if (simple_loop)
13209 {
13210 rtx next = BB_HEAD (bb);
13211 while (next
13212 && next != insn
13213 && distance < LEA_SEARCH_THRESHOLD)
13214 {
13215 if (INSN_P (next))
13216 {
13217 distance++;
13218
13219 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13220 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13221 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13222 && regno0 == DF_REF_REGNO (*use_rec))
13223 {
13224 /* Return DISTANCE if OP0 is used in memory
13225 address in NEXT. */
13226 return distance;
13227 }
13228
13229 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13230 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13231 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13232 && regno0 == DF_REF_REGNO (*def_rec))
13233 {
13234 /* Return -1 if OP0 is set in NEXT. */
13235 return -1;
13236 }
13237
13238 }
13239 next = NEXT_INSN (next);
13240 }
13241 }
13242 }
13243
13244 return -1;
13245 }
13246
13247 /* This macro tunes LEA priority vs. ADD; it takes effect when there is
13248 a choice between generating LEA or ADD.
13249 Negative value: ADD is preferred over LEA
13250 Zero: Neutral
13251 Positive value: LEA is preferred over ADD */
13252 #define IX86_LEA_PRIORITY 2
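/* Illustrative example (not in the original comments): with
   IX86_LEA_PRIORITY == 2, if the inputs were last set by a non-AGU insn
   3 insns back and the result feeds an address 4 insns ahead, then
   3 + 2 >= 4 and ix86_lea_for_add_ok below still prefers the LEA form.  */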
13253
13254 /* Return true if it is ok to optimize an ADD operation to an LEA
13255 operation to avoid flag register consumption. For processors
13256 like ATOM, if the destination register of the LEA holds an actual
13257 address which will be used soon, LEA is better; otherwise ADD
13258 is better. */
13259
13260 bool
13261 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13262 rtx insn, rtx operands[])
13263 {
13264 unsigned int regno0 = true_regnum (operands[0]);
13265 unsigned int regno1 = true_regnum (operands[1]);
13266 unsigned int regno2;
13267
13268 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13269 return regno0 != regno1;
13270
13271 regno2 = true_regnum (operands[2]);
13272
13273 /* If a = b + c, (a!=b && a!=c), we must use the lea form. */
13274 if (regno0 != regno1 && regno0 != regno2)
13275 return true;
13276 else
13277 {
13278 int dist_define, dist_use;
13279 dist_define = distance_non_agu_define (regno1, regno2, insn);
13280 if (dist_define <= 0)
13281 return true;
13282
13283 /* If this insn has both backward non-agu dependence and forward
13284 agu dependence, the one with the shorter distance takes effect. */
13285 dist_use = distance_agu_use (regno0, insn);
13286 if (dist_use <= 0
13287 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13288 return false;
13289
13290 return true;
13291 }
13292 }
13293
13294 /* Return true if destination reg of SET_BODY is shift count of
13295 USE_BODY. */
13296
13297 static bool
13298 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13299 {
13300 rtx set_dest;
13301 rtx shift_rtx;
13302 int i;
13303
13304 /* Retrieve destination of SET_BODY. */
13305 switch (GET_CODE (set_body))
13306 {
13307 case SET:
13308 set_dest = SET_DEST (set_body);
13309 if (!set_dest || !REG_P (set_dest))
13310 return false;
13311 break;
13312 case PARALLEL:
13313 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13314 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13315 use_body))
13316 return true;
13317 default:
13318 return false;
13319 break;
13320 }
13321
13322 /* Retrieve shift count of USE_BODY. */
13323 switch (GET_CODE (use_body))
13324 {
13325 case SET:
13326 shift_rtx = XEXP (use_body, 1);
13327 break;
13328 case PARALLEL:
13329 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13330 if (ix86_dep_by_shift_count_body (set_body,
13331 XVECEXP (use_body, 0, i)))
13332 return true;
13333 default:
13334 return false;
13335 break;
13336 }
13337
13338 if (shift_rtx
13339 && (GET_CODE (shift_rtx) == ASHIFT
13340 || GET_CODE (shift_rtx) == LSHIFTRT
13341 || GET_CODE (shift_rtx) == ASHIFTRT
13342 || GET_CODE (shift_rtx) == ROTATE
13343 || GET_CODE (shift_rtx) == ROTATERT))
13344 {
13345 rtx shift_count = XEXP (shift_rtx, 1);
13346
13347 /* Return true if shift count is dest of SET_BODY. */
13348 if (REG_P (shift_count)
13349 && true_regnum (set_dest) == true_regnum (shift_count))
13350 return true;
13351 }
13352
13353 return false;
13354 }
13355
13356 /* Return true if destination reg of SET_INSN is shift count of
13357 USE_INSN. */
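/* Illustrative example (not in the original comments): with SET_INSN
   setting %ecx and USE_INSN being "shl %cl, %edx", the shift count %cl
   aliases %ecx, so this returns true; machine descriptions can use this,
   e.g. in bypass conditions, to model that dependence.  */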
13358
13359 bool
13360 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13361 {
13362 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13363 PATTERN (use_insn));
13364 }
13365
13366 /* Return TRUE or FALSE depending on whether the unary operator meets the
13367 appropriate constraints. */
13368
13369 int
13370 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13371 enum machine_mode mode ATTRIBUTE_UNUSED,
13372 rtx operands[2] ATTRIBUTE_UNUSED)
13373 {
13374 /* If one of operands is memory, source and destination must match. */
13375 if ((MEM_P (operands[0])
13376 || MEM_P (operands[1]))
13377 && ! rtx_equal_p (operands[0], operands[1]))
13378 return FALSE;
13379 return TRUE;
13380 }
13381
13382 /* Post-reload splitter for converting an SF or DFmode value in an
13383 SSE register into an unsigned SImode. */
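/* Illustrative summary (not in the original comments): values below 2^31
   convert directly; values of at least 2^31 first have 2^31 subtracted,
   are converted with the signed cvtt instruction, and then have their
   sign bit flipped back by the final xor, reconstructing the unsigned
   result.  */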
13384
13385 void
13386 ix86_split_convert_uns_si_sse (rtx operands[])
13387 {
13388 enum machine_mode vecmode;
13389 rtx value, large, zero_or_two31, input, two31, x;
13390
13391 large = operands[1];
13392 zero_or_two31 = operands[2];
13393 input = operands[3];
13394 two31 = operands[4];
13395 vecmode = GET_MODE (large);
13396 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13397
13398 /* Load up the value into the low element. We must ensure that the other
13399 elements are valid floats -- zero is the easiest such value. */
13400 if (MEM_P (input))
13401 {
13402 if (vecmode == V4SFmode)
13403 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13404 else
13405 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13406 }
13407 else
13408 {
13409 input = gen_rtx_REG (vecmode, REGNO (input));
13410 emit_move_insn (value, CONST0_RTX (vecmode));
13411 if (vecmode == V4SFmode)
13412 emit_insn (gen_sse_movss (value, value, input));
13413 else
13414 emit_insn (gen_sse2_movsd (value, value, input));
13415 }
13416
13417 emit_move_insn (large, two31);
13418 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13419
13420 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13421 emit_insn (gen_rtx_SET (VOIDmode, large, x));
13422
13423 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13424 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13425
13426 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13427 emit_insn (gen_rtx_SET (VOIDmode, value, x));
13428
13429 large = gen_rtx_REG (V4SImode, REGNO (large));
13430 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13431
13432 x = gen_rtx_REG (V4SImode, REGNO (value));
13433 if (vecmode == V4SFmode)
13434 emit_insn (gen_sse2_cvttps2dq (x, value));
13435 else
13436 emit_insn (gen_sse2_cvttpd2dq (x, value));
13437 value = x;
13438
13439 emit_insn (gen_xorv4si3 (value, value, large));
13440 }
13441
13442 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13443 Expects the 64-bit DImode to be supplied in a pair of integral
13444 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13445 -mfpmath=sse, !optimize_size only. */
13446
13447 void
13448 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13449 {
13450 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13451 rtx int_xmm, fp_xmm;
13452 rtx biases, exponents;
13453 rtx x;
13454
13455 int_xmm = gen_reg_rtx (V4SImode);
13456 if (TARGET_INTER_UNIT_MOVES)
13457 emit_insn (gen_movdi_to_sse (int_xmm, input));
13458 else if (TARGET_SSE_SPLIT_REGS)
13459 {
13460 emit_clobber (int_xmm);
13461 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13462 }
13463 else
13464 {
13465 x = gen_reg_rtx (V2DImode);
13466 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13467 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13468 }
13469
13470 x = gen_rtx_CONST_VECTOR (V4SImode,
13471 gen_rtvec (4, GEN_INT (0x43300000UL),
13472 GEN_INT (0x45300000UL),
13473 const0_rtx, const0_rtx));
13474 exponents = validize_mem (force_const_mem (V4SImode, x));
13475
13476 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13477 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13478
13479 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13480 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13481 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13482 (0x1.0p84 + double(fp_value_hi_xmm)).
13483 Note these exponents differ by 32. */
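/* Worked example (illustrative, not from the original comment): for the
   input 0x00000002_00000005 the two lanes hold 2^52 + 5 and 2^84 + 2*2^32;
   after the bias subtraction below they hold 5.0 and 2.0 * 2^32, and their
   sum is the DFmode value of the unsigned input.  */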
13484
13485 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13486
13487 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13488 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13489 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13490 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13491 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13492 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13493 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13494 biases = validize_mem (force_const_mem (V2DFmode, biases));
13495 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13496
13497 /* Add the upper and lower DFmode values together. */
13498 if (TARGET_SSE3)
13499 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13500 else
13501 {
13502 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13503 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13504 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13505 }
13506
13507 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13508 }
13509
13510 /* Not used, but eases macroization of patterns. */
13511 void
13512 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13513 rtx input ATTRIBUTE_UNUSED)
13514 {
13515 gcc_unreachable ();
13516 }
13517
13518 /* Convert an unsigned SImode value into DFmode. Currently only used
13519 for SSE, but applicable anywhere. */
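/* Illustrative note (not in the original comments): adding 0x80000000
   just flips the sign bit, so as a signed SImode value X below equals
   INPUT - 2^31; converting X to DFmode and adding 2^31.0 back yields the
   unsigned value exactly.  */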
13520
13521 void
13522 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13523 {
13524 REAL_VALUE_TYPE TWO31r;
13525 rtx x, fp;
13526
13527 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13528 NULL, 1, OPTAB_DIRECT);
13529
13530 fp = gen_reg_rtx (DFmode);
13531 emit_insn (gen_floatsidf2 (fp, x));
13532
13533 real_ldexp (&TWO31r, &dconst1, 31);
13534 x = const_double_from_real_value (TWO31r, DFmode);
13535
13536 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13537 if (x != target)
13538 emit_move_insn (target, x);
13539 }
13540
13541 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13542 32-bit mode; otherwise we have a direct convert instruction. */
13543
13544 void
13545 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13546 {
13547 REAL_VALUE_TYPE TWO32r;
13548 rtx fp_lo, fp_hi, x;
13549
13550 fp_lo = gen_reg_rtx (DFmode);
13551 fp_hi = gen_reg_rtx (DFmode);
13552
13553 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13554
13555 real_ldexp (&TWO32r, &dconst1, 32);
13556 x = const_double_from_real_value (TWO32r, DFmode);
13557 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13558
13559 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13560
13561 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13562 0, OPTAB_DIRECT);
13563 if (x != target)
13564 emit_move_insn (target, x);
13565 }
13566
13567 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13568 For x86_32, -mfpmath=sse, !optimize_size only. */
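/* Illustrative note (not in the original comments): INPUT is split into
   16-bit halves, each of which converts to SFmode exactly; the result is
   fp_hi * 2^16 + fp_lo, so the only rounding happens in the final,
   correctly rounded addition.  */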
13569 void
13570 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13571 {
13572 REAL_VALUE_TYPE ONE16r;
13573 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13574
13575 real_ldexp (&ONE16r, &dconst1, 16);
13576 x = const_double_from_real_value (ONE16r, SFmode);
13577 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13578 NULL, 0, OPTAB_DIRECT);
13579 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13580 NULL, 0, OPTAB_DIRECT);
13581 fp_hi = gen_reg_rtx (SFmode);
13582 fp_lo = gen_reg_rtx (SFmode);
13583 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13584 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13585 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13586 0, OPTAB_DIRECT);
13587 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13588 0, OPTAB_DIRECT);
13589 if (!rtx_equal_p (target, fp_hi))
13590 emit_move_insn (target, fp_hi);
13591 }
13592
13593 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13594 then replicate the value for all elements of the vector
13595 register. */
13596
13597 rtx
13598 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13599 {
13600 rtvec v;
13601 switch (mode)
13602 {
13603 case SImode:
13604 gcc_assert (vect);
13605 v = gen_rtvec (4, value, value, value, value);
13606 return gen_rtx_CONST_VECTOR (V4SImode, v);
13607
13608 case DImode:
13609 gcc_assert (vect);
13610 v = gen_rtvec (2, value, value);
13611 return gen_rtx_CONST_VECTOR (V2DImode, v);
13612
13613 case SFmode:
13614 if (vect)
13615 v = gen_rtvec (4, value, value, value, value);
13616 else
13617 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13618 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13619 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13620
13621 case DFmode:
13622 if (vect)
13623 v = gen_rtvec (2, value, value);
13624 else
13625 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13626 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13627
13628 default:
13629 gcc_unreachable ();
13630 }
13631 }
13632
13633 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13634 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13635 for an SSE register. If VECT is true, then replicate the mask for
13636 all elements of the vector register. If INVERT is true, then create
13637 a mask excluding the sign bit. */
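/* Illustrative example (not in the original comments): for DFmode with
   VECT true this yields the V2DFmode constant { -0.0, -0.0 }, i.e. only
   bit 63 set in each element; with INVERT true each element instead has
   every bit except bit 63 set.  */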
13638
13639 rtx
13640 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13641 {
13642 enum machine_mode vec_mode, imode;
13643 HOST_WIDE_INT hi, lo;
13644 int shift = 63;
13645 rtx v;
13646 rtx mask;
13647
13648 /* Find the sign bit, sign extended to 2*HWI. */
13649 switch (mode)
13650 {
13651 case SImode:
13652 case SFmode:
13653 imode = SImode;
13654 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13655 lo = 0x80000000, hi = lo < 0;
13656 break;
13657
13658 case DImode:
13659 case DFmode:
13660 imode = DImode;
13661 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13662 if (HOST_BITS_PER_WIDE_INT >= 64)
13663 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13664 else
13665 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13666 break;
13667
13668 case TImode:
13669 case TFmode:
13670 vec_mode = VOIDmode;
13671 if (HOST_BITS_PER_WIDE_INT >= 64)
13672 {
13673 imode = TImode;
13674 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13675 }
13676 else
13677 {
13678 rtvec vec;
13679
13680 imode = DImode;
13681 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13682
13683 if (invert)
13684 {
13685 lo = ~lo, hi = ~hi;
13686 v = constm1_rtx;
13687 }
13688 else
13689 v = const0_rtx;
13690
13691 mask = immed_double_const (lo, hi, imode);
13692
13693 vec = gen_rtvec (2, v, mask);
13694 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13695 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13696
13697 return v;
13698 }
13699 break;
13700
13701 default:
13702 gcc_unreachable ();
13703 }
13704
13705 if (invert)
13706 lo = ~lo, hi = ~hi;
13707
13708 /* Force this value into the low part of a fp vector constant. */
13709 mask = immed_double_const (lo, hi, imode);
13710 mask = gen_lowpart (mode, mask);
13711
13712 if (vec_mode == VOIDmode)
13713 return force_reg (mode, mask);
13714
13715 v = ix86_build_const_vector (mode, vect, mask);
13716 return force_reg (vec_mode, v);
13717 }
13718
13719 /* Generate code for floating point ABS or NEG. */
13720
13721 void
13722 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13723 rtx operands[])
13724 {
13725 rtx mask, set, use, clob, dst, src;
13726 bool use_sse = false;
13727 bool vector_mode = VECTOR_MODE_P (mode);
13728 enum machine_mode elt_mode = mode;
13729
13730 if (vector_mode)
13731 {
13732 elt_mode = GET_MODE_INNER (mode);
13733 use_sse = true;
13734 }
13735 else if (mode == TFmode)
13736 use_sse = true;
13737 else if (TARGET_SSE_MATH)
13738 use_sse = SSE_FLOAT_MODE_P (mode);
13739
13740 /* NEG and ABS performed with SSE use bitwise mask operations.
13741 Create the appropriate mask now. */
13742 if (use_sse)
13743 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13744 else
13745 mask = NULL_RTX;
13746
13747 dst = operands[0];
13748 src = operands[1];
13749
13750 if (vector_mode)
13751 {
13752 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13753 set = gen_rtx_SET (VOIDmode, dst, set);
13754 emit_insn (set);
13755 }
13756 else
13757 {
13758 set = gen_rtx_fmt_e (code, mode, src);
13759 set = gen_rtx_SET (VOIDmode, dst, set);
13760 if (mask)
13761 {
13762 use = gen_rtx_USE (VOIDmode, mask);
13763 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13764 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13765 gen_rtvec (3, set, use, clob)));
13766 }
13767 else
13768 emit_insn (set);
13769 }
13770 }
13771
13772 /* Expand a copysign operation. Special case operand 0 being a constant. */
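/* Illustrative summary (not in the original comments): the expansion
   computes copysign (x, y) as (x & ~sign-mask) | (y & sign-mask) using
   masks from ix86_build_signbit_mask; when X is a constant its absolute
   value is folded up front, so only the sign mask is needed.  */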
13773
13774 void
13775 ix86_expand_copysign (rtx operands[])
13776 {
13777 enum machine_mode mode;
13778 rtx dest, op0, op1, mask, nmask;
13779
13780 dest = operands[0];
13781 op0 = operands[1];
13782 op1 = operands[2];
13783
13784 mode = GET_MODE (dest);
13785
13786 if (GET_CODE (op0) == CONST_DOUBLE)
13787 {
13788 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13789
13790 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13791 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13792
13793 if (mode == SFmode || mode == DFmode)
13794 {
13795 enum machine_mode vmode;
13796
13797 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13798
13799 if (op0 == CONST0_RTX (mode))
13800 op0 = CONST0_RTX (vmode);
13801 else
13802 {
13803 rtvec v;
13804
13805 if (mode == SFmode)
13806 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13807 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13808 else
13809 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13810
13811 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13812 }
13813 }
13814 else if (op0 != CONST0_RTX (mode))
13815 op0 = force_reg (mode, op0);
13816
13817 mask = ix86_build_signbit_mask (mode, 0, 0);
13818
13819 if (mode == SFmode)
13820 copysign_insn = gen_copysignsf3_const;
13821 else if (mode == DFmode)
13822 copysign_insn = gen_copysigndf3_const;
13823 else
13824 copysign_insn = gen_copysigntf3_const;
13825
13826 emit_insn (copysign_insn (dest, op0, op1, mask));
13827 }
13828 else
13829 {
13830 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13831
13832 nmask = ix86_build_signbit_mask (mode, 0, 1);
13833 mask = ix86_build_signbit_mask (mode, 0, 0);
13834
13835 if (mode == SFmode)
13836 copysign_insn = gen_copysignsf3_var;
13837 else if (mode == DFmode)
13838 copysign_insn = gen_copysigndf3_var;
13839 else
13840 copysign_insn = gen_copysigntf3_var;
13841
13842 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13843 }
13844 }
13845
13846 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13847 be a constant, and so has already been expanded into a vector constant. */
13848
13849 void
13850 ix86_split_copysign_const (rtx operands[])
13851 {
13852 enum machine_mode mode, vmode;
13853 rtx dest, op0, op1, mask, x;
13854
13855 dest = operands[0];
13856 op0 = operands[1];
13857 op1 = operands[2];
13858 mask = operands[3];
13859
13860 mode = GET_MODE (dest);
13861 vmode = GET_MODE (mask);
13862
13863 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13864 x = gen_rtx_AND (vmode, dest, mask);
13865 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13866
13867 if (op0 != CONST0_RTX (vmode))
13868 {
13869 x = gen_rtx_IOR (vmode, dest, op0);
13870 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13871 }
13872 }
13873
13874 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13875 so we have to do two masks. */
13876
13877 void
13878 ix86_split_copysign_var (rtx operands[])
13879 {
13880 enum machine_mode mode, vmode;
13881 rtx dest, scratch, op0, op1, mask, nmask, x;
13882
13883 dest = operands[0];
13884 scratch = operands[1];
13885 op0 = operands[2];
13886 op1 = operands[3];
13887 nmask = operands[4];
13888 mask = operands[5];
13889
13890 mode = GET_MODE (dest);
13891 vmode = GET_MODE (mask);
13892
13893 if (rtx_equal_p (op0, op1))
13894 {
13895 /* Shouldn't happen often (it's useless, obviously), but when it does
13896 we'd generate incorrect code if we continue below. */
13897 emit_move_insn (dest, op0);
13898 return;
13899 }
13900
13901 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13902 {
13903 gcc_assert (REGNO (op1) == REGNO (scratch));
13904
13905 x = gen_rtx_AND (vmode, scratch, mask);
13906 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13907
13908 dest = mask;
13909 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13910 x = gen_rtx_NOT (vmode, dest);
13911 x = gen_rtx_AND (vmode, x, op0);
13912 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13913 }
13914 else
13915 {
13916 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13917 {
13918 x = gen_rtx_AND (vmode, scratch, mask);
13919 }
13920 else /* alternative 2,4 */
13921 {
13922 gcc_assert (REGNO (mask) == REGNO (scratch));
13923 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13924 x = gen_rtx_AND (vmode, scratch, op1);
13925 }
13926 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13927
13928 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13929 {
13930 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13931 x = gen_rtx_AND (vmode, dest, nmask);
13932 }
13933 else /* alternative 3,4 */
13934 {
13935 gcc_assert (REGNO (nmask) == REGNO (dest));
13936 dest = nmask;
13937 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13938 x = gen_rtx_AND (vmode, dest, op0);
13939 }
13940 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13941 }
13942
13943 x = gen_rtx_IOR (vmode, dest, scratch);
13944 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13945 }
13946
13947 /* Return TRUE or FALSE depending on whether the first SET in INSN
13948 has source and destination with matching CC modes, and that the
13949 CC mode is at least as constrained as REQ_MODE. */
13950
13951 int
13952 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13953 {
13954 rtx set;
13955 enum machine_mode set_mode;
13956
13957 set = PATTERN (insn);
13958 if (GET_CODE (set) == PARALLEL)
13959 set = XVECEXP (set, 0, 0);
13960 gcc_assert (GET_CODE (set) == SET);
13961 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13962
13963 set_mode = GET_MODE (SET_DEST (set));
13964 switch (set_mode)
13965 {
13966 case CCNOmode:
13967 if (req_mode != CCNOmode
13968 && (req_mode != CCmode
13969 || XEXP (SET_SRC (set), 1) != const0_rtx))
13970 return 0;
13971 break;
13972 case CCmode:
13973 if (req_mode == CCGCmode)
13974 return 0;
13975 /* FALLTHRU */
13976 case CCGCmode:
13977 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13978 return 0;
13979 /* FALLTHRU */
13980 case CCGOCmode:
13981 if (req_mode == CCZmode)
13982 return 0;
13983 /* FALLTHRU */
13984 case CCAmode:
13985 case CCCmode:
13986 case CCOmode:
13987 case CCSmode:
13988 case CCZmode:
13989 break;
13990
13991 default:
13992 gcc_unreachable ();
13993 }
13994
13995 return (GET_MODE (SET_SRC (set)) == set_mode);
13996 }
13997
13998 /* Generate insn patterns to do an integer compare of OPERANDS. */
13999
14000 static rtx
14001 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14002 {
14003 enum machine_mode cmpmode;
14004 rtx tmp, flags;
14005
14006 cmpmode = SELECT_CC_MODE (code, op0, op1);
14007 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14008
14009 /* This is very simple, but making the interface the same as in the
14010 FP case makes the rest of the code easier. */
14011 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14012 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14013
14014 /* Return the test that should be put into the flags user, i.e.
14015 the bcc, scc, or cmov instruction. */
14016 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14017 }
14018
14019 /* Figure out whether to use ordered or unordered fp comparisons.
14020 Return the appropriate mode to use. */
14021
14022 enum machine_mode
14023 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14024 {
14025 /* ??? In order to make all comparisons reversible, we do all comparisons
14026 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14027 between all forms of trapping and nontrapping comparisons, we can make inequality
14028 comparisons trapping again, since it results in better code when using
14029 FCOM based compares. */
14030 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
14031 }
14032
14033 enum machine_mode
14034 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14035 {
14036 enum machine_mode mode = GET_MODE (op0);
14037
14038 if (SCALAR_FLOAT_MODE_P (mode))
14039 {
14040 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14041 return ix86_fp_compare_mode (code);
14042 }
14043
14044 switch (code)
14045 {
14046 /* Only zero flag is needed. */
14047 case EQ: /* ZF=0 */
14048 case NE: /* ZF!=0 */
14049 return CCZmode;
14050 /* Codes needing carry flag. */
14051 case GEU: /* CF=0 */
14052 case LTU: /* CF=1 */
14053 /* Detect overflow checks. They need just the carry flag. */
14054 if (GET_CODE (op0) == PLUS
14055 && rtx_equal_p (op1, XEXP (op0, 0)))
14056 return CCCmode;
14057 else
14058 return CCmode;
14059 case GTU: /* CF=0 & ZF=0 */
14060 case LEU: /* CF=1 | ZF=1 */
14061 /* Detect overflow checks. They need just the carry flag. */
14062 if (GET_CODE (op0) == MINUS
14063 && rtx_equal_p (op1, XEXP (op0, 0)))
14064 return CCCmode;
14065 else
14066 return CCmode;
14067 /* Codes possibly doable only with sign flag when
14068 comparing against zero. */
14069 case GE: /* SF=OF or SF=0 */
14070 case LT: /* SF<>OF or SF=1 */
14071 if (op1 == const0_rtx)
14072 return CCGOCmode;
14073 else
14074 /* For other cases Carry flag is not required. */
14075 return CCGCmode;
14076 /* Codes doable only with sign flag when comparing
14077 against zero, but we miss jump instruction for it
14078 so we need to use relational tests against overflow
14079 that thus needs to be zero. */
14080 case GT: /* ZF=0 & SF=OF */
14081 case LE: /* ZF=1 | SF<>OF */
14082 if (op1 == const0_rtx)
14083 return CCNOmode;
14084 else
14085 return CCGCmode;
14086 /* The strcmp pattern does (use flags) and combine may ask us for the proper
14087 mode. */
14088 case USE:
14089 return CCmode;
14090 default:
14091 gcc_unreachable ();
14092 }
14093 }
14094
14095 /* Return the fixed registers used for condition codes. */
14096
14097 static bool
14098 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14099 {
14100 *p1 = FLAGS_REG;
14101 *p2 = FPSR_REG;
14102 return true;
14103 }
14104
14105 /* If two condition code modes are compatible, return a condition code
14106 mode which is compatible with both. Otherwise, return
14107 VOIDmode. */
14108
14109 static enum machine_mode
14110 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14111 {
14112 if (m1 == m2)
14113 return m1;
14114
14115 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14116 return VOIDmode;
14117
14118 if ((m1 == CCGCmode && m2 == CCGOCmode)
14119 || (m1 == CCGOCmode && m2 == CCGCmode))
14120 return CCGCmode;
14121
14122 switch (m1)
14123 {
14124 default:
14125 gcc_unreachable ();
14126
14127 case CCmode:
14128 case CCGCmode:
14129 case CCGOCmode:
14130 case CCNOmode:
14131 case CCAmode:
14132 case CCCmode:
14133 case CCOmode:
14134 case CCSmode:
14135 case CCZmode:
14136 switch (m2)
14137 {
14138 default:
14139 return VOIDmode;
14140
14141 case CCmode:
14142 case CCGCmode:
14143 case CCGOCmode:
14144 case CCNOmode:
14145 case CCAmode:
14146 case CCCmode:
14147 case CCOmode:
14148 case CCSmode:
14149 case CCZmode:
14150 return CCmode;
14151 }
14152
14153 case CCFPmode:
14154 case CCFPUmode:
14155 /* These are only compatible with themselves, which we already
14156 checked above. */
14157 return VOIDmode;
14158 }
14159 }
14160
14161 /* Split comparison code CODE into comparisons we can do using branch
14162 instructions. BYPASS_CODE is the comparison code for the branch that will
14163 branch around FIRST_CODE and SECOND_CODE. If one of the branches
14164 is not required, its value is set to UNKNOWN.
14165 We never require more than two branches. */
14166
14167 void
14168 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14169 enum rtx_code *first_code,
14170 enum rtx_code *second_code)
14171 {
14172 *first_code = code;
14173 *bypass_code = UNKNOWN;
14174 *second_code = UNKNOWN;
14175
14176 /* The fcomi comparison sets flags as follows:
14177
14178 cmp ZF PF CF
14179 > 0 0 0
14180 < 0 0 1
14181 = 1 0 0
14182 un 1 1 1 */
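/* Illustrative example (not in the original comments): LT cannot be
   tested directly because CF is also set for unordered operands, so under
   TARGET_IEEE_FP it becomes FIRST_CODE = UNLT with BYPASS_CODE = UNORDERED,
   i.e. branch around the UNLT test whenever PF signals an unordered
   result.  */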
14183
14184 switch (code)
14185 {
14186 case GT: /* GTU - CF=0 & ZF=0 */
14187 case GE: /* GEU - CF=0 */
14188 case ORDERED: /* PF=0 */
14189 case UNORDERED: /* PF=1 */
14190 case UNEQ: /* EQ - ZF=1 */
14191 case UNLT: /* LTU - CF=1 */
14192 case UNLE: /* LEU - CF=1 | ZF=1 */
14193 case LTGT: /* EQ - ZF=0 */
14194 break;
14195 case LT: /* LTU - CF=1 - fails on unordered */
14196 *first_code = UNLT;
14197 *bypass_code = UNORDERED;
14198 break;
14199 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14200 *first_code = UNLE;
14201 *bypass_code = UNORDERED;
14202 break;
14203 case EQ: /* EQ - ZF=1 - fails on unordered */
14204 *first_code = UNEQ;
14205 *bypass_code = UNORDERED;
14206 break;
14207 case NE: /* NE - ZF=0 - fails on unordered */
14208 *first_code = LTGT;
14209 *second_code = UNORDERED;
14210 break;
14211 case UNGE: /* GEU - CF=0 - fails on unordered */
14212 *first_code = GE;
14213 *second_code = UNORDERED;
14214 break;
14215 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14216 *first_code = GT;
14217 *second_code = UNORDERED;
14218 break;
14219 default:
14220 gcc_unreachable ();
14221 }
14222 if (!TARGET_IEEE_FP)
14223 {
14224 *second_code = UNKNOWN;
14225 *bypass_code = UNKNOWN;
14226 }
14227 }
14228
14229 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
14230 All of the following functions use the number of instructions as the cost metric.
14231 In the future this should be tweaked to compute bytes for optimize_size and
14232 take into account the performance of various instructions on various CPUs. */
14233 static int
14234 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
14235 {
14236 if (!TARGET_IEEE_FP)
14237 return 4;
14238 /* The cost of code output by ix86_expand_fp_compare. */
14239 switch (code)
14240 {
14241 case UNLE:
14242 case UNLT:
14243 case LTGT:
14244 case GT:
14245 case GE:
14246 case UNORDERED:
14247 case ORDERED:
14248 case UNEQ:
14249 return 4;
14250 break;
14251 case LT:
14252 case NE:
14253 case EQ:
14254 case UNGE:
14255 return 5;
14256 break;
14257 case LE:
14258 case UNGT:
14259 return 6;
14260 break;
14261 default:
14262 gcc_unreachable ();
14263 }
14264 }
14265
14266 /* Return cost of comparison done using fcomi operation.
14267 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14268 static int
14269 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14270 {
14271 enum rtx_code bypass_code, first_code, second_code;
14272 /* Return arbitrarily high cost when instruction is not supported - this
14273 prevents gcc from using it. */
14274 if (!TARGET_CMOVE)
14275 return 1024;
14276 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14277 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14278 }
14279
14280 /* Return cost of comparison done using sahf operation.
14281 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14282 static int
14283 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14284 {
14285 enum rtx_code bypass_code, first_code, second_code;
14286 /* Return arbitrarily high cost when instruction is not preferred - this
14287 prevents gcc from using it. */
14288 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14289 return 1024;
14290 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14291 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14292 }
14293
14294 /* Compute cost of the comparison done using any method.
14295 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14296 static int
14297 ix86_fp_comparison_cost (enum rtx_code code)
14298 {
14299 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14300 int min;
14301
14302 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14303 sahf_cost = ix86_fp_comparison_sahf_cost (code);
14304
14305 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14306 if (min > sahf_cost)
14307 min = sahf_cost;
14308 if (min > fcomi_cost)
14309 min = fcomi_cost;
14310 return min;
14311 }
14312
14313 /* Return true if we should use an FCOMI instruction for this
14314 fp comparison. */
14315
14316 int
14317 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14318 {
14319 enum rtx_code swapped_code = swap_condition (code);
14320
14321 return ((ix86_fp_comparison_cost (code)
14322 == ix86_fp_comparison_fcomi_cost (code))
14323 || (ix86_fp_comparison_cost (swapped_code)
14324 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14325 }
14326
14327 /* Swap, force into registers, or otherwise massage the two operands
14328 to a fp comparison. The operands are updated in place; the new
14329 comparison code is returned. */
14330
14331 static enum rtx_code
14332 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14333 {
14334 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14335 rtx op0 = *pop0, op1 = *pop1;
14336 enum machine_mode op_mode = GET_MODE (op0);
14337 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14338
14339 /* All of the unordered compare instructions only work on registers.
14340 The same is true of the fcomi compare instructions. The XFmode
14341 compare instructions require registers except when comparing
14342 against zero or when converting operand 1 from fixed point to
14343 floating point. */
14344
14345 if (!is_sse
14346 && (fpcmp_mode == CCFPUmode
14347 || (op_mode == XFmode
14348 && ! (standard_80387_constant_p (op0) == 1
14349 || standard_80387_constant_p (op1) == 1)
14350 && GET_CODE (op1) != FLOAT)
14351 || ix86_use_fcomi_compare (code)))
14352 {
14353 op0 = force_reg (op_mode, op0);
14354 op1 = force_reg (op_mode, op1);
14355 }
14356 else
14357 {
14358 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14359 things around if they appear profitable, otherwise force op0
14360 into a register. */
14361
14362 if (standard_80387_constant_p (op0) == 0
14363 || (MEM_P (op0)
14364 && ! (standard_80387_constant_p (op1) == 0
14365 || MEM_P (op1))))
14366 {
14367 rtx tmp;
14368 tmp = op0, op0 = op1, op1 = tmp;
14369 code = swap_condition (code);
14370 }
14371
14372 if (!REG_P (op0))
14373 op0 = force_reg (op_mode, op0);
14374
14375 if (CONSTANT_P (op1))
14376 {
14377 int tmp = standard_80387_constant_p (op1);
14378 if (tmp == 0)
14379 op1 = validize_mem (force_const_mem (op_mode, op1));
14380 else if (tmp == 1)
14381 {
14382 if (TARGET_CMOVE)
14383 op1 = force_reg (op_mode, op1);
14384 }
14385 else
14386 op1 = force_reg (op_mode, op1);
14387 }
14388 }
14389
14390 /* Try to rearrange the comparison to make it cheaper. */
14391 if (ix86_fp_comparison_cost (code)
14392 > ix86_fp_comparison_cost (swap_condition (code))
14393 && (REG_P (op1) || can_create_pseudo_p ()))
14394 {
14395 rtx tmp;
14396 tmp = op0, op0 = op1, op1 = tmp;
14397 code = swap_condition (code);
14398 if (!REG_P (op0))
14399 op0 = force_reg (op_mode, op0);
14400 }
14401
14402 *pop0 = op0;
14403 *pop1 = op1;
14404 return code;
14405 }
14406
14407 /* Convert the comparison codes we use to represent FP comparisons to the integer
14408 codes that will result in a proper branch. Return UNKNOWN if no such code
14409 is available. */
14410
14411 enum rtx_code
14412 ix86_fp_compare_code_to_integer (enum rtx_code code)
14413 {
14414 switch (code)
14415 {
14416 case GT:
14417 return GTU;
14418 case GE:
14419 return GEU;
14420 case ORDERED:
14421 case UNORDERED:
14422 return code;
14423 break;
14424 case UNEQ:
14425 return EQ;
14426 break;
14427 case UNLT:
14428 return LTU;
14429 break;
14430 case UNLE:
14431 return LEU;
14432 break;
14433 case LTGT:
14434 return NE;
14435 break;
14436 default:
14437 return UNKNOWN;
14438 }
14439 }
14440
14441 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14442
14443 static rtx
14444 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14445 rtx *second_test, rtx *bypass_test)
14446 {
14447 enum machine_mode fpcmp_mode, intcmp_mode;
14448 rtx tmp, tmp2;
14449 int cost = ix86_fp_comparison_cost (code);
14450 enum rtx_code bypass_code, first_code, second_code;
14451
14452 fpcmp_mode = ix86_fp_compare_mode (code);
14453 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14454
14455 if (second_test)
14456 *second_test = NULL_RTX;
14457 if (bypass_test)
14458 *bypass_test = NULL_RTX;
14459
14460 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14461
14462 /* Do fcomi/sahf based test when profitable. */
14463 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14464 && (bypass_code == UNKNOWN || bypass_test)
14465 && (second_code == UNKNOWN || second_test))
14466 {
14467 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14468 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14469 tmp);
14470 if (TARGET_CMOVE)
14471 emit_insn (tmp);
14472 else
14473 {
14474 gcc_assert (TARGET_SAHF);
14475
14476 if (!scratch)
14477 scratch = gen_reg_rtx (HImode);
14478 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14479
14480 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14481 }
14482
14483 /* The FP codes work out to act like unsigned. */
14484 intcmp_mode = fpcmp_mode;
14485 code = first_code;
14486 if (bypass_code != UNKNOWN)
14487 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14488 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14489 const0_rtx);
14490 if (second_code != UNKNOWN)
14491 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14492 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14493 const0_rtx);
14494 }
14495 else
14496 {
14497 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14498 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14499 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14500 if (!scratch)
14501 scratch = gen_reg_rtx (HImode);
14502 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14503
14504 /* In the unordered case, we have to check C2 for NaN's, which
14505 doesn't happen to work out to anything nice combination-wise.
14506 So do some bit twiddling on the value we've got in AH to come
14507 up with an appropriate set of condition codes. */
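/* Illustrative note (not in the original comments): after fnstsw the AH
   byte holds status-word bits 8-15, so C0 shows up as 0x01, C2 as 0x04
   and C3 as 0x40; the 0x45 masks below therefore test C0|C2|C3 at once.  */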
14508
14509 intcmp_mode = CCNOmode;
14510 switch (code)
14511 {
14512 case GT:
14513 case UNGT:
14514 if (code == GT || !TARGET_IEEE_FP)
14515 {
14516 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14517 code = EQ;
14518 }
14519 else
14520 {
14521 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14522 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14523 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14524 intcmp_mode = CCmode;
14525 code = GEU;
14526 }
14527 break;
14528 case LT:
14529 case UNLT:
14530 if (code == LT && TARGET_IEEE_FP)
14531 {
14532 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14533 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14534 intcmp_mode = CCmode;
14535 code = EQ;
14536 }
14537 else
14538 {
14539 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14540 code = NE;
14541 }
14542 break;
14543 case GE:
14544 case UNGE:
14545 if (code == GE || !TARGET_IEEE_FP)
14546 {
14547 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14548 code = EQ;
14549 }
14550 else
14551 {
14552 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14553 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14554 GEN_INT (0x01)));
14555 code = NE;
14556 }
14557 break;
14558 case LE:
14559 case UNLE:
14560 if (code == LE && TARGET_IEEE_FP)
14561 {
14562 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14563 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14564 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14565 intcmp_mode = CCmode;
14566 code = LTU;
14567 }
14568 else
14569 {
14570 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14571 code = NE;
14572 }
14573 break;
14574 case EQ:
14575 case UNEQ:
14576 if (code == EQ && TARGET_IEEE_FP)
14577 {
14578 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14579 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14580 intcmp_mode = CCmode;
14581 code = EQ;
14582 }
14583 else
14584 {
14585 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14586 code = NE;
14587 break;
14588 }
14589 break;
14590 case NE:
14591 case LTGT:
14592 if (code == NE && TARGET_IEEE_FP)
14593 {
14594 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14595 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14596 GEN_INT (0x40)));
14597 code = NE;
14598 }
14599 else
14600 {
14601 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14602 code = EQ;
14603 }
14604 break;
14605
14606 case UNORDERED:
14607 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14608 code = NE;
14609 break;
14610 case ORDERED:
14611 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14612 code = EQ;
14613 break;
14614
14615 default:
14616 gcc_unreachable ();
14617 }
14618 }
14619
14620 /* Return the test that should be put into the flags user, i.e.
14621 the bcc, scc, or cmov instruction. */
14622 return gen_rtx_fmt_ee (code, VOIDmode,
14623 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14624 const0_rtx);
14625 }
14626
14627 rtx
14628 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14629 {
14630 rtx op0, op1, ret;
14631 op0 = ix86_compare_op0;
14632 op1 = ix86_compare_op1;
14633
14634 if (second_test)
14635 *second_test = NULL_RTX;
14636 if (bypass_test)
14637 *bypass_test = NULL_RTX;
14638
14639 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14640 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14641
14642 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14643 {
14644 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14645 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14646 second_test, bypass_test);
14647 }
14648 else
14649 ret = ix86_expand_int_compare (code, op0, op1);
14650
14651 return ret;
14652 }
14653
14654 /* Return true if the CODE will result in nontrivial jump sequence. */
14655 bool
14656 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14657 {
14658 enum rtx_code bypass_code, first_code, second_code;
14659 if (!TARGET_CMOVE)
14660 return true;
14661 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14662 return bypass_code != UNKNOWN || second_code != UNKNOWN;
14663 }
14664
14665 void
14666 ix86_expand_branch (enum rtx_code code, rtx label)
14667 {
14668 rtx tmp;
14669
14670 switch (GET_MODE (ix86_compare_op0))
14671 {
14672 case QImode:
14673 case HImode:
14674 case SImode:
14675 simple:
14676 tmp = ix86_expand_compare (code, NULL, NULL);
14677 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14678 gen_rtx_LABEL_REF (VOIDmode, label),
14679 pc_rtx);
14680 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14681 return;
14682
14683 case SFmode:
14684 case DFmode:
14685 case XFmode:
14686 {
14687 rtvec vec;
14688 int use_fcomi;
14689 enum rtx_code bypass_code, first_code, second_code;
14690
14691 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14692 &ix86_compare_op1);
14693
14694 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14695
14696 /* Check whether we will use the natural sequence with one jump. If
14697 so, we can expand the jump early. Otherwise delay expansion by
14698 creating a compound insn so as not to confuse the optimizers. */
14699 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14700 {
14701 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14702 gen_rtx_LABEL_REF (VOIDmode, label),
14703 pc_rtx, NULL_RTX, NULL_RTX);
14704 }
14705 else
14706 {
14707 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14708 ix86_compare_op0, ix86_compare_op1);
14709 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14710 gen_rtx_LABEL_REF (VOIDmode, label),
14711 pc_rtx);
14712 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14713
14714 use_fcomi = ix86_use_fcomi_compare (code);
14715 vec = rtvec_alloc (3 + !use_fcomi);
14716 RTVEC_ELT (vec, 0) = tmp;
14717 RTVEC_ELT (vec, 1)
14718 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14719 RTVEC_ELT (vec, 2)
14720 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14721 if (! use_fcomi)
14722 RTVEC_ELT (vec, 3)
14723 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14724
14725 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14726 }
14727 return;
14728 }
14729
14730 case DImode:
14731 if (TARGET_64BIT)
14732 goto simple;
14733 case TImode:
14734 /* Expand DImode/TImode branch into multiple compare+branch. */
14735 {
14736 rtx lo[2], hi[2], label2;
14737 enum rtx_code code1, code2, code3;
14738 enum machine_mode submode;
14739
14740 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14741 {
14742 tmp = ix86_compare_op0;
14743 ix86_compare_op0 = ix86_compare_op1;
14744 ix86_compare_op1 = tmp;
14745 code = swap_condition (code);
14746 }
14747 if (GET_MODE (ix86_compare_op0) == DImode)
14748 {
14749 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14750 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14751 submode = SImode;
14752 }
14753 else
14754 {
14755 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14756 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14757 submode = DImode;
14758 }
14759
14760 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14761 avoid two branches. This costs one extra insn, so disable when
14762 optimizing for size. */
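/*
 * For example (an illustrative sketch, not the exact RTL), a DImode
 * a == b on a 32-bit target then becomes roughly:
 *   xorl  hi(b), hi(a)
 *   xorl  lo(b), lo(a)
 *   orl   hi(a), lo(a)
 * followed by a single branch on the zero flag.
 */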
14763
14764 if ((code == EQ || code == NE)
14765 && (!optimize_insn_for_size_p ()
14766 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14767 {
14768 rtx xor0, xor1;
14769
14770 xor1 = hi[0];
14771 if (hi[1] != const0_rtx)
14772 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14773 NULL_RTX, 0, OPTAB_WIDEN);
14774
14775 xor0 = lo[0];
14776 if (lo[1] != const0_rtx)
14777 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14778 NULL_RTX, 0, OPTAB_WIDEN);
14779
14780 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14781 NULL_RTX, 0, OPTAB_WIDEN);
14782
14783 ix86_compare_op0 = tmp;
14784 ix86_compare_op1 = const0_rtx;
14785 ix86_expand_branch (code, label);
14786 return;
14787 }
14788
14789 /* Otherwise, if we are doing less-than or greater-or-equal,
14790 op1 is a constant, and its low word is zero, then we can just
14791 examine the high word. Similarly for a low word of -1 and
14792 less-or-equal or greater-than. */
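/* For instance, a <u C where the constant C has a zero low word reduces
to hi(a) <u hi(C), so a single word compare and branch suffices (an
illustrative case of the reductions below). */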
14793
14794 if (CONST_INT_P (hi[1]))
14795 switch (code)
14796 {
14797 case LT: case LTU: case GE: case GEU:
14798 if (lo[1] == const0_rtx)
14799 {
14800 ix86_compare_op0 = hi[0];
14801 ix86_compare_op1 = hi[1];
14802 ix86_expand_branch (code, label);
14803 return;
14804 }
14805 break;
14806 case LE: case LEU: case GT: case GTU:
14807 if (lo[1] == constm1_rtx)
14808 {
14809 ix86_compare_op0 = hi[0];
14810 ix86_compare_op1 = hi[1];
14811 ix86_expand_branch (code, label);
14812 return;
14813 }
14814 break;
14815 default:
14816 break;
14817 }
14818
14819 /* Otherwise, we need two or three jumps. */
14820
14821 label2 = gen_label_rtx ();
14822
14823 code1 = code;
14824 code2 = swap_condition (code);
14825 code3 = unsigned_condition (code);
14826
14827 switch (code)
14828 {
14829 case LT: case GT: case LTU: case GTU:
14830 break;
14831
14832 case LE: code1 = LT; code2 = GT; break;
14833 case GE: code1 = GT; code2 = LT; break;
14834 case LEU: code1 = LTU; code2 = GTU; break;
14835 case GEU: code1 = GTU; code2 = LTU; break;
14836
14837 case EQ: code1 = UNKNOWN; code2 = NE; break;
14838 case NE: code2 = UNKNOWN; break;
14839
14840 default:
14841 gcc_unreachable ();
14842 }
14843
14844 /*
14845 * a < b =>
14846 * if (hi(a) < hi(b)) goto true;
14847 * if (hi(a) > hi(b)) goto false;
14848 * if (lo(a) < lo(b)) goto true;
14849 * false:
14850 */
14851
14852 ix86_compare_op0 = hi[0];
14853 ix86_compare_op1 = hi[1];
14854
14855 if (code1 != UNKNOWN)
14856 ix86_expand_branch (code1, label);
14857 if (code2 != UNKNOWN)
14858 ix86_expand_branch (code2, label2);
14859
14860 ix86_compare_op0 = lo[0];
14861 ix86_compare_op1 = lo[1];
14862 ix86_expand_branch (code3, label);
14863
14864 if (code2 != UNKNOWN)
14865 emit_label (label2);
14866 return;
14867 }
14868
14869 default:
14870 /* If we have already emitted a compare insn, go straight to simple.
14871 ix86_expand_compare won't emit anything if ix86_compare_emitted
14872 is non-NULL. */
14873 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
14874 goto simple;
14875 }
14876 }
14877
14878 /* Split branch based on floating point condition. */
14879 void
14880 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14881 rtx target1, rtx target2, rtx tmp, rtx pushed)
14882 {
14883 rtx second, bypass;
14884 rtx label = NULL_RTX;
14885 rtx condition;
14886 int bypass_probability = -1, second_probability = -1, probability = -1;
14887 rtx i;
14888
14889 if (target2 != pc_rtx)
14890 {
14891 rtx tmp = target2;
14892 code = reverse_condition_maybe_unordered (code);
14893 target2 = target1;
14894 target1 = tmp;
14895 }
14896
14897 condition = ix86_expand_fp_compare (code, op1, op2,
14898 tmp, &second, &bypass);
14899
14900 /* Remove pushed operand from stack. */
14901 if (pushed)
14902 ix86_free_from_memory (GET_MODE (pushed));
14903
14904 if (split_branch_probability >= 0)
14905 {
14906 /* Distribute the probabilities across the jumps.
14907 Assume that the BYPASS and SECOND jumps always test
14908 for UNORDERED. */
14909 probability = split_branch_probability;
14910
14911 /* A value of 1 is low enough that there is no need to update the
14912 probability. Later we may run some experiments to see whether
14913 unordered values are more frequent in practice. */
14914 if (bypass)
14915 bypass_probability = 1;
14916 if (second)
14917 second_probability = 1;
14918 }
14919 if (bypass != NULL_RTX)
14920 {
14921 label = gen_label_rtx ();
14922 i = emit_jump_insn (gen_rtx_SET
14923 (VOIDmode, pc_rtx,
14924 gen_rtx_IF_THEN_ELSE (VOIDmode,
14925 bypass,
14926 gen_rtx_LABEL_REF (VOIDmode,
14927 label),
14928 pc_rtx)));
14929 if (bypass_probability >= 0)
14930 add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
14931 }
14932 i = emit_jump_insn (gen_rtx_SET
14933 (VOIDmode, pc_rtx,
14934 gen_rtx_IF_THEN_ELSE (VOIDmode,
14935 condition, target1, target2)));
14936 if (probability >= 0)
14937 add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
14938 if (second != NULL_RTX)
14939 {
14940 i = emit_jump_insn (gen_rtx_SET
14941 (VOIDmode, pc_rtx,
14942 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14943 target2)));
14944 if (second_probability >= 0)
14945 add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability));
14946 }
14947 if (label != NULL_RTX)
14948 emit_label (label);
14949 }
14950
14951 int
14952 ix86_expand_setcc (enum rtx_code code, rtx dest)
14953 {
14954 rtx ret, tmp, tmpreg, equiv;
14955 rtx second_test, bypass_test;
14956
14957 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14958 return 0; /* FAIL */
14959
14960 gcc_assert (GET_MODE (dest) == QImode);
14961
14962 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14963 PUT_MODE (ret, QImode);
14964
14965 tmp = dest;
14966 tmpreg = dest;
14967
14968 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14969 if (bypass_test || second_test)
14970 {
14971 rtx test = second_test;
14972 int bypass = 0;
14973 rtx tmp2 = gen_reg_rtx (QImode);
14974 if (bypass_test)
14975 {
14976 gcc_assert (!second_test);
14977 test = bypass_test;
14978 bypass = 1;
14979 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14980 }
14981 PUT_MODE (test, QImode);
14982 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14983
14984 if (bypass)
14985 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14986 else
14987 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14988 }
14989
14990 /* Attach a REG_EQUAL note describing the comparison result. */
14991 if (ix86_compare_op0 && ix86_compare_op1)
14992 {
14993 equiv = simplify_gen_relational (code, QImode,
14994 GET_MODE (ix86_compare_op0),
14995 ix86_compare_op0, ix86_compare_op1);
14996 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14997 }
14998
14999 return 1; /* DONE */
15000 }
15001
15002 /* Expand comparison setting or clearing carry flag. Return true when
15003 successful and set pop for the operation. */
15004 static bool
15005 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15006 {
15007 enum machine_mode mode =
15008 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15009
15010 /* Do not handle DImode compares that go through special path. */
15011 if (mode == (TARGET_64BIT ? TImode : DImode))
15012 return false;
15013
15014 if (SCALAR_FLOAT_MODE_P (mode))
15015 {
15016 rtx second_test = NULL, bypass_test = NULL;
15017 rtx compare_op, compare_seq;
15018
15019 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15020
15021 /* Shortcut: the following common codes never translate
15022 into carry-flag compares. */
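/* Informally: only results that can be read straight off the carry flag
(LTU/GEU after the compare) are usable here; EQ/NE and the (un)ordered
tests need other flag bits. */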
15023 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15024 || code == ORDERED || code == UNORDERED)
15025 return false;
15026
15027 /* These comparisons require the zero flag; swap the operands so that they do not. */
15028 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15029 && !TARGET_IEEE_FP)
15030 {
15031 rtx tmp = op0;
15032 op0 = op1;
15033 op1 = tmp;
15034 code = swap_condition (code);
15035 }
15036
15037 /* Try to expand the comparison and verify that we end up with a
15038 carry-flag-based comparison. This fails only when we decide to
15039 expand the comparison using arithmetic, which is not a common
15040 scenario. */
15041 start_sequence ();
15042 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15043 &second_test, &bypass_test);
15044 compare_seq = get_insns ();
15045 end_sequence ();
15046
15047 if (second_test || bypass_test)
15048 return false;
15049
15050 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15051 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15052 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15053 else
15054 code = GET_CODE (compare_op);
15055
15056 if (code != LTU && code != GEU)
15057 return false;
15058
15059 emit_insn (compare_seq);
15060 *pop = compare_op;
15061 return true;
15062 }
15063
15064 if (!INTEGRAL_MODE_P (mode))
15065 return false;
15066
15067 switch (code)
15068 {
15069 case LTU:
15070 case GEU:
15071 break;
15072
15073 /* Convert a==0 into (unsigned)a<1. */
15074 case EQ:
15075 case NE:
15076 if (op1 != const0_rtx)
15077 return false;
15078 op1 = const1_rtx;
15079 code = (code == EQ ? LTU : GEU);
15080 break;
15081
15082 /* Convert a>b into b<a or a>=b+1. */
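/* E.g. a >u 5 becomes a >=u 6, whose result can be read from the carry
flag (an illustrative instance of the rewrite below). */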
15083 case GTU:
15084 case LEU:
15085 if (CONST_INT_P (op1))
15086 {
15087 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15088 /* Bail out on overflow. We could still swap the operands, but that
15089 would force loading the constant into a register. */
15090 if (op1 == const0_rtx
15091 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15092 return false;
15093 code = (code == GTU ? GEU : LTU);
15094 }
15095 else
15096 {
15097 rtx tmp = op1;
15098 op1 = op0;
15099 op0 = tmp;
15100 code = (code == GTU ? LTU : GEU);
15101 }
15102 break;
15103
15104 /* Convert a>=0 into (unsigned)a<0x80000000. */
15105 case LT:
15106 case GE:
15107 if (mode == DImode || op1 != const0_rtx)
15108 return false;
15109 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15110 code = (code == LT ? GEU : LTU);
15111 break;
15112 case LE:
15113 case GT:
15114 if (mode == DImode || op1 != constm1_rtx)
15115 return false;
15116 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15117 code = (code == LE ? GEU : LTU);
15118 break;
15119
15120 default:
15121 return false;
15122 }
15123 /* Swapping operands may cause a constant to appear as the first operand. */
15124 if (!nonimmediate_operand (op0, VOIDmode))
15125 {
15126 if (!can_create_pseudo_p ())
15127 return false;
15128 op0 = force_reg (mode, op0);
15129 }
15130 ix86_compare_op0 = op0;
15131 ix86_compare_op1 = op1;
15132 *pop = ix86_expand_compare (code, NULL, NULL);
15133 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
15134 return true;
15135 }
15136
15137 int
15138 ix86_expand_int_movcc (rtx operands[])
15139 {
15140 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15141 rtx compare_seq, compare_op;
15142 rtx second_test, bypass_test;
15143 enum machine_mode mode = GET_MODE (operands[0]);
15144 bool sign_bit_compare_p = false;
15145
15146 start_sequence ();
15147 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15148 compare_seq = get_insns ();
15149 end_sequence ();
15150
15151 compare_code = GET_CODE (compare_op);
15152
15153 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15154 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15155 sign_bit_compare_p = true;
15156
15157 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15158 HImode insns, we'd be swallowed in word prefix ops. */
15159
15160 if ((mode != HImode || TARGET_FAST_PREFIX)
15161 && (mode != (TARGET_64BIT ? TImode : DImode))
15162 && CONST_INT_P (operands[2])
15163 && CONST_INT_P (operands[3]))
15164 {
15165 rtx out = operands[0];
15166 HOST_WIDE_INT ct = INTVAL (operands[2]);
15167 HOST_WIDE_INT cf = INTVAL (operands[3]);
15168 HOST_WIDE_INT diff;
15169
15170 diff = ct - cf;
15171 /* Sign bit compares are better done using shifts than by using
15172 sbb. */
15173 if (sign_bit_compare_p
15174 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15175 ix86_compare_op1, &compare_op))
15176 {
15177 /* Detect overlap between destination and compare sources. */
15178 rtx tmp = out;
15179
15180 if (!sign_bit_compare_p)
15181 {
15182 bool fpcmp = false;
15183
15184 compare_code = GET_CODE (compare_op);
15185
15186 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15187 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15188 {
15189 fpcmp = true;
15190 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15191 }
15192
15193 /* To simplify the rest of the code, restrict to the GEU case. */
15194 if (compare_code == LTU)
15195 {
15196 HOST_WIDE_INT tmp = ct;
15197 ct = cf;
15198 cf = tmp;
15199 compare_code = reverse_condition (compare_code);
15200 code = reverse_condition (code);
15201 }
15202 else
15203 {
15204 if (fpcmp)
15205 PUT_CODE (compare_op,
15206 reverse_condition_maybe_unordered
15207 (GET_CODE (compare_op)));
15208 else
15209 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15210 }
15211 diff = ct - cf;
15212
15213 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15214 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15215 tmp = gen_reg_rtx (mode);
15216
15217 if (mode == DImode)
15218 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15219 else
15220 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15221 }
15222 else
15223 {
15224 if (code == GT || code == GE)
15225 code = reverse_condition (code);
15226 else
15227 {
15228 HOST_WIDE_INT tmp = ct;
15229 ct = cf;
15230 cf = tmp;
15231 diff = ct - cf;
15232 }
15233 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15234 ix86_compare_op1, VOIDmode, 0, -1);
15235 }
15236
15237 if (diff == 1)
15238 {
15239 /*
15240 * cmpl op0,op1
15241 * sbbl dest,dest
15242 * [addl dest, ct]
15243 *
15244 * Size 5 - 8.
15245 */
15246 if (ct)
15247 tmp = expand_simple_binop (mode, PLUS,
15248 tmp, GEN_INT (ct),
15249 copy_rtx (tmp), 1, OPTAB_DIRECT);
15250 }
15251 else if (cf == -1)
15252 {
15253 /*
15254 * cmpl op0,op1
15255 * sbbl dest,dest
15256 * orl $ct, dest
15257 *
15258 * Size 8.
15259 */
15260 tmp = expand_simple_binop (mode, IOR,
15261 tmp, GEN_INT (ct),
15262 copy_rtx (tmp), 1, OPTAB_DIRECT);
15263 }
15264 else if (diff == -1 && ct)
15265 {
15266 /*
15267 * cmpl op0,op1
15268 * sbbl dest,dest
15269 * notl dest
15270 * [addl dest, cf]
15271 *
15272 * Size 8 - 11.
15273 */
15274 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15275 if (cf)
15276 tmp = expand_simple_binop (mode, PLUS,
15277 copy_rtx (tmp), GEN_INT (cf),
15278 copy_rtx (tmp), 1, OPTAB_DIRECT);
15279 }
15280 else
15281 {
15282 /*
15283 * cmpl op0,op1
15284 * sbbl dest,dest
15285 * [notl dest]
15286 * andl cf - ct, dest
15287 * [addl dest, ct]
15288 *
15289 * Size 8 - 11.
15290 */
15291
15292 if (cf == 0)
15293 {
15294 cf = ct;
15295 ct = 0;
15296 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15297 }
15298
15299 tmp = expand_simple_binop (mode, AND,
15300 copy_rtx (tmp),
15301 gen_int_mode (cf - ct, mode),
15302 copy_rtx (tmp), 1, OPTAB_DIRECT);
15303 if (ct)
15304 tmp = expand_simple_binop (mode, PLUS,
15305 copy_rtx (tmp), GEN_INT (ct),
15306 copy_rtx (tmp), 1, OPTAB_DIRECT);
15307 }
15308
15309 if (!rtx_equal_p (tmp, out))
15310 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15311
15312 return 1; /* DONE */
15313 }
15314
15315 if (diff < 0)
15316 {
15317 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15318
15319 HOST_WIDE_INT tmp;
15320 tmp = ct, ct = cf, cf = tmp;
15321 diff = -diff;
15322
15323 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15324 {
15325 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15326
15327 /* We may be reversing an unordered compare to a normal compare; that
15328 is not valid in general (we may convert a non-trapping condition
15329 to a trapping one), but on i386 we currently emit all
15330 comparisons unordered. */
15331 compare_code = reverse_condition_maybe_unordered (compare_code);
15332 code = reverse_condition_maybe_unordered (code);
15333 }
15334 else
15335 {
15336 compare_code = reverse_condition (compare_code);
15337 code = reverse_condition (code);
15338 }
15339 }
15340
15341 compare_code = UNKNOWN;
15342 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15343 && CONST_INT_P (ix86_compare_op1))
15344 {
15345 if (ix86_compare_op1 == const0_rtx
15346 && (code == LT || code == GE))
15347 compare_code = code;
15348 else if (ix86_compare_op1 == constm1_rtx)
15349 {
15350 if (code == LE)
15351 compare_code = LT;
15352 else if (code == GT)
15353 compare_code = GE;
15354 }
15355 }
15356
15357 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15358 if (compare_code != UNKNOWN
15359 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15360 && (cf == -1 || ct == -1))
15361 {
15362 /* If the lea code below could be used, only optimize
15363 if it results in a 2-insn sequence. */
15364
15365 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15366 || diff == 3 || diff == 5 || diff == 9)
15367 || (compare_code == LT && ct == -1)
15368 || (compare_code == GE && cf == -1))
15369 {
15370 /*
15371 * notl op1 (if necessary)
15372 * sarl $31, op1
15373 * orl cf, op1
15374 */
15375 if (ct != -1)
15376 {
15377 cf = ct;
15378 ct = -1;
15379 code = reverse_condition (code);
15380 }
15381
15382 out = emit_store_flag (out, code, ix86_compare_op0,
15383 ix86_compare_op1, VOIDmode, 0, -1);
15384
15385 out = expand_simple_binop (mode, IOR,
15386 out, GEN_INT (cf),
15387 out, 1, OPTAB_DIRECT);
15388 if (out != operands[0])
15389 emit_move_insn (operands[0], out);
15390
15391 return 1; /* DONE */
15392 }
15393 }
15394
15395
15396 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15397 || diff == 3 || diff == 5 || diff == 9)
15398 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15399 && (mode != DImode
15400 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15401 {
15402 /*
15403 * xorl dest,dest
15404 * cmpl op1,op2
15405 * setcc dest
15406 * lea cf(dest*(ct-cf)),dest
15407 *
15408 * Size 14.
15409 *
15410 * This also catches the degenerate setcc-only case.
15411 */
15412
15413 rtx tmp;
15414 int nops;
15415
15416 out = emit_store_flag (out, code, ix86_compare_op0,
15417 ix86_compare_op1, VOIDmode, 0, 1);
15418
15419 nops = 0;
15420 /* On x86_64 the lea instruction operates on Pmode, so we need
15421 to do the arithmetic in the proper mode to match. */
15422 if (diff == 1)
15423 tmp = copy_rtx (out);
15424 else
15425 {
15426 rtx out1;
15427 out1 = copy_rtx (out);
15428 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15429 nops++;
15430 if (diff & 1)
15431 {
15432 tmp = gen_rtx_PLUS (mode, tmp, out1);
15433 nops++;
15434 }
15435 }
15436 if (cf != 0)
15437 {
15438 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15439 nops++;
15440 }
15441 if (!rtx_equal_p (tmp, out))
15442 {
15443 if (nops == 1)
15444 out = force_operand (tmp, copy_rtx (out));
15445 else
15446 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15447 }
15448 if (!rtx_equal_p (out, operands[0]))
15449 emit_move_insn (operands[0], copy_rtx (out));
15450
15451 return 1; /* DONE */
15452 }
15453
15454 /*
15455 * General case: Jumpful:
15456 * xorl dest,dest cmpl op1, op2
15457 * cmpl op1, op2 movl ct, dest
15458 * setcc dest jcc 1f
15459 * decl dest movl cf, dest
15460 * andl (cf-ct),dest 1:
15461 * addl ct,dest
15462 *
15463 * Size 20. Size 14.
15464 *
15465 * This is reasonably steep, but branch mispredict costs are
15466 * high on modern cpus, so consider failing only if optimizing
15467 * for space.
15468 */
15469
15470 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15471 && BRANCH_COST (optimize_insn_for_speed_p (),
15472 false) >= 2)
15473 {
15474 if (cf == 0)
15475 {
15476 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15477
15478 cf = ct;
15479 ct = 0;
15480
15481 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15482 {
15483 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15484
15485 /* We may be reversing an unordered compare to a normal compare,
15486 which is not valid in general (we may convert a non-trapping
15487 condition to a trapping one); however, on i386 we currently
15488 emit all comparisons unordered. */
15489 code = reverse_condition_maybe_unordered (code);
15490 }
15491 else
15492 {
15493 code = reverse_condition (code);
15494 if (compare_code != UNKNOWN)
15495 compare_code = reverse_condition (compare_code);
15496 }
15497 }
15498
15499 if (compare_code != UNKNOWN)
15500 {
15501 /* notl op1 (if needed)
15502 sarl $31, op1
15503 andl (cf-ct), op1
15504 addl ct, op1
15505
15506 For x < 0 (resp. x <= -1) there will be no notl,
15507 so if possible swap the constants to get rid of the
15508 complement.
15509 True/false will be -1/0 while code below (store flag
15510 followed by decrement) is 0/-1, so the constants need
15511 to be exchanged once more. */
15512
15513 if (compare_code == GE || !cf)
15514 {
15515 code = reverse_condition (code);
15516 compare_code = LT;
15517 }
15518 else
15519 {
15520 HOST_WIDE_INT tmp = cf;
15521 cf = ct;
15522 ct = tmp;
15523 }
15524
15525 out = emit_store_flag (out, code, ix86_compare_op0,
15526 ix86_compare_op1, VOIDmode, 0, -1);
15527 }
15528 else
15529 {
15530 out = emit_store_flag (out, code, ix86_compare_op0,
15531 ix86_compare_op1, VOIDmode, 0, 1);
15532
15533 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15534 copy_rtx (out), 1, OPTAB_DIRECT);
15535 }
15536
15537 out = expand_simple_binop (mode, AND, copy_rtx (out),
15538 gen_int_mode (cf - ct, mode),
15539 copy_rtx (out), 1, OPTAB_DIRECT);
15540 if (ct)
15541 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15542 copy_rtx (out), 1, OPTAB_DIRECT);
15543 if (!rtx_equal_p (out, operands[0]))
15544 emit_move_insn (operands[0], copy_rtx (out));
15545
15546 return 1; /* DONE */
15547 }
15548 }
15549
15550 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15551 {
15552 /* Try a few things more with specific constants and a variable. */
15553
15554 optab op;
15555 rtx var, orig_out, out, tmp;
15556
15557 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15558 return 0; /* FAIL */
15559
15560 /* If one of the two operands is an interesting constant, load a
15561 constant with the above and mask it in with a logical operation. */
15562
15563 if (CONST_INT_P (operands[2]))
15564 {
15565 var = operands[3];
15566 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15567 operands[3] = constm1_rtx, op = and_optab;
15568 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15569 operands[3] = const0_rtx, op = ior_optab;
15570 else
15571 return 0; /* FAIL */
15572 }
15573 else if (CONST_INT_P (operands[3]))
15574 {
15575 var = operands[2];
15576 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15577 operands[2] = constm1_rtx, op = and_optab;
15578 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15579 operands[2] = const0_rtx, op = ior_optab;
15580 else
15581 return 0; /* FAIL */
15582 }
15583 else
15584 return 0; /* FAIL */
15585
15586 orig_out = operands[0];
15587 tmp = gen_reg_rtx (mode);
15588 operands[0] = tmp;
15589
15590 /* Recurse to get the constant loaded. */
15591 if (ix86_expand_int_movcc (operands) == 0)
15592 return 0; /* FAIL */
15593
15594 /* Mask in the interesting variable. */
15595 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15596 OPTAB_WIDEN);
15597 if (!rtx_equal_p (out, orig_out))
15598 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15599
15600 return 1; /* DONE */
15601 }
15602
15603 /*
15604 * For comparison with above,
15605 *
15606 * movl cf,dest
15607 * movl ct,tmp
15608 * cmpl op1,op2
15609 * cmovcc tmp,dest
15610 *
15611 * Size 15.
15612 */
15613
15614 if (! nonimmediate_operand (operands[2], mode))
15615 operands[2] = force_reg (mode, operands[2]);
15616 if (! nonimmediate_operand (operands[3], mode))
15617 operands[3] = force_reg (mode, operands[3]);
15618
15619 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15620 {
15621 rtx tmp = gen_reg_rtx (mode);
15622 emit_move_insn (tmp, operands[3]);
15623 operands[3] = tmp;
15624 }
15625 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15626 {
15627 rtx tmp = gen_reg_rtx (mode);
15628 emit_move_insn (tmp, operands[2]);
15629 operands[2] = tmp;
15630 }
15631
15632 if (! register_operand (operands[2], VOIDmode)
15633 && (mode == QImode
15634 || ! register_operand (operands[3], VOIDmode)))
15635 operands[2] = force_reg (mode, operands[2]);
15636
15637 if (mode == QImode
15638 && ! register_operand (operands[3], VOIDmode))
15639 operands[3] = force_reg (mode, operands[3]);
15640
15641 emit_insn (compare_seq);
15642 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15643 gen_rtx_IF_THEN_ELSE (mode,
15644 compare_op, operands[2],
15645 operands[3])));
15646 if (bypass_test)
15647 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15648 gen_rtx_IF_THEN_ELSE (mode,
15649 bypass_test,
15650 copy_rtx (operands[3]),
15651 copy_rtx (operands[0]))));
15652 if (second_test)
15653 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15654 gen_rtx_IF_THEN_ELSE (mode,
15655 second_test,
15656 copy_rtx (operands[2]),
15657 copy_rtx (operands[0]))));
15658
15659 return 1; /* DONE */
15660 }
15661
15662 /* Swap, force into registers, or otherwise massage the two operands
15663 to an sse comparison with a mask result. Thus we differ a bit from
15664 ix86_prepare_fp_compare_args which expects to produce a flags result.
15665
15666 The DEST operand exists to help determine whether to commute commutative
15667 operators. The POP0/POP1 operands are updated in place. The new
15668 comparison code is returned, or UNKNOWN if not implementable. */
15669
15670 static enum rtx_code
15671 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15672 rtx *pop0, rtx *pop1)
15673 {
15674 rtx tmp;
15675
15676 switch (code)
15677 {
15678 case LTGT:
15679 case UNEQ:
15680 /* We have no LTGT as an operator. We could implement it with
15681 NE & ORDERED, but this requires an extra temporary. It's
15682 not clear that it's worth it. */
15683 return UNKNOWN;
15684
15685 case LT:
15686 case LE:
15687 case UNGT:
15688 case UNGE:
15689 /* These are supported directly. */
15690 break;
15691
15692 case EQ:
15693 case NE:
15694 case UNORDERED:
15695 case ORDERED:
15696 /* For commutative operators, try to canonicalize the destination
15697 operand to be first in the comparison - this helps reload to
15698 avoid extra moves. */
15699 if (!dest || !rtx_equal_p (dest, *pop1))
15700 break;
15701 /* FALLTHRU */
15702
15703 case GE:
15704 case GT:
15705 case UNLE:
15706 case UNLT:
15707 /* These are not supported directly. Swap the comparison operands
15708 to transform into something that is supported. */
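/* E.g. a > b is rewritten here as b < a: the SSE compare predicates
provide lt/le and nlt/nle, but not gt/ge. */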
15709 tmp = *pop0;
15710 *pop0 = *pop1;
15711 *pop1 = tmp;
15712 code = swap_condition (code);
15713 break;
15714
15715 default:
15716 gcc_unreachable ();
15717 }
15718
15719 return code;
15720 }
15721
15722 /* Detect conditional moves that exactly match min/max operational
15723 semantics. Note that this is IEEE safe, as long as we don't
15724 interchange the operands.
15725
15726 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15727 and TRUE if the operation is successful and instructions are emitted. */
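/* Concretely, (a < b ? a : b) maps to MIN and (a < b ? b : a) to MAX;
when NaNs or signed zeros may matter, the IEEE-safe unspec forms are
used instead of plain SMIN/SMAX (a summary of the checks below). */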
15728
15729 static bool
15730 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15731 rtx cmp_op1, rtx if_true, rtx if_false)
15732 {
15733 enum machine_mode mode;
15734 bool is_min;
15735 rtx tmp;
15736
15737 if (code == LT)
15738 ;
15739 else if (code == UNGE)
15740 {
15741 tmp = if_true;
15742 if_true = if_false;
15743 if_false = tmp;
15744 }
15745 else
15746 return false;
15747
15748 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15749 is_min = true;
15750 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15751 is_min = false;
15752 else
15753 return false;
15754
15755 mode = GET_MODE (dest);
15756
15757 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15758 but MODE may be a vector mode and thus not appropriate. */
15759 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15760 {
15761 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15762 rtvec v;
15763
15764 if_true = force_reg (mode, if_true);
15765 v = gen_rtvec (2, if_true, if_false);
15766 tmp = gen_rtx_UNSPEC (mode, v, u);
15767 }
15768 else
15769 {
15770 code = is_min ? SMIN : SMAX;
15771 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15772 }
15773
15774 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15775 return true;
15776 }
15777
15778 /* Expand an sse vector comparison. Return the register with the result. */
15779
15780 static rtx
15781 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15782 rtx op_true, rtx op_false)
15783 {
15784 enum machine_mode mode = GET_MODE (dest);
15785 rtx x;
15786
15787 cmp_op0 = force_reg (mode, cmp_op0);
15788 if (!nonimmediate_operand (cmp_op1, mode))
15789 cmp_op1 = force_reg (mode, cmp_op1);
15790
15791 if (optimize
15792 || reg_overlap_mentioned_p (dest, op_true)
15793 || reg_overlap_mentioned_p (dest, op_false))
15794 dest = gen_reg_rtx (mode);
15795
15796 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15797 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15798
15799 return dest;
15800 }
15801
15802 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15803 operations. This is used for both scalar and vector conditional moves. */
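/* In the general case the result is computed as
dest = (cmp & op_true) | (~cmp & op_false),
relying on CMP being an all-ones or all-zeros mask in each element
(the and/andnot/ior sequence emitted below); SSE5 can instead use a
single conditional-move pattern. */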
15804
15805 static void
15806 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15807 {
15808 enum machine_mode mode = GET_MODE (dest);
15809 rtx t2, t3, x;
15810
15811 if (op_false == CONST0_RTX (mode))
15812 {
15813 op_true = force_reg (mode, op_true);
15814 x = gen_rtx_AND (mode, cmp, op_true);
15815 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15816 }
15817 else if (op_true == CONST0_RTX (mode))
15818 {
15819 op_false = force_reg (mode, op_false);
15820 x = gen_rtx_NOT (mode, cmp);
15821 x = gen_rtx_AND (mode, x, op_false);
15822 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15823 }
15824 else if (TARGET_SSE5)
15825 {
15826 rtx pcmov = gen_rtx_SET (mode, dest,
15827 gen_rtx_IF_THEN_ELSE (mode, cmp,
15828 op_true,
15829 op_false));
15830 emit_insn (pcmov);
15831 }
15832 else
15833 {
15834 op_true = force_reg (mode, op_true);
15835 op_false = force_reg (mode, op_false);
15836
15837 t2 = gen_reg_rtx (mode);
15838 if (optimize)
15839 t3 = gen_reg_rtx (mode);
15840 else
15841 t3 = dest;
15842
15843 x = gen_rtx_AND (mode, op_true, cmp);
15844 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15845
15846 x = gen_rtx_NOT (mode, cmp);
15847 x = gen_rtx_AND (mode, x, op_false);
15848 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15849
15850 x = gen_rtx_IOR (mode, t3, t2);
15851 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15852 }
15853 }
15854
15855 /* Expand a floating-point conditional move. Return true if successful. */
15856
15857 int
15858 ix86_expand_fp_movcc (rtx operands[])
15859 {
15860 enum machine_mode mode = GET_MODE (operands[0]);
15861 enum rtx_code code = GET_CODE (operands[1]);
15862 rtx tmp, compare_op, second_test, bypass_test;
15863
15864 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15865 {
15866 enum machine_mode cmode;
15867
15868 /* Since we've no cmove for sse registers, don't force bad register
15869 allocation just to gain access to it. Deny movcc when the
15870 comparison mode doesn't match the move mode. */
15871 cmode = GET_MODE (ix86_compare_op0);
15872 if (cmode == VOIDmode)
15873 cmode = GET_MODE (ix86_compare_op1);
15874 if (cmode != mode)
15875 return 0;
15876
15877 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15878 &ix86_compare_op0,
15879 &ix86_compare_op1);
15880 if (code == UNKNOWN)
15881 return 0;
15882
15883 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15884 ix86_compare_op1, operands[2],
15885 operands[3]))
15886 return 1;
15887
15888 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15889 ix86_compare_op1, operands[2], operands[3]);
15890 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15891 return 1;
15892 }
15893
15894 /* The floating point conditional move instructions don't directly
15895 support conditions resulting from a signed integer comparison. */
15896
15897 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15898
15902 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15903 {
15904 gcc_assert (!second_test && !bypass_test);
15905 tmp = gen_reg_rtx (QImode);
15906 ix86_expand_setcc (code, tmp);
15907 code = NE;
15908 ix86_compare_op0 = tmp;
15909 ix86_compare_op1 = const0_rtx;
15910 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15911 }
15912 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15913 {
15914 tmp = gen_reg_rtx (mode);
15915 emit_move_insn (tmp, operands[3]);
15916 operands[3] = tmp;
15917 }
15918 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15919 {
15920 tmp = gen_reg_rtx (mode);
15921 emit_move_insn (tmp, operands[2]);
15922 operands[2] = tmp;
15923 }
15924
15925 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15926 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15927 operands[2], operands[3])));
15928 if (bypass_test)
15929 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15930 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15931 operands[3], operands[0])));
15932 if (second_test)
15933 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15934 gen_rtx_IF_THEN_ELSE (mode, second_test,
15935 operands[2], operands[0])));
15936
15937 return 1;
15938 }
15939
15940 /* Expand a floating-point vector conditional move; a vcond operation
15941 rather than a movcc operation. */
15942
15943 bool
15944 ix86_expand_fp_vcond (rtx operands[])
15945 {
15946 enum rtx_code code = GET_CODE (operands[3]);
15947 rtx cmp;
15948
15949 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15950 &operands[4], &operands[5]);
15951 if (code == UNKNOWN)
15952 return false;
15953
15954 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15955 operands[5], operands[1], operands[2]))
15956 return true;
15957
15958 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15959 operands[1], operands[2]);
15960 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15961 return true;
15962 }
15963
15964 /* Expand a signed/unsigned integral vector conditional move. */
15965
15966 bool
15967 ix86_expand_int_vcond (rtx operands[])
15968 {
15969 enum machine_mode mode = GET_MODE (operands[0]);
15970 enum rtx_code code = GET_CODE (operands[3]);
15971 bool negate = false;
15972 rtx x, cop0, cop1;
15973
15974 cop0 = operands[4];
15975 cop1 = operands[5];
15976
15977 /* SSE5 supports all of the comparisons on all vector int types. */
15978 if (!TARGET_SSE5)
15979 {
15980 /* Canonicalize the comparison to EQ, GT, GTU. */
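/* E.g. LE becomes GT with the two arms of the select interchanged,
while LT becomes GT with the compare operands swapped (an informal
summary of the cases below). */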
15981 switch (code)
15982 {
15983 case EQ:
15984 case GT:
15985 case GTU:
15986 break;
15987
15988 case NE:
15989 case LE:
15990 case LEU:
15991 code = reverse_condition (code);
15992 negate = true;
15993 break;
15994
15995 case GE:
15996 case GEU:
15997 code = reverse_condition (code);
15998 negate = true;
15999 /* FALLTHRU */
16000
16001 case LT:
16002 case LTU:
16003 code = swap_condition (code);
16004 x = cop0, cop0 = cop1, cop1 = x;
16005 break;
16006
16007 default:
16008 gcc_unreachable ();
16009 }
16010
16011 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16012 if (mode == V2DImode)
16013 {
16014 switch (code)
16015 {
16016 case EQ:
16017 /* SSE4.1 supports EQ. */
16018 if (!TARGET_SSE4_1)
16019 return false;
16020 break;
16021
16022 case GT:
16023 case GTU:
16024 /* SSE4.2 supports GT/GTU. */
16025 if (!TARGET_SSE4_2)
16026 return false;
16027 break;
16028
16029 default:
16030 gcc_unreachable ();
16031 }
16032 }
16033
16034 /* Unsigned parallel compare is not supported by the hardware. Play some
16035 tricks to turn this into a signed comparison against 0. */
16036 if (code == GTU)
16037 {
16038 cop0 = force_reg (mode, cop0);
16039
16040 switch (mode)
16041 {
16042 case V4SImode:
16043 case V2DImode:
16044 {
16045 rtx t1, t2, mask;
16046
16047 /* Perform a parallel modulo subtraction. */
16048 t1 = gen_reg_rtx (mode);
16049 emit_insn ((mode == V4SImode
16050 ? gen_subv4si3
16051 : gen_subv2di3) (t1, cop0, cop1));
16052
16053 /* Extract the original sign bit of op0. */
16054 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16055 true, false);
16056 t2 = gen_reg_rtx (mode);
16057 emit_insn ((mode == V4SImode
16058 ? gen_andv4si3
16059 : gen_andv2di3) (t2, cop0, mask));
16060
16061 /* XOR it back into the result of the subtraction. This results
16062 in the sign bit set iff we saw unsigned underflow. */
16063 x = gen_reg_rtx (mode);
16064 emit_insn ((mode == V4SImode
16065 ? gen_xorv4si3
16066 : gen_xorv2di3) (x, t1, t2));
16067
16068 code = GT;
16069 }
16070 break;
16071
16072 case V16QImode:
16073 case V8HImode:
16074 /* Perform a parallel unsigned saturating subtraction. */
16075 x = gen_reg_rtx (mode);
16076 emit_insn (gen_rtx_SET (VOIDmode, x,
16077 gen_rtx_US_MINUS (mode, cop0, cop1)));
16078
16079 code = EQ;
16080 negate = !negate;
16081 break;
16082
16083 default:
16084 gcc_unreachable ();
16085 }
16086
16087 cop0 = x;
16088 cop1 = CONST0_RTX (mode);
16089 }
16090 }
16091
16092 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16093 operands[1+negate], operands[2-negate]);
16094
16095 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16096 operands[2-negate]);
16097 return true;
16098 }
16099
16100 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16101 true if we should do zero extension, else sign extension. HIGH_P is
16102 true if we want the N/2 high elements, else the low elements. */
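/* For example, zero extension of the low half of a V16QImode operand
interleaves its low eight bytes with zero bytes, yielding a V8HImode
value; sign extension interleaves with a mask computed as (0 > op),
i.e. all-ones bytes for negative elements (a sketch of the expansion
below). */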
16103
16104 void
16105 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16106 {
16107 enum machine_mode imode = GET_MODE (operands[1]);
16108 rtx (*unpack)(rtx, rtx, rtx);
16109 rtx se, dest;
16110
16111 switch (imode)
16112 {
16113 case V16QImode:
16114 if (high_p)
16115 unpack = gen_vec_interleave_highv16qi;
16116 else
16117 unpack = gen_vec_interleave_lowv16qi;
16118 break;
16119 case V8HImode:
16120 if (high_p)
16121 unpack = gen_vec_interleave_highv8hi;
16122 else
16123 unpack = gen_vec_interleave_lowv8hi;
16124 break;
16125 case V4SImode:
16126 if (high_p)
16127 unpack = gen_vec_interleave_highv4si;
16128 else
16129 unpack = gen_vec_interleave_lowv4si;
16130 break;
16131 default:
16132 gcc_unreachable ();
16133 }
16134
16135 dest = gen_lowpart (imode, operands[0]);
16136
16137 if (unsigned_p)
16138 se = force_reg (imode, CONST0_RTX (imode));
16139 else
16140 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16141 operands[1], pc_rtx, pc_rtx);
16142
16143 emit_insn (unpack (dest, operands[1], se));
16144 }
16145
16146 /* This function performs the same task as ix86_expand_sse_unpack,
16147 but with SSE4.1 instructions. */
16148
16149 void
16150 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16151 {
16152 enum machine_mode imode = GET_MODE (operands[1]);
16153 rtx (*unpack)(rtx, rtx);
16154 rtx src, dest;
16155
16156 switch (imode)
16157 {
16158 case V16QImode:
16159 if (unsigned_p)
16160 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16161 else
16162 unpack = gen_sse4_1_extendv8qiv8hi2;
16163 break;
16164 case V8HImode:
16165 if (unsigned_p)
16166 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16167 else
16168 unpack = gen_sse4_1_extendv4hiv4si2;
16169 break;
16170 case V4SImode:
16171 if (unsigned_p)
16172 unpack = gen_sse4_1_zero_extendv2siv2di2;
16173 else
16174 unpack = gen_sse4_1_extendv2siv2di2;
16175 break;
16176 default:
16177 gcc_unreachable ();
16178 }
16179
16180 dest = operands[0];
16181 if (high_p)
16182 {
16183 /* Shift higher 8 bytes to lower 8 bytes. */
16184 src = gen_reg_rtx (imode);
16185 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16186 gen_lowpart (TImode, operands[1]),
16187 GEN_INT (64)));
16188 }
16189 else
16190 src = operands[1];
16191
16192 emit_insn (unpack (dest, src));
16193 }
16194
16195 /* This function performs the same task as ix86_expand_sse_unpack,
16196 but with sse5 instructions. */
16197
16198 void
16199 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16200 {
16201 enum machine_mode imode = GET_MODE (operands[1]);
16202 int pperm_bytes[16];
16203 int i;
16204 int h = (high_p) ? 8 : 0;
16205 int h2;
16206 int sign_extend;
16207 rtvec v = rtvec_alloc (16);
16208 rtvec vs;
16209 rtx x, p;
16210 rtx op0 = operands[0], op1 = operands[1];
16211
16212 switch (imode)
16213 {
16214 case V16QImode:
16215 vs = rtvec_alloc (8);
16216 h2 = (high_p) ? 8 : 0;
16217 for (i = 0; i < 8; i++)
16218 {
16219 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16220 pperm_bytes[2*i+1] = ((unsigned_p)
16221 ? PPERM_ZERO
16222 : PPERM_SIGN | PPERM_SRC2 | i | h);
16223 }
16224
16225 for (i = 0; i < 16; i++)
16226 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16227
16228 for (i = 0; i < 8; i++)
16229 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16230
16231 p = gen_rtx_PARALLEL (VOIDmode, vs);
16232 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16233 if (unsigned_p)
16234 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16235 else
16236 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
16237 break;
16238
16239 case V8HImode:
16240 vs = rtvec_alloc (4);
16241 h2 = (high_p) ? 4 : 0;
16242 for (i = 0; i < 4; i++)
16243 {
16244 sign_extend = ((unsigned_p)
16245 ? PPERM_ZERO
16246 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16247 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16248 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16249 pperm_bytes[4*i+2] = sign_extend;
16250 pperm_bytes[4*i+3] = sign_extend;
16251 }
16252
16253 for (i = 0; i < 16; i++)
16254 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16255
16256 for (i = 0; i < 4; i++)
16257 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16258
16259 p = gen_rtx_PARALLEL (VOIDmode, vs);
16260 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16261 if (unsigned_p)
16262 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16263 else
16264 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
16265 break;
16266
16267 case V4SImode:
16268 vs = rtvec_alloc (2);
16269 h2 = (high_p) ? 2 : 0;
16270 for (i = 0; i < 2; i++)
16271 {
16272 sign_extend = ((unsigned_p)
16273 ? PPERM_ZERO
16274 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16275 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16276 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16277 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16278 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16279 pperm_bytes[8*i+4] = sign_extend;
16280 pperm_bytes[8*i+5] = sign_extend;
16281 pperm_bytes[8*i+6] = sign_extend;
16282 pperm_bytes[8*i+7] = sign_extend;
16283 }
16284
16285 for (i = 0; i < 16; i++)
16286 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16287
16288 for (i = 0; i < 2; i++)
16289 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16290
16291 p = gen_rtx_PARALLEL (VOIDmode, vs);
16292 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16293 if (unsigned_p)
16294 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16295 else
16296 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16297 break;
16298
16299 default:
16300 gcc_unreachable ();
16301 }
16302
16303 return;
16304 }
16305
16306 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16307 next narrower integer vector type. */
16308 void
16309 ix86_expand_sse5_pack (rtx operands[3])
16310 {
16311 enum machine_mode imode = GET_MODE (operands[0]);
16312 int pperm_bytes[16];
16313 int i;
16314 rtvec v = rtvec_alloc (16);
16315 rtx x;
16316 rtx op0 = operands[0];
16317 rtx op1 = operands[1];
16318 rtx op2 = operands[2];
16319
16320 switch (imode)
16321 {
16322 case V16QImode:
16323 for (i = 0; i < 8; i++)
16324 {
16325 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16326 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16327 }
16328
16329 for (i = 0; i < 16; i++)
16330 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16331
16332 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16333 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
16334 break;
16335
16336 case V8HImode:
16337 for (i = 0; i < 4; i++)
16338 {
16339 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16340 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16341 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16342 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16343 }
16344
16345 for (i = 0; i < 16; i++)
16346 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16347
16348 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16349 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
16350 break;
16351
16352 case V4SImode:
16353 for (i = 0; i < 2; i++)
16354 {
16355 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16356 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16357 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16358 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16359 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16360 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16361 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16362 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16363 }
16364
16365 for (i = 0; i < 16; i++)
16366 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16367
16368 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16369 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16370 break;
16371
16372 default:
16373 gcc_unreachable ();
16374 }
16375
16376 return;
16377 }
16378
16379 /* Expand conditional increment or decrement using adc/sbb instructions.
16380 The default case using setcc followed by the conditional move can be
16381 done by generic code. */
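/*
 * For instance, dest = op2 + (op0 <u op1) can be emitted roughly as
 *   cmp  op0, op1
 *   adc  $0, dest
 * (a loose sketch in the style of the sketches above; operand order
 * shown informally).  The sbb forms handle the decrement and
 * reversed-condition cases.
 */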
16382 int
16383 ix86_expand_int_addcc (rtx operands[])
16384 {
16385 enum rtx_code code = GET_CODE (operands[1]);
16386 rtx compare_op;
16387 rtx val = const0_rtx;
16388 bool fpcmp = false;
16389 enum machine_mode mode = GET_MODE (operands[0]);
16390
16391 if (operands[3] != const1_rtx
16392 && operands[3] != constm1_rtx)
16393 return 0;
16394 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16395 ix86_compare_op1, &compare_op))
16396 return 0;
16397 code = GET_CODE (compare_op);
16398
16399 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16400 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16401 {
16402 fpcmp = true;
16403 code = ix86_fp_compare_code_to_integer (code);
16404 }
16405
16406 if (code != LTU)
16407 {
16408 val = constm1_rtx;
16409 if (fpcmp)
16410 PUT_CODE (compare_op,
16411 reverse_condition_maybe_unordered
16412 (GET_CODE (compare_op)));
16413 else
16414 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16415 }
16416 PUT_MODE (compare_op, mode);
16417
16418 /* Construct either adc or sbb insn. */
16419 if ((code == LTU) == (operands[3] == constm1_rtx))
16420 {
16421 switch (GET_MODE (operands[0]))
16422 {
16423 case QImode:
16424 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16425 break;
16426 case HImode:
16427 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16428 break;
16429 case SImode:
16430 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16431 break;
16432 case DImode:
16433 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16434 break;
16435 default:
16436 gcc_unreachable ();
16437 }
16438 }
16439 else
16440 {
16441 switch (GET_MODE (operands[0]))
16442 {
16443 case QImode:
16444 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16445 break;
16446 case HImode:
16447 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16448 break;
16449 case SImode:
16450 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16451 break;
16452 case DImode:
16453 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16454 break;
16455 default:
16456 gcc_unreachable ();
16457 }
16458 }
16459 return 1; /* DONE */
16460 }
16461
16462
16463 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16464 works for floating point parameters and non-offsettable memories.
16465 For pushes, it returns just stack offsets; the values will be saved
16466 in the right order. At most four parts are generated. */
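/* For example, on a 32-bit target a DFmode operand splits into two
SImode parts and an XFmode operand into three; on a 64-bit target an
XFmode operand becomes a DImode part plus an SImode upper part (an
illustrative summary of the cases handled below). */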
16467
16468 static int
16469 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16470 {
16471 int size;
16472
16473 if (!TARGET_64BIT)
16474 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16475 else
16476 size = (GET_MODE_SIZE (mode) + 4) / 8;
16477
16478 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16479 gcc_assert (size >= 2 && size <= 4);
16480
16481 /* Optimize constant pool reference to immediates. This is used by fp
16482 moves, which force all constants to memory to allow combining. */
16483 if (MEM_P (operand) && MEM_READONLY_P (operand))
16484 {
16485 rtx tmp = maybe_get_pool_constant (operand);
16486 if (tmp)
16487 operand = tmp;
16488 }
16489
16490 if (MEM_P (operand) && !offsettable_memref_p (operand))
16491 {
16492 /* The only non-offsettable memories we handle are pushes. */
16493 int ok = push_operand (operand, VOIDmode);
16494
16495 gcc_assert (ok);
16496
16497 operand = copy_rtx (operand);
16498 PUT_MODE (operand, Pmode);
16499 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16500 return size;
16501 }
16502
16503 if (GET_CODE (operand) == CONST_VECTOR)
16504 {
16505 enum machine_mode imode = int_mode_for_mode (mode);
16506 /* Caution: if we looked through a constant pool memory above,
16507 the operand may actually have a different mode now. That's
16508 ok, since we want to pun this all the way back to an integer. */
16509 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16510 gcc_assert (operand != NULL);
16511 mode = imode;
16512 }
16513
16514 if (!TARGET_64BIT)
16515 {
16516 if (mode == DImode)
16517 split_di (&operand, 1, &parts[0], &parts[1]);
16518 else
16519 {
16520 int i;
16521
16522 if (REG_P (operand))
16523 {
16524 gcc_assert (reload_completed);
16525 for (i = 0; i < size; i++)
16526 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16527 }
16528 else if (offsettable_memref_p (operand))
16529 {
16530 operand = adjust_address (operand, SImode, 0);
16531 parts[0] = operand;
16532 for (i = 1; i < size; i++)
16533 parts[i] = adjust_address (operand, SImode, 4 * i);
16534 }
16535 else if (GET_CODE (operand) == CONST_DOUBLE)
16536 {
16537 REAL_VALUE_TYPE r;
16538 long l[4];
16539
16540 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16541 switch (mode)
16542 {
16543 case TFmode:
16544 real_to_target (l, &r, mode);
16545 parts[3] = gen_int_mode (l[3], SImode);
16546 parts[2] = gen_int_mode (l[2], SImode);
16547 break;
16548 case XFmode:
16549 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16550 parts[2] = gen_int_mode (l[2], SImode);
16551 break;
16552 case DFmode:
16553 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16554 break;
16555 default:
16556 gcc_unreachable ();
16557 }
16558 parts[1] = gen_int_mode (l[1], SImode);
16559 parts[0] = gen_int_mode (l[0], SImode);
16560 }
16561 else
16562 gcc_unreachable ();
16563 }
16564 }
16565 else
16566 {
16567 if (mode == TImode)
16568 split_ti (&operand, 1, &parts[0], &parts[1]);
16569 if (mode == XFmode || mode == TFmode)
16570 {
16571 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16572 if (REG_P (operand))
16573 {
16574 gcc_assert (reload_completed);
16575 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16576 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16577 }
16578 else if (offsettable_memref_p (operand))
16579 {
16580 operand = adjust_address (operand, DImode, 0);
16581 parts[0] = operand;
16582 parts[1] = adjust_address (operand, upper_mode, 8);
16583 }
16584 else if (GET_CODE (operand) == CONST_DOUBLE)
16585 {
16586 REAL_VALUE_TYPE r;
16587 long l[4];
16588
16589 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16590 real_to_target (l, &r, mode);
16591
16592 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16593 if (HOST_BITS_PER_WIDE_INT >= 64)
16594 parts[0]
16595 = gen_int_mode
16596 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16597 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16598 DImode);
16599 else
16600 parts[0] = immed_double_const (l[0], l[1], DImode);
16601
16602 if (upper_mode == SImode)
16603 parts[1] = gen_int_mode (l[2], SImode);
16604 else if (HOST_BITS_PER_WIDE_INT >= 64)
16605 parts[1]
16606 = gen_int_mode
16607 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16608 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16609 DImode);
16610 else
16611 parts[1] = immed_double_const (l[2], l[3], DImode);
16612 }
16613 else
16614 gcc_unreachable ();
16615 }
16616 }
16617
16618 return size;
16619 }
16620
16621 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16622 All required insns are emitted here; the caller need not emit any
16623 moves itself. Slots 2-5 of the operands array are used as scratch
16624 space for the destination parts and slots 6-9 for the source parts. */
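/* For instance (illustrative only), a 32-bit DImode move ends up with
   nparts == 2: operands[2]/operands[3] hold the low/high SImode halves of
   the destination and operands[6]/operands[7] the corresponding halves of
   the source, emitted low-to-high or high-to-low depending on whether the
   destination overlaps the source.  */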
16625
16626 void
16627 ix86_split_long_move (rtx operands[])
16628 {
16629 rtx part[2][4];
16630 int nparts, i, j;
16631 int push = 0;
16632 int collisions = 0;
16633 enum machine_mode mode = GET_MODE (operands[0]);
16634 bool collisionparts[4];
16635
16636 /* The DFmode expanders may ask us to move a double.
16637 For a 64-bit target this is a single move; by handling that case
16638 here we simplify the i386.md splitters. */
16639 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16640 {
16641 /* Optimize constant pool reference to immediates. This is used by
16642 fp moves, that force all constants to memory to allow combining. */
16643
16644 if (MEM_P (operands[1])
16645 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16646 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16647 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16648 if (push_operand (operands[0], VOIDmode))
16649 {
16650 operands[0] = copy_rtx (operands[0]);
16651 PUT_MODE (operands[0], Pmode);
16652 }
16653 else
16654 operands[0] = gen_lowpart (DImode, operands[0]);
16655 operands[1] = gen_lowpart (DImode, operands[1]);
16656 emit_move_insn (operands[0], operands[1]);
16657 return;
16658 }
16659
16660 /* The only non-offsettable memory we handle is push. */
16661 if (push_operand (operands[0], VOIDmode))
16662 push = 1;
16663 else
16664 gcc_assert (!MEM_P (operands[0])
16665 || offsettable_memref_p (operands[0]));
16666
16667 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16668 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16669
16670 /* When emitting a push, watch out for source operands located on the stack. */
16671 if (push && MEM_P (operands[1])
16672 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16673 for (i = 0; i < nparts - 1; i++)
16674 part[1][i] = change_address (part[1][i],
16675 GET_MODE (part[1][i]),
16676 XEXP (part[1][i + 1], 0));
16677
16678 /* We need to do the copy in the right order in case an address register
16679 of the source overlaps the destination. */
16680 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16681 {
16682 rtx tmp;
16683
16684 for (i = 0; i < nparts; i++)
16685 {
16686 collisionparts[i]
16687 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16688 if (collisionparts[i])
16689 collisions++;
16690 }
16691
16692 /* Collision in the middle part can be handled by reordering. */
16693 if (collisions == 1 && nparts == 3 && collisionparts [1])
16694 {
16695 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16696 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16697 }
16698 else if (collisions == 1
16699 && nparts == 4
16700 && (collisionparts [1] || collisionparts [2]))
16701 {
16702 if (collisionparts [1])
16703 {
16704 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16705 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16706 }
16707 else
16708 {
16709 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16710 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16711 }
16712 }
16713
16714 /* If there are more collisions, we can't handle it by reordering.
16715 Do an lea to the last part and use only one colliding move. */
16716 else if (collisions > 1)
16717 {
16718 rtx base;
16719
16720 collisions = 1;
16721
16722 base = part[0][nparts - 1];
16723
16724 /* Handle the case when the last part isn't valid for lea.
16725 This happens in 64-bit mode when storing the 12-byte XFmode. */
16726 if (GET_MODE (base) != Pmode)
16727 base = gen_rtx_REG (Pmode, REGNO (base));
16728
16729 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16730 part[1][0] = replace_equiv_address (part[1][0], base);
16731 for (i = 1; i < nparts; i++)
16732 {
16733 tmp = plus_constant (base, UNITS_PER_WORD * i);
16734 part[1][i] = replace_equiv_address (part[1][i], tmp);
16735 }
16736 }
16737 }
16738
16739 if (push)
16740 {
16741 if (!TARGET_64BIT)
16742 {
16743 if (nparts == 3)
16744 {
16745 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16746 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16747 emit_move_insn (part[0][2], part[1][2]);
16748 }
16749 else if (nparts == 4)
16750 {
16751 emit_move_insn (part[0][3], part[1][3]);
16752 emit_move_insn (part[0][2], part[1][2]);
16753 }
16754 }
16755 else
16756 {
16757 /* In 64-bit mode there is no 32-bit push available. If the operand is
16758 a register, that is fine - we simply use the larger counterpart. We
16759 also retype memory operands - these come from an attempt to avoid a
16760 REX prefix when moving the second half of a TFmode value. */
16761 if (GET_MODE (part[1][1]) == SImode)
16762 {
16763 switch (GET_CODE (part[1][1]))
16764 {
16765 case MEM:
16766 part[1][1] = adjust_address (part[1][1], DImode, 0);
16767 break;
16768
16769 case REG:
16770 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16771 break;
16772
16773 default:
16774 gcc_unreachable ();
16775 }
16776
16777 if (GET_MODE (part[1][0]) == SImode)
16778 part[1][0] = part[1][1];
16779 }
16780 }
16781 emit_move_insn (part[0][1], part[1][1]);
16782 emit_move_insn (part[0][0], part[1][0]);
16783 return;
16784 }
16785
16786 /* Choose correct order to not overwrite the source before it is copied. */
16787 if ((REG_P (part[0][0])
16788 && REG_P (part[1][1])
16789 && (REGNO (part[0][0]) == REGNO (part[1][1])
16790 || (nparts == 3
16791 && REGNO (part[0][0]) == REGNO (part[1][2]))
16792 || (nparts == 4
16793 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16794 || (collisions > 0
16795 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16796 {
16797 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16798 {
16799 operands[2 + i] = part[0][j];
16800 operands[6 + i] = part[1][j];
16801 }
16802 }
16803 else
16804 {
16805 for (i = 0; i < nparts; i++)
16806 {
16807 operands[2 + i] = part[0][i];
16808 operands[6 + i] = part[1][i];
16809 }
16810 }
16811
16812 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16813 if (optimize_insn_for_size_p ())
16814 {
16815 for (j = 0; j < nparts - 1; j++)
16816 if (CONST_INT_P (operands[6 + j])
16817 && operands[6 + j] != const0_rtx
16818 && REG_P (operands[2 + j]))
16819 for (i = j; i < nparts - 1; i++)
16820 if (CONST_INT_P (operands[7 + i])
16821 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16822 operands[7 + i] = operands[2 + j];
16823 }
16824
16825 for (i = 0; i < nparts; i++)
16826 emit_move_insn (operands[2 + i], operands[6 + i]);
16827
16828 return;
16829 }
16830
16831 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16832 left shift by a constant, either using a single shift or
16833 a sequence of add instructions. */
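/* For example (illustrative only), with COUNT == 3 on a target where three
   adds cost no more than one constant shift (and we are not optimizing for
   size), this emits "add op,op" three times, computing op << 3; otherwise
   it emits a single shift by 3.  COUNT == 1 is always expanded as one add.  */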
16834
16835 static void
16836 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16837 {
16838 if (count == 1)
16839 {
16840 emit_insn ((mode == DImode
16841 ? gen_addsi3
16842 : gen_adddi3) (operand, operand, operand));
16843 }
16844 else if (!optimize_insn_for_size_p ()
16845 && count * ix86_cost->add <= ix86_cost->shift_const)
16846 {
16847 int i;
16848 for (i=0; i<count; i++)
16849 {
16850 emit_insn ((mode == DImode
16851 ? gen_addsi3
16852 : gen_adddi3) (operand, operand, operand));
16853 }
16854 }
16855 else
16856 emit_insn ((mode == DImode
16857 ? gen_ashlsi3
16858 : gen_ashldi3) (operand, operand, GEN_INT (count)));
16859 }
16860
16861 void
16862 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16863 {
16864 rtx low[2], high[2];
16865 int count;
16866 const int single_width = mode == DImode ? 32 : 64;
16867
16868 if (CONST_INT_P (operands[2]))
16869 {
16870 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16871 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16872
16873 if (count >= single_width)
16874 {
16875 emit_move_insn (high[0], low[1]);
16876 emit_move_insn (low[0], const0_rtx);
16877
16878 if (count > single_width)
16879 ix86_expand_ashl_const (high[0], count - single_width, mode);
16880 }
16881 else
16882 {
16883 if (!rtx_equal_p (operands[0], operands[1]))
16884 emit_move_insn (operands[0], operands[1]);
16885 emit_insn ((mode == DImode
16886 ? gen_x86_shld
16887 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16888 ix86_expand_ashl_const (low[0], count, mode);
16889 }
16890 return;
16891 }
16892
16893 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16894
16895 if (operands[1] == const1_rtx)
16896 {
16897 /* Assuming we've chosen QImode-capable registers, 1 << N
16898 can be done with two 32/64-bit shifts, no branches, no cmoves. */
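/* Note that both variants below finish by shifting low[0] and high[0] by
   the full count; this relies on the hardware shift instructions masking
   the count to the operand width (5 bits for 32-bit, 6 bits for 64-bit
   operands), so e.g. a count of 35 shifts the selected half by 3.  */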
16899 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16900 {
16901 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16902
16903 ix86_expand_clear (low[0]);
16904 ix86_expand_clear (high[0]);
16905 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16906
16907 d = gen_lowpart (QImode, low[0]);
16908 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16909 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16910 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16911
16912 d = gen_lowpart (QImode, high[0]);
16913 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16914 s = gen_rtx_NE (QImode, flags, const0_rtx);
16915 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16916 }
16917
16918 /* Otherwise, we can get the same results by manually performing
16919 a bit extract operation on bit 5/6, and then performing the two
16920 shifts. The two methods of getting 0/1 into low/high are exactly
16921 the same size. Avoiding the shift in the bit extract case helps
16922 pentium4 a bit; no one else seems to care much either way. */
16923 else
16924 {
16925 rtx x;
16926
16927 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16928 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16929 else
16930 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16931 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
16932
16933 emit_insn ((mode == DImode
16934 ? gen_lshrsi3
16935 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16936 emit_insn ((mode == DImode
16937 ? gen_andsi3
16938 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16939 emit_move_insn (low[0], high[0]);
16940 emit_insn ((mode == DImode
16941 ? gen_xorsi3
16942 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16943 }
16944
16945 emit_insn ((mode == DImode
16946 ? gen_ashlsi3
16947 : gen_ashldi3) (low[0], low[0], operands[2]));
16948 emit_insn ((mode == DImode
16949 ? gen_ashlsi3
16950 : gen_ashldi3) (high[0], high[0], operands[2]));
16951 return;
16952 }
16953
16954 if (operands[1] == constm1_rtx)
16955 {
16956 /* For -1 << N, we can avoid the shld instruction, because we
16957 know that we're shifting 0...31/63 ones into a -1. */
16958 emit_move_insn (low[0], constm1_rtx);
16959 if (optimize_insn_for_size_p ())
16960 emit_move_insn (high[0], low[0]);
16961 else
16962 emit_move_insn (high[0], constm1_rtx);
16963 }
16964 else
16965 {
16966 if (!rtx_equal_p (operands[0], operands[1]))
16967 emit_move_insn (operands[0], operands[1]);
16968
16969 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16970 emit_insn ((mode == DImode
16971 ? gen_x86_shld
16972 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16973 }
16974
16975 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16976
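  /* The code above computes the result assuming the shift count is below
     the half width; the *shift_adj* expanders used below fix up the case
     where bit 5 (DImode) resp. bit 6 (TImode) of the count is set, using
     conditional moves when a scratch register is available
     (x86_shift_adj_1) and a branch-based fixup otherwise (x86_shift_adj_2).  */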
16977 if (TARGET_CMOVE && scratch)
16978 {
16979 ix86_expand_clear (scratch);
16980 emit_insn ((mode == DImode
16981 ? gen_x86_shift_adj_1
16982 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16983 scratch));
16984 }
16985 else
16986 emit_insn ((mode == DImode
16987 ? gen_x86_shift_adj_2
16988 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
16989 }
16990
16991 void
16992 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16993 {
16994 rtx low[2], high[2];
16995 int count;
16996 const int single_width = mode == DImode ? 32 : 64;
16997
16998 if (CONST_INT_P (operands[2]))
16999 {
17000 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17001 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17002
17003 if (count == single_width * 2 - 1)
17004 {
17005 emit_move_insn (high[0], high[1]);
17006 emit_insn ((mode == DImode
17007 ? gen_ashrsi3
17008 : gen_ashrdi3) (high[0], high[0],
17009 GEN_INT (single_width - 1)));
17010 emit_move_insn (low[0], high[0]);
17011
17012 }
17013 else if (count >= single_width)
17014 {
17015 emit_move_insn (low[0], high[1]);
17016 emit_move_insn (high[0], low[0]);
17017 emit_insn ((mode == DImode
17018 ? gen_ashrsi3
17019 : gen_ashrdi3) (high[0], high[0],
17020 GEN_INT (single_width - 1)));
17021 if (count > single_width)
17022 emit_insn ((mode == DImode
17023 ? gen_ashrsi3
17024 : gen_ashrdi3) (low[0], low[0],
17025 GEN_INT (count - single_width)));
17026 }
17027 else
17028 {
17029 if (!rtx_equal_p (operands[0], operands[1]))
17030 emit_move_insn (operands[0], operands[1]);
17031 emit_insn ((mode == DImode
17032 ? gen_x86_shrd
17033 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17034 emit_insn ((mode == DImode
17035 ? gen_ashrsi3
17036 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
17037 }
17038 }
17039 else
17040 {
17041 if (!rtx_equal_p (operands[0], operands[1]))
17042 emit_move_insn (operands[0], operands[1]);
17043
17044 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17045
17046 emit_insn ((mode == DImode
17047 ? gen_x86_shrd
17048 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17049 emit_insn ((mode == DImode
17050 ? gen_ashrsi3
17051 : gen_ashrdi3) (high[0], high[0], operands[2]));
17052
17053 if (TARGET_CMOVE && scratch)
17054 {
17055 emit_move_insn (scratch, high[0]);
17056 emit_insn ((mode == DImode
17057 ? gen_ashrsi3
17058 : gen_ashrdi3) (scratch, scratch,
17059 GEN_INT (single_width - 1)));
17060 emit_insn ((mode == DImode
17061 ? gen_x86_shift_adj_1
17062 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17063 scratch));
17064 }
17065 else
17066 emit_insn ((mode == DImode
17067 ? gen_x86_shift_adj_3
17068 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
17069 }
17070 }
17071
17072 void
17073 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17074 {
17075 rtx low[2], high[2];
17076 int count;
17077 const int single_width = mode == DImode ? 32 : 64;
17078
17079 if (CONST_INT_P (operands[2]))
17080 {
17081 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17082 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17083
17084 if (count >= single_width)
17085 {
17086 emit_move_insn (low[0], high[1]);
17087 ix86_expand_clear (high[0]);
17088
17089 if (count > single_width)
17090 emit_insn ((mode == DImode
17091 ? gen_lshrsi3
17092 : gen_lshrdi3) (low[0], low[0],
17093 GEN_INT (count - single_width)));
17094 }
17095 else
17096 {
17097 if (!rtx_equal_p (operands[0], operands[1]))
17098 emit_move_insn (operands[0], operands[1]);
17099 emit_insn ((mode == DImode
17100 ? gen_x86_shrd
17101 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17102 emit_insn ((mode == DImode
17103 ? gen_lshrsi3
17104 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
17105 }
17106 }
17107 else
17108 {
17109 if (!rtx_equal_p (operands[0], operands[1]))
17110 emit_move_insn (operands[0], operands[1]);
17111
17112 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17113
17114 emit_insn ((mode == DImode
17115 ? gen_x86_shrd
17116 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17117 emit_insn ((mode == DImode
17118 ? gen_lshrsi3
17119 : gen_lshrdi3) (high[0], high[0], operands[2]));
17120
17121 /* Heh. By reversing the arguments, we can reuse this pattern. */
17122 if (TARGET_CMOVE && scratch)
17123 {
17124 ix86_expand_clear (scratch);
17125 emit_insn ((mode == DImode
17126 ? gen_x86_shift_adj_1
17127 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17128 scratch));
17129 }
17130 else
17131 emit_insn ((mode == DImode
17132 ? gen_x86_shift_adj_2
17133 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17134 }
17135 }
17136
17137 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
17138 static void
17139 predict_jump (int prob)
17140 {
17141 rtx insn = get_last_insn ();
17142 gcc_assert (JUMP_P (insn));
17143 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17144 }
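/* REG_BR_PROB_BASE is 10000, so for instance predict_jump
   (REG_BR_PROB_BASE * 90 / 100) marks the just-emitted jump as taken with
   roughly 90% probability.  */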
17145
17146 /* Helper function for the string operations below. Test whether VARIABLE
17147 has the VALUE bit(s) set; if it does not, jump to the returned label. */
17148 static rtx
17149 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17150 {
17151 rtx label = gen_label_rtx ();
17152 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17153 if (GET_MODE (variable) == DImode)
17154 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17155 else
17156 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17157 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17158 1, label);
17159 if (epilogue)
17160 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17161 else
17162 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17163 return label;
17164 }
17165
17166 /* Decrease COUNTREG by VALUE. */
17167 static void
17168 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17169 {
17170 if (GET_MODE (countreg) == DImode)
17171 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17172 else
17173 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17174 }
17175
17176 /* Zero-extend EXP, which may be in SImode, to a Pmode register. */
17177 rtx
17178 ix86_zero_extend_to_Pmode (rtx exp)
17179 {
17180 rtx r;
17181 if (GET_MODE (exp) == VOIDmode)
17182 return force_reg (Pmode, exp);
17183 if (GET_MODE (exp) == Pmode)
17184 return copy_to_mode_reg (Pmode, exp);
17185 r = gen_reg_rtx (Pmode);
17186 emit_insn (gen_zero_extendsidi2 (r, exp));
17187 return r;
17188 }
17189
17190 /* Divide COUNTREG by SCALE. */
17191 static rtx
17192 scale_counter (rtx countreg, int scale)
17193 {
17194 rtx sc;
17195 rtx piece_size_mask;
17196
17197 if (scale == 1)
17198 return countreg;
17199 if (CONST_INT_P (countreg))
17200 return GEN_INT (INTVAL (countreg) / scale);
17201 gcc_assert (REG_P (countreg));
17202
17203 piece_size_mask = GEN_INT (scale - 1);
17204 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17205 GEN_INT (exact_log2 (scale)),
17206 NULL, 1, OPTAB_DIRECT);
17207 return sc;
17208 }
17209
17210 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17211 DImode for constant loop counts. */
17212
17213 static enum machine_mode
17214 counter_mode (rtx count_exp)
17215 {
17216 if (GET_MODE (count_exp) != VOIDmode)
17217 return GET_MODE (count_exp);
17218 if (GET_CODE (count_exp) != CONST_INT)
17219 return Pmode;
17220 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17221 return DImode;
17222 return SImode;
17223 }
17224
17225 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
17226 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
17227 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
17228 the equivalent loop that sets memory to VALUE (assumed to be in MODE).
17229
17230 The size is rounded down to a whole number of chunks moved at once.
17231 SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info. */
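/* For illustration only: the memset flavor with MODE == SImode and
   UNROLL == 1 emits RTL roughly equivalent to

       size = count & ~3;
       iter = 0;
     top:
       *(int *) (dest + iter) = value;
       iter += 4;
       if (iter < size) goto top;
       dest += iter;

   The memcpy flavor additionally loads from (src + iter) and advances
   src at the end.  */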
17232
17233
17234 static void
17235 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17236 rtx destptr, rtx srcptr, rtx value,
17237 rtx count, enum machine_mode mode, int unroll,
17238 int expected_size)
17239 {
17240 rtx out_label, top_label, iter, tmp;
17241 enum machine_mode iter_mode = counter_mode (count);
17242 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17243 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17244 rtx size;
17245 rtx x_addr;
17246 rtx y_addr;
17247 int i;
17248
17249 top_label = gen_label_rtx ();
17250 out_label = gen_label_rtx ();
17251 iter = gen_reg_rtx (iter_mode);
17252
17253 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17254 NULL, 1, OPTAB_DIRECT);
17255 /* Those two should combine. */
17256 if (piece_size == const1_rtx)
17257 {
17258 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17259 true, out_label);
17260 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17261 }
17262 emit_move_insn (iter, const0_rtx);
17263
17264 emit_label (top_label);
17265
17266 tmp = convert_modes (Pmode, iter_mode, iter, true);
17267 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17268 destmem = change_address (destmem, mode, x_addr);
17269
17270 if (srcmem)
17271 {
17272 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17273 srcmem = change_address (srcmem, mode, y_addr);
17274
17275 /* When unrolling for chips that reorder memory reads and writes,
17276 we can save registers by using a single temporary.
17277 Also, using 4 temporaries is overkill in 32-bit mode. */
17278 if (!TARGET_64BIT && 0)
17279 {
17280 for (i = 0; i < unroll; i++)
17281 {
17282 if (i)
17283 {
17284 destmem =
17285 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17286 srcmem =
17287 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17288 }
17289 emit_move_insn (destmem, srcmem);
17290 }
17291 }
17292 else
17293 {
17294 rtx tmpreg[4];
17295 gcc_assert (unroll <= 4);
17296 for (i = 0; i < unroll; i++)
17297 {
17298 tmpreg[i] = gen_reg_rtx (mode);
17299 if (i)
17300 {
17301 srcmem =
17302 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17303 }
17304 emit_move_insn (tmpreg[i], srcmem);
17305 }
17306 for (i = 0; i < unroll; i++)
17307 {
17308 if (i)
17309 {
17310 destmem =
17311 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17312 }
17313 emit_move_insn (destmem, tmpreg[i]);
17314 }
17315 }
17316 }
17317 else
17318 for (i = 0; i < unroll; i++)
17319 {
17320 if (i)
17321 destmem =
17322 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17323 emit_move_insn (destmem, value);
17324 }
17325
17326 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17327 true, OPTAB_LIB_WIDEN);
17328 if (tmp != iter)
17329 emit_move_insn (iter, tmp);
17330
17331 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17332 true, top_label);
17333 if (expected_size != -1)
17334 {
17335 expected_size /= GET_MODE_SIZE (mode) * unroll;
17336 if (expected_size == 0)
17337 predict_jump (0);
17338 else if (expected_size > REG_BR_PROB_BASE)
17339 predict_jump (REG_BR_PROB_BASE - 1);
17340 else
17341 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17342 }
17343 else
17344 predict_jump (REG_BR_PROB_BASE * 80 / 100);
17345 iter = ix86_zero_extend_to_Pmode (iter);
17346 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17347 true, OPTAB_LIB_WIDEN);
17348 if (tmp != destptr)
17349 emit_move_insn (destptr, tmp);
17350 if (srcptr)
17351 {
17352 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17353 true, OPTAB_LIB_WIDEN);
17354 if (tmp != srcptr)
17355 emit_move_insn (srcptr, tmp);
17356 }
17357 emit_label (out_label);
17358 }
17359
17360 /* Output a "rep; mov" instruction.
17361 Arguments have the same meaning as for the previous function. */
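/* For a known count of, say, 64 bytes with MODE == SImode this boils down
   to "rep movsl" with the count register preloaded with 64 / 4 == 16; the
   DESTEXP/SRCEXP expressions below describe the final values of the
   destination and source pointers for the RTL pattern.  */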
17362 static void
17363 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17364 rtx destptr, rtx srcptr,
17365 rtx count,
17366 enum machine_mode mode)
17367 {
17368 rtx destexp;
17369 rtx srcexp;
17370 rtx countreg;
17371
17372 /* If the size is known and a multiple of 4, it is shorter to use 4-byte rep movs. */
17373 if (mode == QImode && CONST_INT_P (count)
17374 && !(INTVAL (count) & 3))
17375 mode = SImode;
17376
17377 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17378 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17379 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17380 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17381 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17382 if (mode != QImode)
17383 {
17384 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17385 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17386 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17387 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17388 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17389 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17390 }
17391 else
17392 {
17393 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17394 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
17395 }
17396 if (CONST_INT_P (count))
17397 {
17398 count = GEN_INT (INTVAL (count)
17399 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17400 destmem = shallow_copy_rtx (destmem);
17401 srcmem = shallow_copy_rtx (srcmem);
17402 set_mem_size (destmem, count);
17403 set_mem_size (srcmem, count);
17404 }
17405 else
17406 {
17407 if (MEM_SIZE (destmem))
17408 set_mem_size (destmem, NULL_RTX);
17409 if (MEM_SIZE (srcmem))
17410 set_mem_size (srcmem, NULL_RTX);
17411 }
17412 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17413 destexp, srcexp));
17414 }
17415
17416 /* Output a "rep; stos" instruction.
17417 Arguments have the same meaning as for the previous function. */
17418 static void
17419 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17420 rtx count, enum machine_mode mode,
17421 rtx orig_value)
17422 {
17423 rtx destexp;
17424 rtx countreg;
17425
17426 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17427 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17428 value = force_reg (mode, gen_lowpart (mode, value));
17429 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17430 if (mode != QImode)
17431 {
17432 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17433 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17434 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17435 }
17436 else
17437 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17438 if (orig_value == const0_rtx && CONST_INT_P (count))
17439 {
17440 count = GEN_INT (INTVAL (count)
17441 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17442 destmem = shallow_copy_rtx (destmem);
17443 set_mem_size (destmem, count);
17444 }
17445 else if (MEM_SIZE (destmem))
17446 set_mem_size (destmem, NULL_RTX);
17447 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
17448 }
17449
17450 static void
17451 emit_strmov (rtx destmem, rtx srcmem,
17452 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17453 {
17454 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17455 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17456 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17457 }
17458
17459 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17460 static void
17461 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17462 rtx destptr, rtx srcptr, rtx count, int max_size)
17463 {
17464 rtx src, dest;
17465 if (CONST_INT_P (count))
17466 {
17467 HOST_WIDE_INT countval = INTVAL (count);
17468 int offset = 0;
17469
17470 if ((countval & 0x10) && max_size > 16)
17471 {
17472 if (TARGET_64BIT)
17473 {
17474 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17475 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17476 }
17477 else
17478 gcc_unreachable ();
17479 offset += 16;
17480 }
17481 if ((countval & 0x08) && max_size > 8)
17482 {
17483 if (TARGET_64BIT)
17484 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17485 else
17486 {
17487 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17488 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17489 }
17490 offset += 8;
17491 }
17492 if ((countval & 0x04) && max_size > 4)
17493 {
17494 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17495 offset += 4;
17496 }
17497 if ((countval & 0x02) && max_size > 2)
17498 {
17499 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17500 offset += 2;
17501 }
17502 if ((countval & 0x01) && max_size > 1)
17503 {
17504 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
17505 offset += 1;
17506 }
17507 return;
17508 }
17509 if (max_size > 8)
17510 {
17511 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17512 count, 1, OPTAB_DIRECT);
17513 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17514 count, QImode, 1, 4);
17515 return;
17516 }
17517
17518 /* When single string operations (movs) are available, they cheaply
17519 advance the dest and src pointers for us. Otherwise we save code size
17520 by maintaining an offset register (zero is readily available after the
17521 preceding rep operation) and using x86 addressing modes. */
17522 if (TARGET_SINGLE_STRINGOP)
17523 {
17524 if (max_size > 4)
17525 {
17526 rtx label = ix86_expand_aligntest (count, 4, true);
17527 src = change_address (srcmem, SImode, srcptr);
17528 dest = change_address (destmem, SImode, destptr);
17529 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17530 emit_label (label);
17531 LABEL_NUSES (label) = 1;
17532 }
17533 if (max_size > 2)
17534 {
17535 rtx label = ix86_expand_aligntest (count, 2, true);
17536 src = change_address (srcmem, HImode, srcptr);
17537 dest = change_address (destmem, HImode, destptr);
17538 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17539 emit_label (label);
17540 LABEL_NUSES (label) = 1;
17541 }
17542 if (max_size > 1)
17543 {
17544 rtx label = ix86_expand_aligntest (count, 1, true);
17545 src = change_address (srcmem, QImode, srcptr);
17546 dest = change_address (destmem, QImode, destptr);
17547 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17548 emit_label (label);
17549 LABEL_NUSES (label) = 1;
17550 }
17551 }
17552 else
17553 {
17554 rtx offset = force_reg (Pmode, const0_rtx);
17555 rtx tmp;
17556
17557 if (max_size > 4)
17558 {
17559 rtx label = ix86_expand_aligntest (count, 4, true);
17560 src = change_address (srcmem, SImode, srcptr);
17561 dest = change_address (destmem, SImode, destptr);
17562 emit_move_insn (dest, src);
17563 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17564 true, OPTAB_LIB_WIDEN);
17565 if (tmp != offset)
17566 emit_move_insn (offset, tmp);
17567 emit_label (label);
17568 LABEL_NUSES (label) = 1;
17569 }
17570 if (max_size > 2)
17571 {
17572 rtx label = ix86_expand_aligntest (count, 2, true);
17573 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17574 src = change_address (srcmem, HImode, tmp);
17575 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17576 dest = change_address (destmem, HImode, tmp);
17577 emit_move_insn (dest, src);
17578 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17579 true, OPTAB_LIB_WIDEN);
17580 if (tmp != offset)
17581 emit_move_insn (offset, tmp);
17582 emit_label (label);
17583 LABEL_NUSES (label) = 1;
17584 }
17585 if (max_size > 1)
17586 {
17587 rtx label = ix86_expand_aligntest (count, 1, true);
17588 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17589 src = change_address (srcmem, QImode, tmp);
17590 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17591 dest = change_address (destmem, QImode, tmp);
17592 emit_move_insn (dest, src);
17593 emit_label (label);
17594 LABEL_NUSES (label) = 1;
17595 }
17596 }
17597 }
17598
17599 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
17600 static void
17601 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17602 rtx count, int max_size)
17603 {
17604 count =
17605 expand_simple_binop (counter_mode (count), AND, count,
17606 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17607 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17608 gen_lowpart (QImode, value), count, QImode,
17609 1, max_size / 2);
17610 }
17611
17612 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
17613 static void
17614 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17615 {
17616 rtx dest;
17617
17618 if (CONST_INT_P (count))
17619 {
17620 HOST_WIDE_INT countval = INTVAL (count);
17621 int offset = 0;
17622
17623 if ((countval & 0x10) && max_size > 16)
17624 {
17625 if (TARGET_64BIT)
17626 {
17627 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17628 emit_insn (gen_strset (destptr, dest, value));
17629 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17630 emit_insn (gen_strset (destptr, dest, value));
17631 }
17632 else
17633 gcc_unreachable ();
17634 offset += 16;
17635 }
17636 if ((countval & 0x08) && max_size > 8)
17637 {
17638 if (TARGET_64BIT)
17639 {
17640 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17641 emit_insn (gen_strset (destptr, dest, value));
17642 }
17643 else
17644 {
17645 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17646 emit_insn (gen_strset (destptr, dest, value));
17647 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17648 emit_insn (gen_strset (destptr, dest, value));
17649 }
17650 offset += 8;
17651 }
17652 if ((countval & 0x04) && max_size > 4)
17653 {
17654 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17655 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17656 offset += 4;
17657 }
17658 if ((countval & 0x02) && max_size > 2)
17659 {
17660 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17661 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17662 offset += 2;
17663 }
17664 if ((countval & 0x01) && max_size > 1)
17665 {
17666 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17667 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17668 offset += 1;
17669 }
17670 return;
17671 }
17672 if (max_size > 32)
17673 {
17674 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17675 return;
17676 }
17677 if (max_size > 16)
17678 {
17679 rtx label = ix86_expand_aligntest (count, 16, true);
17680 if (TARGET_64BIT)
17681 {
17682 dest = change_address (destmem, DImode, destptr);
17683 emit_insn (gen_strset (destptr, dest, value));
17684 emit_insn (gen_strset (destptr, dest, value));
17685 }
17686 else
17687 {
17688 dest = change_address (destmem, SImode, destptr);
17689 emit_insn (gen_strset (destptr, dest, value));
17690 emit_insn (gen_strset (destptr, dest, value));
17691 emit_insn (gen_strset (destptr, dest, value));
17692 emit_insn (gen_strset (destptr, dest, value));
17693 }
17694 emit_label (label);
17695 LABEL_NUSES (label) = 1;
17696 }
17697 if (max_size > 8)
17698 {
17699 rtx label = ix86_expand_aligntest (count, 8, true);
17700 if (TARGET_64BIT)
17701 {
17702 dest = change_address (destmem, DImode, destptr);
17703 emit_insn (gen_strset (destptr, dest, value));
17704 }
17705 else
17706 {
17707 dest = change_address (destmem, SImode, destptr);
17708 emit_insn (gen_strset (destptr, dest, value));
17709 emit_insn (gen_strset (destptr, dest, value));
17710 }
17711 emit_label (label);
17712 LABEL_NUSES (label) = 1;
17713 }
17714 if (max_size > 4)
17715 {
17716 rtx label = ix86_expand_aligntest (count, 4, true);
17717 dest = change_address (destmem, SImode, destptr);
17718 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17719 emit_label (label);
17720 LABEL_NUSES (label) = 1;
17721 }
17722 if (max_size > 2)
17723 {
17724 rtx label = ix86_expand_aligntest (count, 2, true);
17725 dest = change_address (destmem, HImode, destptr);
17726 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17727 emit_label (label);
17728 LABEL_NUSES (label) = 1;
17729 }
17730 if (max_size > 1)
17731 {
17732 rtx label = ix86_expand_aligntest (count, 1, true);
17733 dest = change_address (destmem, QImode, destptr);
17734 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17735 emit_label (label);
17736 LABEL_NUSES (label) = 1;
17737 }
17738 }
17739
17740 /* Copy enough from SRC to DEST to align DEST, known to be aligned to ALIGN,
17741 up to DESIRED_ALIGNMENT. */
17742 static void
17743 expand_movmem_prologue (rtx destmem, rtx srcmem,
17744 rtx destptr, rtx srcptr, rtx count,
17745 int align, int desired_alignment)
17746 {
17747 if (align <= 1 && desired_alignment > 1)
17748 {
17749 rtx label = ix86_expand_aligntest (destptr, 1, false);
17750 srcmem = change_address (srcmem, QImode, srcptr);
17751 destmem = change_address (destmem, QImode, destptr);
17752 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17753 ix86_adjust_counter (count, 1);
17754 emit_label (label);
17755 LABEL_NUSES (label) = 1;
17756 }
17757 if (align <= 2 && desired_alignment > 2)
17758 {
17759 rtx label = ix86_expand_aligntest (destptr, 2, false);
17760 srcmem = change_address (srcmem, HImode, srcptr);
17761 destmem = change_address (destmem, HImode, destptr);
17762 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17763 ix86_adjust_counter (count, 2);
17764 emit_label (label);
17765 LABEL_NUSES (label) = 1;
17766 }
17767 if (align <= 4 && desired_alignment > 4)
17768 {
17769 rtx label = ix86_expand_aligntest (destptr, 4, false);
17770 srcmem = change_address (srcmem, SImode, srcptr);
17771 destmem = change_address (destmem, SImode, destptr);
17772 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17773 ix86_adjust_counter (count, 4);
17774 emit_label (label);
17775 LABEL_NUSES (label) = 1;
17776 }
17777 gcc_assert (desired_alignment <= 8);
17778 }
17779
17780 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
17781 ALIGN_BYTES is how many bytes need to be copied. */
17782 static rtx
17783 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17784 int desired_align, int align_bytes)
17785 {
17786 rtx src = *srcp;
17787 rtx src_size, dst_size;
17788 int off = 0;
17789 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17790 if (src_align_bytes >= 0)
17791 src_align_bytes = desired_align - src_align_bytes;
17792 src_size = MEM_SIZE (src);
17793 dst_size = MEM_SIZE (dst);
17794 if (align_bytes & 1)
17795 {
17796 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17797 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17798 off = 1;
17799 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17800 }
17801 if (align_bytes & 2)
17802 {
17803 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17804 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17805 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17806 set_mem_align (dst, 2 * BITS_PER_UNIT);
17807 if (src_align_bytes >= 0
17808 && (src_align_bytes & 1) == (align_bytes & 1)
17809 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17810 set_mem_align (src, 2 * BITS_PER_UNIT);
17811 off = 2;
17812 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17813 }
17814 if (align_bytes & 4)
17815 {
17816 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17817 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17818 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17819 set_mem_align (dst, 4 * BITS_PER_UNIT);
17820 if (src_align_bytes >= 0)
17821 {
17822 unsigned int src_align = 0;
17823 if ((src_align_bytes & 3) == (align_bytes & 3))
17824 src_align = 4;
17825 else if ((src_align_bytes & 1) == (align_bytes & 1))
17826 src_align = 2;
17827 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17828 set_mem_align (src, src_align * BITS_PER_UNIT);
17829 }
17830 off = 4;
17831 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17832 }
17833 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17834 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17835 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17836 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17837 if (src_align_bytes >= 0)
17838 {
17839 unsigned int src_align = 0;
17840 if ((src_align_bytes & 7) == (align_bytes & 7))
17841 src_align = 8;
17842 else if ((src_align_bytes & 3) == (align_bytes & 3))
17843 src_align = 4;
17844 else if ((src_align_bytes & 1) == (align_bytes & 1))
17845 src_align = 2;
17846 if (src_align > (unsigned int) desired_align)
17847 src_align = desired_align;
17848 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17849 set_mem_align (src, src_align * BITS_PER_UNIT);
17850 }
17851 if (dst_size)
17852 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17853 if (src_size)
17854 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17855 *srcp = src;
17856 return dst;
17857 }
17858
17859 /* Set enough bytes at DEST to align DEST, known to be aligned to ALIGN,
17860 up to DESIRED_ALIGNMENT. */
17861 static void
17862 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17863 int align, int desired_alignment)
17864 {
17865 if (align <= 1 && desired_alignment > 1)
17866 {
17867 rtx label = ix86_expand_aligntest (destptr, 1, false);
17868 destmem = change_address (destmem, QImode, destptr);
17869 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17870 ix86_adjust_counter (count, 1);
17871 emit_label (label);
17872 LABEL_NUSES (label) = 1;
17873 }
17874 if (align <= 2 && desired_alignment > 2)
17875 {
17876 rtx label = ix86_expand_aligntest (destptr, 2, false);
17877 destmem = change_address (destmem, HImode, destptr);
17878 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17879 ix86_adjust_counter (count, 2);
17880 emit_label (label);
17881 LABEL_NUSES (label) = 1;
17882 }
17883 if (align <= 4 && desired_alignment > 4)
17884 {
17885 rtx label = ix86_expand_aligntest (destptr, 4, false);
17886 destmem = change_address (destmem, SImode, destptr);
17887 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17888 ix86_adjust_counter (count, 4);
17889 emit_label (label);
17890 LABEL_NUSES (label) = 1;
17891 }
17892 gcc_assert (desired_alignment <= 8);
17893 }
17894
17895 /* Set enough bytes at DST to align DST to DESIRED_ALIGN.
17896 ALIGN_BYTES is how many bytes need to be stored. */
17897 static rtx
17898 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17899 int desired_align, int align_bytes)
17900 {
17901 int off = 0;
17902 rtx dst_size = MEM_SIZE (dst);
17903 if (align_bytes & 1)
17904 {
17905 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17906 off = 1;
17907 emit_insn (gen_strset (destreg, dst,
17908 gen_lowpart (QImode, value)));
17909 }
17910 if (align_bytes & 2)
17911 {
17912 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17913 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17914 set_mem_align (dst, 2 * BITS_PER_UNIT);
17915 off = 2;
17916 emit_insn (gen_strset (destreg, dst,
17917 gen_lowpart (HImode, value)));
17918 }
17919 if (align_bytes & 4)
17920 {
17921 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17922 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17923 set_mem_align (dst, 4 * BITS_PER_UNIT);
17924 off = 4;
17925 emit_insn (gen_strset (destreg, dst,
17926 gen_lowpart (SImode, value)));
17927 }
17928 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17929 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17930 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17931 if (dst_size)
17932 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17933 return dst;
17934 }
17935
17936 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
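/* The stringop_algs cost tables consulted here consist of an algorithm for
   unknown sizes followed by {max_size, alg} pairs; for illustration, a
   hypothetical entry {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}
   would mean: use a library call when the size is unknown, expand blocks
   known to be at most 256 bytes with "rep movsl"/"rep stosl", and use a
   library call for larger known sizes.  */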
17937 static enum stringop_alg
17938 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17939 int *dynamic_check)
17940 {
17941 const struct stringop_algs * algs;
17942 bool optimize_for_speed;
17943 /* Algorithms using the rep prefix want at least edi and ecx;
17944 additionally, memset wants eax and memcpy wants esi. Don't
17945 consider such algorithms if the user has appropriated those
17946 registers for their own purposes. */
17947 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17948 || (memset
17949 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17950
17951 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17952 || (alg != rep_prefix_1_byte \
17953 && alg != rep_prefix_4_byte \
17954 && alg != rep_prefix_8_byte))
17955 const struct processor_costs *cost;
17956
17957 /* Even if the string operation call is cold, we still might spend a lot
17958 of time processing large blocks. */
17959 if (optimize_function_for_size_p (cfun)
17960 || (optimize_insn_for_size_p ()
17961 && expected_size != -1 && expected_size < 256))
17962 optimize_for_speed = false;
17963 else
17964 optimize_for_speed = true;
17965
17966 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17967
17968 *dynamic_check = -1;
17969 if (memset)
17970 algs = &cost->memset[TARGET_64BIT != 0];
17971 else
17972 algs = &cost->memcpy[TARGET_64BIT != 0];
17973 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17974 return stringop_alg;
17975 /* rep; movq or rep; movl is the smallest variant. */
17976 else if (!optimize_for_speed)
17977 {
17978 if (!count || (count & 3))
17979 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17980 else
17981 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17982 }
17983 /* Very tiny blocks are best handled via the loop; REP is expensive
17984 to set up. */
17985 else if (expected_size != -1 && expected_size < 4)
17986 return loop_1_byte;
17987 else if (expected_size != -1)
17988 {
17989 unsigned int i;
17990 enum stringop_alg alg = libcall;
17991 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17992 {
17993 /* We get here if the algorithms that were not libcall-based
17994 were rep-prefix based and we are unable to use rep prefixes
17995 based on global register usage. Break out of the loop and
17996 use the heuristic below. */
17997 if (algs->size[i].max == 0)
17998 break;
17999 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18000 {
18001 enum stringop_alg candidate = algs->size[i].alg;
18002
18003 if (candidate != libcall && ALG_USABLE_P (candidate))
18004 alg = candidate;
18005 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18006 last non-libcall inline algorithm. */
18007 if (TARGET_INLINE_ALL_STRINGOPS)
18008 {
18009 /* When the current size is best copied by a libcall, but we are
18010 still forced to inline, run the heuristic below that will pick
18011 code for medium-sized blocks. */
18012 if (alg != libcall)
18013 return alg;
18014 break;
18015 }
18016 else if (ALG_USABLE_P (candidate))
18017 return candidate;
18018 }
18019 }
18020 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18021 }
18022 /* When asked to inline the call anyway, try to pick a meaningful choice.
18023 We look for the maximal size of a block that is faster to copy by hand
18024 and take blocks of at most that size, guessing that the average size
18025 will be roughly half of the block.
18026
18027 If this turns out to be bad, we might simply specify the preferred
18028 choice in ix86_costs. */
18029 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18030 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18031 {
18032 int max = -1;
18033 enum stringop_alg alg;
18034 int i;
18035 bool any_alg_usable_p = true;
18036
18037 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18038 {
18039 enum stringop_alg candidate = algs->size[i].alg;
18040 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18041
18042 if (candidate != libcall && candidate
18043 && ALG_USABLE_P (candidate))
18044 max = algs->size[i].max;
18045 }
18046 /* If there aren't any usable algorithms, then recursing on
18047 smaller sizes isn't going to find anything. Just return the
18048 simple byte-at-a-time copy loop. */
18049 if (!any_alg_usable_p)
18050 {
18051 /* Pick something reasonable. */
18052 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18053 *dynamic_check = 128;
18054 return loop_1_byte;
18055 }
18056 if (max == -1)
18057 max = 4096;
18058 alg = decide_alg (count, max / 2, memset, dynamic_check);
18059 gcc_assert (*dynamic_check == -1);
18060 gcc_assert (alg != libcall);
18061 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18062 *dynamic_check = max;
18063 return alg;
18064 }
18065 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18066 #undef ALG_USABLE_P
18067 }
18068
18069 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18070 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
18071 static int
18072 decide_alignment (int align,
18073 enum stringop_alg alg,
18074 int expected_size)
18075 {
18076 int desired_align = 0;
18077 switch (alg)
18078 {
18079 case no_stringop:
18080 gcc_unreachable ();
18081 case loop:
18082 case unrolled_loop:
18083 desired_align = GET_MODE_SIZE (Pmode);
18084 break;
18085 case rep_prefix_8_byte:
18086 desired_align = 8;
18087 break;
18088 case rep_prefix_4_byte:
18089 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
18090 copying a whole cache line at once. */
18091 if (TARGET_PENTIUMPRO)
18092 desired_align = 8;
18093 else
18094 desired_align = 4;
18095 break;
18096 case rep_prefix_1_byte:
18097 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
18098 copying a whole cache line at once. */
18099 if (TARGET_PENTIUMPRO)
18100 desired_align = 8;
18101 else
18102 desired_align = 1;
18103 break;
18104 case loop_1_byte:
18105 desired_align = 1;
18106 break;
18107 case libcall:
18108 return 0;
18109 }
18110
18111 if (optimize_size)
18112 desired_align = 1;
18113 if (desired_align < align)
18114 desired_align = align;
18115 if (expected_size != -1 && expected_size < 4)
18116 desired_align = align;
18117 return desired_align;
18118 }
18119
18120 /* Return the smallest power of 2 greater than VAL. */
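/* For example, 3 -> 4, 4 -> 8 and 0 -> 1 (note: strictly greater than,
   not greater than or equal to). */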
18121 static int
18122 smallest_pow2_greater_than (int val)
18123 {
18124 int ret = 1;
18125 while (ret <= val)
18126 ret <<= 1;
18127 return ret;
18128 }
18129
18130 /* Expand string move (memcpy) operation. Use i386 string operations when
18131 profitable. expand_setmem contains similar code. The code depends upon
18132 architecture, block size and alignment, but always has the same
18133 overall structure:
18134
18135 1) Prologue guard: Conditional that jumps up to the epilogue for small
18136 blocks that can be handled by the epilogue alone. This is faster, but
18137 is also needed for correctness, since the prologue assumes the block
18138 is larger than the desired alignment.
18139
18140 Optional dynamic check for size and libcall for large
18141 blocks is emitted here too, with -minline-stringops-dynamically.
18142
18143 2) Prologue: copy the first few bytes in order to get the destination
18144 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18145 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18146 We emit either a jump tree on power-of-two-sized blocks, or a byte loop.
18147
18148 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18149 with specified algorithm.
18150
18151 4) Epilogue: code copying the tail of the block that is too small to be
18152 handled by the main body (or up to the size guarded by the prologue guard). */
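/* For instance (illustrative only; the exact shape depends on the chosen
   algorithm and alignment), a memcpy of unknown size with
   alg == rep_prefix_4_byte and desired_align == 4 expands roughly to:

       if (count < 4) goto epilogue;               1) prologue guard
       if (dest & 1) { copy 1 byte; count -= 1; }  2) alignment prologue
       if (dest & 2) { copy 2 bytes; count -= 2; }
       rep movsl with ecx = count >> 2;            3) main body
     epilogue:
       copy the remaining count & 3 bytes;         4) epilogue  */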
18153
18154 int
18155 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18156 rtx expected_align_exp, rtx expected_size_exp)
18157 {
18158 rtx destreg;
18159 rtx srcreg;
18160 rtx label = NULL;
18161 rtx tmp;
18162 rtx jump_around_label = NULL;
18163 HOST_WIDE_INT align = 1;
18164 unsigned HOST_WIDE_INT count = 0;
18165 HOST_WIDE_INT expected_size = -1;
18166 int size_needed = 0, epilogue_size_needed;
18167 int desired_align = 0, align_bytes = 0;
18168 enum stringop_alg alg;
18169 int dynamic_check;
18170 bool need_zero_guard = false;
18171
18172 if (CONST_INT_P (align_exp))
18173 align = INTVAL (align_exp);
18174 /* i386 can do misaligned accesses at a reasonably small extra cost. */
18175 if (CONST_INT_P (expected_align_exp)
18176 && INTVAL (expected_align_exp) > align)
18177 align = INTVAL (expected_align_exp);
18178 /* ALIGN is the minimum of destination and source alignment, but we care here
18179 just about destination alignment. */
18180 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18181 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18182
18183 if (CONST_INT_P (count_exp))
18184 count = expected_size = INTVAL (count_exp);
18185 if (CONST_INT_P (expected_size_exp) && count == 0)
18186 expected_size = INTVAL (expected_size_exp);
18187
18188 /* Make sure we don't need to care about overflow later on. */
18189 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18190 return 0;
18191
18192 /* Step 0: Decide on preferred algorithm, desired alignment and
18193 size of chunks to be copied by main loop. */
18194
18195 alg = decide_alg (count, expected_size, false, &dynamic_check);
18196 desired_align = decide_alignment (align, alg, expected_size);
18197
18198 if (!TARGET_ALIGN_STRINGOPS)
18199 align = desired_align;
18200
18201 if (alg == libcall)
18202 return 0;
18203 gcc_assert (alg != no_stringop);
18204 if (!count)
18205 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18206 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18207 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
18208 switch (alg)
18209 {
18210 case libcall:
18211 case no_stringop:
18212 gcc_unreachable ();
18213 case loop:
18214 need_zero_guard = true;
18215 size_needed = GET_MODE_SIZE (Pmode);
18216 break;
18217 case unrolled_loop:
18218 need_zero_guard = true;
18219 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18220 break;
18221 case rep_prefix_8_byte:
18222 size_needed = 8;
18223 break;
18224 case rep_prefix_4_byte:
18225 size_needed = 4;
18226 break;
18227 case rep_prefix_1_byte:
18228 size_needed = 1;
18229 break;
18230 case loop_1_byte:
18231 need_zero_guard = true;
18232 size_needed = 1;
18233 break;
18234 }
18235
18236 epilogue_size_needed = size_needed;
18237
18238 /* Step 1: Prologue guard. */
18239
18240 /* Alignment code needs count to be in register. */
18241 if (CONST_INT_P (count_exp) && desired_align > align)
18242 {
18243 if (INTVAL (count_exp) > desired_align
18244 && INTVAL (count_exp) > size_needed)
18245 {
18246 align_bytes
18247 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18248 if (align_bytes <= 0)
18249 align_bytes = 0;
18250 else
18251 align_bytes = desired_align - align_bytes;
18252 }
18253 if (align_bytes == 0)
18254 count_exp = force_reg (counter_mode (count_exp), count_exp);
18255 }
18256 gcc_assert (desired_align >= 1 && align >= 1);
18257
18258 /* Ensure that alignment prologue won't copy past end of block. */
18259 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18260 {
18261 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18262 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18263 Make sure EPILOGUE_SIZE_NEEDED is a power of 2. */
18264 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18265
18266 if (count)
18267 {
18268 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18269 {
18270 /* If main algorithm works on QImode, no epilogue is needed.
18271 For small sizes just don't align anything. */
18272 if (size_needed == 1)
18273 desired_align = align;
18274 else
18275 goto epilogue;
18276 }
18277 }
18278 else
18279 {
18280 label = gen_label_rtx ();
18281 emit_cmp_and_jump_insns (count_exp,
18282 GEN_INT (epilogue_size_needed),
18283 LTU, 0, counter_mode (count_exp), 1, label);
18284 if (expected_size == -1 || expected_size < epilogue_size_needed)
18285 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18286 else
18287 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18288 }
18289 }
18290
18291 /* Emit code to decide at runtime whether a library call or inline code
18292 should be used. */
18293 if (dynamic_check != -1)
18294 {
18295 if (CONST_INT_P (count_exp))
18296 {
18297 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18298 {
18299 emit_block_move_via_libcall (dst, src, count_exp, false);
18300 count_exp = const0_rtx;
18301 goto epilogue;
18302 }
18303 }
18304 else
18305 {
18306 rtx hot_label = gen_label_rtx ();
18307 jump_around_label = gen_label_rtx ();
18308 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18309 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18310 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18311 emit_block_move_via_libcall (dst, src, count_exp, false);
18312 emit_jump (jump_around_label);
18313 emit_label (hot_label);
18314 }
18315 }
18316
18317 /* Step 2: Alignment prologue. */
18318
18319 if (desired_align > align)
18320 {
18321 if (align_bytes == 0)
18322 {
18323 /* Except for the first move in the epilogue, we no longer know
18324 the constant offset in the aliasing info. It doesn't seem worth
18325 the pain to maintain it for the first move, so throw away
18326 the info early. */
18327 src = change_address (src, BLKmode, srcreg);
18328 dst = change_address (dst, BLKmode, destreg);
18329 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18330 desired_align);
18331 }
18332 else
18333 {
18334 /* If we know how many bytes need to be stored before dst is
18335 sufficiently aligned, maintain aliasing info accurately. */
18336 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18337 desired_align, align_bytes);
18338 count_exp = plus_constant (count_exp, -align_bytes);
18339 count -= align_bytes;
18340 }
18341 if (need_zero_guard
18342 && (count < (unsigned HOST_WIDE_INT) size_needed
18343 || (align_bytes == 0
18344 && count < ((unsigned HOST_WIDE_INT) size_needed
18345 + desired_align - align))))
18346 {
18347 /* It is possible that we copied enough so that the main loop will not
18348 execute. */
18349 gcc_assert (size_needed > 1);
18350 if (label == NULL_RTX)
18351 label = gen_label_rtx ();
18352 emit_cmp_and_jump_insns (count_exp,
18353 GEN_INT (size_needed),
18354 LTU, 0, counter_mode (count_exp), 1, label);
18355 if (expected_size == -1
18356 || expected_size < (desired_align - align) / 2 + size_needed)
18357 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18358 else
18359 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18360 }
18361 }
18362 if (label && size_needed == 1)
18363 {
18364 emit_label (label);
18365 LABEL_NUSES (label) = 1;
18366 label = NULL;
18367 epilogue_size_needed = 1;
18368 }
18369 else if (label == NULL_RTX)
18370 epilogue_size_needed = size_needed;
18371
18372 /* Step 3: Main loop. */
18373
18374 switch (alg)
18375 {
18376 case libcall:
18377 case no_stringop:
18378 gcc_unreachable ();
18379 case loop_1_byte:
18380 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18381 count_exp, QImode, 1, expected_size);
18382 break;
18383 case loop:
18384 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18385 count_exp, Pmode, 1, expected_size);
18386 break;
18387 case unrolled_loop:
18388 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18389 registers for 4 temporaries anyway. */
18390 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18391 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18392 expected_size);
18393 break;
18394 case rep_prefix_8_byte:
18395 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18396 DImode);
18397 break;
18398 case rep_prefix_4_byte:
18399 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18400 SImode);
18401 break;
18402 case rep_prefix_1_byte:
18403 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18404 QImode);
18405 break;
18406 }
18407 /* Properly adjust the offsets of the src and dest memory for aliasing. */
18408 if (CONST_INT_P (count_exp))
18409 {
18410 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18411 (count / size_needed) * size_needed);
18412 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18413 (count / size_needed) * size_needed);
18414 }
18415 else
18416 {
18417 src = change_address (src, BLKmode, srcreg);
18418 dst = change_address (dst, BLKmode, destreg);
18419 }
18420
18421 /* Step 4: Epilogue to copy the remaining bytes. */
18422 epilogue:
18423 if (label)
18424 {
18425 /* When the main loop is done, COUNT_EXP might hold the original count,
18426 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18427 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
18428 bytes. Compensate if needed. */
18429
18430 if (size_needed < epilogue_size_needed)
18431 {
18432 tmp =
18433 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18434 GEN_INT (size_needed - 1), count_exp, 1,
18435 OPTAB_DIRECT);
18436 if (tmp != count_exp)
18437 emit_move_insn (count_exp, tmp);
18438 }
18439 emit_label (label);
18440 LABEL_NUSES (label) = 1;
18441 }
18442
18443 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18444 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18445 epilogue_size_needed);
18446 if (jump_around_label)
18447 emit_label (jump_around_label);
18448 return 1;
18449 }
18450
18451 /* Helper function for memset. For the QImode value 0xXY produce
18452 0xXYXYXYXY of the width specified by MODE. This is essentially
18453 VAL * 0x01010101, but we can do slightly better than
18454 synth_mult by unwinding the sequence by hand on CPUs with
18455 slow multiply. */
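/* For example, VAL = 0x5A becomes 0x5A5A after one shift/or step,
0x5A5A5A5A after a second, and (for DImode) 0x5A5A5A5A5A5A5A5A after
a third; for a constant VAL the expansion below is folded at compile
time instead. */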
18456 static rtx
18457 promote_duplicated_reg (enum machine_mode mode, rtx val)
18458 {
18459 enum machine_mode valmode = GET_MODE (val);
18460 rtx tmp;
18461 int nops = mode == DImode ? 3 : 2;
18462
18463 gcc_assert (mode == SImode || mode == DImode);
18464 if (val == const0_rtx)
18465 return copy_to_mode_reg (mode, const0_rtx);
18466 if (CONST_INT_P (val))
18467 {
18468 HOST_WIDE_INT v = INTVAL (val) & 255;
18469
18470 v |= v << 8;
18471 v |= v << 16;
18472 if (mode == DImode)
18473 v |= (v << 16) << 16;
18474 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18475 }
18476
18477 if (valmode == VOIDmode)
18478 valmode = QImode;
18479 if (valmode != QImode)
18480 val = gen_lowpart (QImode, val);
18481 if (mode == QImode)
18482 return val;
18483 if (!TARGET_PARTIAL_REG_STALL)
18484 nops--;
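/* Decide between a single multiply by 0x01010101 (0x0101010101010101
for DImode) and the explicit shift/or sequence below, whichever the
cost tables make cheaper. When partial register stalls are not a
problem, the first duplication step is a single insv insn rather than
a shift and an or, hence the adjustment of NOPS above. */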
18485 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18486 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18487 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18488 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18489 {
18490 rtx reg = convert_modes (mode, QImode, val, true);
18491 tmp = promote_duplicated_reg (mode, const1_rtx);
18492 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18493 OPTAB_DIRECT);
18494 }
18495 else
18496 {
18497 rtx reg = convert_modes (mode, QImode, val, true);
18498
18499 if (!TARGET_PARTIAL_REG_STALL)
18500 if (mode == SImode)
18501 emit_insn (gen_movsi_insv_1 (reg, reg));
18502 else
18503 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18504 else
18505 {
18506 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18507 NULL, 1, OPTAB_DIRECT);
18508 reg =
18509 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18510 }
18511 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18512 NULL, 1, OPTAB_DIRECT);
18513 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18514 if (mode == SImode)
18515 return reg;
18516 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18517 NULL, 1, OPTAB_DIRECT);
18518 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18519 return reg;
18520 }
18521 }
18522
18523 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
18524 will be needed by the main loop storing SIZE_NEEDED chunks and by the prologue
18525 raising the alignment from ALIGN to DESIRED_ALIGN. */
18526 static rtx
18527 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18528 {
18529 rtx promoted_val;
18530
18531 if (TARGET_64BIT
18532 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18533 promoted_val = promote_duplicated_reg (DImode, val);
18534 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18535 promoted_val = promote_duplicated_reg (SImode, val);
18536 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18537 promoted_val = promote_duplicated_reg (HImode, val);
18538 else
18539 promoted_val = val;
18540
18541 return promoted_val;
18542 }
18543
18544 /* Expand string set operation (memset, bzero). Use i386 string operations
18545 when profitable. See the ix86_expand_movmem comment for an explanation of
18546 the individual steps performed. */
18547 int
18548 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18549 rtx expected_align_exp, rtx expected_size_exp)
18550 {
18551 rtx destreg;
18552 rtx label = NULL;
18553 rtx tmp;
18554 rtx jump_around_label = NULL;
18555 HOST_WIDE_INT align = 1;
18556 unsigned HOST_WIDE_INT count = 0;
18557 HOST_WIDE_INT expected_size = -1;
18558 int size_needed = 0, epilogue_size_needed;
18559 int desired_align = 0, align_bytes = 0;
18560 enum stringop_alg alg;
18561 rtx promoted_val = NULL;
18562 bool force_loopy_epilogue = false;
18563 int dynamic_check;
18564 bool need_zero_guard = false;
18565
18566 if (CONST_INT_P (align_exp))
18567 align = INTVAL (align_exp);
18568 /* i386 can do misaligned access at a reasonably increased cost. */
18569 if (CONST_INT_P (expected_align_exp)
18570 && INTVAL (expected_align_exp) > align)
18571 align = INTVAL (expected_align_exp);
18572 if (CONST_INT_P (count_exp))
18573 count = expected_size = INTVAL (count_exp);
18574 if (CONST_INT_P (expected_size_exp) && count == 0)
18575 expected_size = INTVAL (expected_size_exp);
18576
18577 /* Make sure we don't need to care about overflow later on. */
18578 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18579 return 0;
18580
18581 /* Step 0: Decide on preferred algorithm, desired alignment and
18582 size of chunks to be copied by main loop. */
18583
18584 alg = decide_alg (count, expected_size, true, &dynamic_check);
18585 desired_align = decide_alignment (align, alg, expected_size);
18586
18587 if (!TARGET_ALIGN_STRINGOPS)
18588 align = desired_align;
18589
18590 if (alg == libcall)
18591 return 0;
18592 gcc_assert (alg != no_stringop);
18593 if (!count)
18594 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18595 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18596 switch (alg)
18597 {
18598 case libcall:
18599 case no_stringop:
18600 gcc_unreachable ();
18601 case loop:
18602 need_zero_guard = true;
18603 size_needed = GET_MODE_SIZE (Pmode);
18604 break;
18605 case unrolled_loop:
18606 need_zero_guard = true;
18607 size_needed = GET_MODE_SIZE (Pmode) * 4;
18608 break;
18609 case rep_prefix_8_byte:
18610 size_needed = 8;
18611 break;
18612 case rep_prefix_4_byte:
18613 size_needed = 4;
18614 break;
18615 case rep_prefix_1_byte:
18616 size_needed = 1;
18617 break;
18618 case loop_1_byte:
18619 need_zero_guard = true;
18620 size_needed = 1;
18621 break;
18622 }
18623 epilogue_size_needed = size_needed;
18624
18625 /* Step 1: Prologue guard. */
18626
18627 /* Alignment code needs count to be in register. */
18628 if (CONST_INT_P (count_exp) && desired_align > align)
18629 {
18630 if (INTVAL (count_exp) > desired_align
18631 && INTVAL (count_exp) > size_needed)
18632 {
18633 align_bytes
18634 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18635 if (align_bytes <= 0)
18636 align_bytes = 0;
18637 else
18638 align_bytes = desired_align - align_bytes;
18639 }
18640 if (align_bytes == 0)
18641 {
18642 enum machine_mode mode = SImode;
18643 if (TARGET_64BIT && (count & ~0xffffffff))
18644 mode = DImode;
18645 count_exp = force_reg (mode, count_exp);
18646 }
18647 }
18648 /* Do the cheap promotion to allow better CSE across the
18649 main loop and epilogue (i.e. one load of the big constant in
18650 front of all the code). */
18651 if (CONST_INT_P (val_exp))
18652 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18653 desired_align, align);
18654 /* Ensure that alignment prologue won't copy past end of block. */
18655 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18656 {
18657 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18658 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18659 Make sure it is power of 2. */
18660 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18661
18662 /* To improve performance of small blocks, we jump around the VAL
18663 promoting code. This means that if the promoted VAL is not constant,
18664 we might not use it in the epilogue and have to use the byte
18665 loop variant. */
18666 if (epilogue_size_needed > 2 && !promoted_val)
18667 force_loopy_epilogue = true;
18668 if (count)
18669 {
18670 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18671 {
18672 /* If main algorithm works on QImode, no epilogue is needed.
18673 For small sizes just don't align anything. */
18674 if (size_needed == 1)
18675 desired_align = align;
18676 else
18677 goto epilogue;
18678 }
18679 }
18680 else
18681 {
18682 label = gen_label_rtx ();
18683 emit_cmp_and_jump_insns (count_exp,
18684 GEN_INT (epilogue_size_needed),
18685 LTU, 0, counter_mode (count_exp), 1, label);
18686 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18687 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18688 else
18689 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18690 }
18691 }
18692 if (dynamic_check != -1)
18693 {
18694 rtx hot_label = gen_label_rtx ();
18695 jump_around_label = gen_label_rtx ();
18696 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18697 LEU, 0, counter_mode (count_exp), 1, hot_label);
18698 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18699 set_storage_via_libcall (dst, count_exp, val_exp, false);
18700 emit_jump (jump_around_label);
18701 emit_label (hot_label);
18702 }
18703
18704 /* Step 2: Alignment prologue. */
18705
18706 /* Do the expensive promotion once we have branched off the small blocks. */
18707 if (!promoted_val)
18708 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18709 desired_align, align);
18710 gcc_assert (desired_align >= 1 && align >= 1);
18711
18712 if (desired_align > align)
18713 {
18714 if (align_bytes == 0)
18715 {
18716 /* Except for the first move in the epilogue, we no longer know
18717 the constant offset in aliasing info. It does not seem worth
18718 the pain to maintain it for the first move, so throw away
18719 the info early. */
18720 dst = change_address (dst, BLKmode, destreg);
18721 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18722 desired_align);
18723 }
18724 else
18725 {
18726 /* If we know how many bytes need to be stored before dst is
18727 sufficiently aligned, maintain aliasing info accurately. */
18728 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18729 desired_align, align_bytes);
18730 count_exp = plus_constant (count_exp, -align_bytes);
18731 count -= align_bytes;
18732 }
18733 if (need_zero_guard
18734 && (count < (unsigned HOST_WIDE_INT) size_needed
18735 || (align_bytes == 0
18736 && count < ((unsigned HOST_WIDE_INT) size_needed
18737 + desired_align - align))))
18738 {
18739 /* It is possible that we stored enough so that the main loop will not
18740 execute. */
18741 gcc_assert (size_needed > 1);
18742 if (label == NULL_RTX)
18743 label = gen_label_rtx ();
18744 emit_cmp_and_jump_insns (count_exp,
18745 GEN_INT (size_needed),
18746 LTU, 0, counter_mode (count_exp), 1, label);
18747 if (expected_size == -1
18748 || expected_size < (desired_align - align) / 2 + size_needed)
18749 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18750 else
18751 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18752 }
18753 }
18754 if (label && size_needed == 1)
18755 {
18756 emit_label (label);
18757 LABEL_NUSES (label) = 1;
18758 label = NULL;
18759 promoted_val = val_exp;
18760 epilogue_size_needed = 1;
18761 }
18762 else if (label == NULL_RTX)
18763 epilogue_size_needed = size_needed;
18764
18765 /* Step 3: Main loop. */
18766
18767 switch (alg)
18768 {
18769 case libcall:
18770 case no_stringop:
18771 gcc_unreachable ();
18772 case loop_1_byte:
18773 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18774 count_exp, QImode, 1, expected_size);
18775 break;
18776 case loop:
18777 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18778 count_exp, Pmode, 1, expected_size);
18779 break;
18780 case unrolled_loop:
18781 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18782 count_exp, Pmode, 4, expected_size);
18783 break;
18784 case rep_prefix_8_byte:
18785 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18786 DImode, val_exp);
18787 break;
18788 case rep_prefix_4_byte:
18789 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18790 SImode, val_exp);
18791 break;
18792 case rep_prefix_1_byte:
18793 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18794 QImode, val_exp);
18795 break;
18796 }
18797 /* Properly adjust the offset of the dest memory for aliasing. */
18798 if (CONST_INT_P (count_exp))
18799 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18800 (count / size_needed) * size_needed);
18801 else
18802 dst = change_address (dst, BLKmode, destreg);
18803
18804 /* Step 4: Epilogue to copy the remaining bytes. */
18805
18806 if (label)
18807 {
18808 /* When the main loop is done, COUNT_EXP might hold the original count,
18809 while we want to store only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18810 Epilogue code will actually store COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
18811 bytes. Compensate if needed. */
18812
18813 if (size_needed < epilogue_size_needed)
18814 {
18815 tmp =
18816 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18817 GEN_INT (size_needed - 1), count_exp, 1,
18818 OPTAB_DIRECT);
18819 if (tmp != count_exp)
18820 emit_move_insn (count_exp, tmp);
18821 }
18822 emit_label (label);
18823 LABEL_NUSES (label) = 1;
18824 }
18825 epilogue:
18826 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18827 {
18828 if (force_loopy_epilogue)
18829 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18830 epilogue_size_needed);
18831 else
18832 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18833 epilogue_size_needed);
18834 }
18835 if (jump_around_label)
18836 emit_label (jump_around_label);
18837 return 1;
18838 }
18839
18840 /* Expand the appropriate insns for doing strlen if not just doing
18841 repnz; scasb
18842
18843 out = result, initialized with the start address
18844 align_rtx = alignment of the address.
18845 scratch = scratch register, initialized with the start address when
18846 not aligned, otherwise undefined
18847
18848 This is just the body. It needs the initializations mentioned above and
18849 some address computing at the end. These things are done in i386.md. */
18850
18851 static void
18852 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18853 {
18854 int align;
18855 rtx tmp;
18856 rtx align_2_label = NULL_RTX;
18857 rtx align_3_label = NULL_RTX;
18858 rtx align_4_label = gen_label_rtx ();
18859 rtx end_0_label = gen_label_rtx ();
18860 rtx mem;
18861 rtx tmpreg = gen_reg_rtx (SImode);
18862 rtx scratch = gen_reg_rtx (SImode);
18863 rtx cmp;
18864
18865 align = 0;
18866 if (CONST_INT_P (align_rtx))
18867 align = INTVAL (align_rtx);
18868
18869 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18870
18871 /* Is there a known alignment and is it less than 4? */
18872 if (align < 4)
18873 {
18874 rtx scratch1 = gen_reg_rtx (Pmode);
18875 emit_move_insn (scratch1, out);
18876 /* Is there a known alignment and is it not 2? */
18877 if (align != 2)
18878 {
18879 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18880 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18881
18882 /* Leave just the 3 lower bits. */
18883 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18884 NULL_RTX, 0, OPTAB_WIDEN);
18885
18886 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18887 Pmode, 1, align_4_label);
18888 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18889 Pmode, 1, align_2_label);
18890 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18891 Pmode, 1, align_3_label);
18892 }
18893 else
18894 {
18895 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18896 check whether it is aligned to a 4-byte boundary. */
18897
18898 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18899 NULL_RTX, 0, OPTAB_WIDEN);
18900
18901 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18902 Pmode, 1, align_4_label);
18903 }
18904
18905 mem = change_address (src, QImode, out);
18906
18907 /* Now compare the bytes. */
18908
18909 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
18910 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18911 QImode, 1, end_0_label);
18912
18913 /* Increment the address. */
18914 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18915
18916 /* Not needed with an alignment of 2 */
18917 if (align != 2)
18918 {
18919 emit_label (align_2_label);
18920
18921 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18922 end_0_label);
18923
18924 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18925
18926 emit_label (align_3_label);
18927 }
18928
18929 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18930 end_0_label);
18931
18932 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18933 }
18934
18935 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
18936 align this loop: it only makes the program larger and does not help to
18937 speed it up. */
18938 emit_label (align_4_label);
18939
18940 mem = change_address (src, SImode, out);
18941 emit_move_insn (scratch, mem);
18942 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18943
18944 /* This formula yields a nonzero result iff one of the bytes is zero.
18945 This saves three branches inside loop and many cycles. */
18946
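/* The insns below compute (SCRATCH - 0x01010101) & ~SCRATCH & 0x80808080;
this well-known test is nonzero exactly when some byte of the loaded
word is zero, so a single compare-and-branch replaces four per-byte
checks. */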
18947 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18948 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18949 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18950 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18951 gen_int_mode (0x80808080, SImode)));
18952 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18953 align_4_label);
18954
18955 if (TARGET_CMOVE)
18956 {
18957 rtx reg = gen_reg_rtx (SImode);
18958 rtx reg2 = gen_reg_rtx (Pmode);
18959 emit_move_insn (reg, tmpreg);
18960 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18961
18962 /* If zero is not in the first two bytes, move two bytes forward. */
18963 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18964 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18965 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18966 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18967 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18968 reg,
18969 tmpreg)));
18970 /* Emit lea manually to avoid clobbering of flags. */
18971 emit_insn (gen_rtx_SET (SImode, reg2,
18972 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18973
18974 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18975 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18976 emit_insn (gen_rtx_SET (VOIDmode, out,
18977 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18978 reg2,
18979 out)));
18980
18981 }
18982 else
18983 {
18984 rtx end_2_label = gen_label_rtx ();
18985 /* Is zero in the first two bytes? */
18986
18987 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18988 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18989 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18990 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18991 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18992 pc_rtx);
18993 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18994 JUMP_LABEL (tmp) = end_2_label;
18995
18996 /* Not in the first two. Move two bytes forward. */
18997 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18998 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18999
19000 emit_label (end_2_label);
19001
19002 }
19003
19004 /* Avoid branch in fixing the byte. */
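/* At this point OUT is 4 (or, if the zero byte is in the upper half, 6)
bytes past the start of the word just loaded, and the low byte of
TMPREG has 0x80 set exactly when the lower byte of the remaining
candidate pair is zero. Doubling that byte moves the 0x80 bit into the
carry flag, so the subtract-with-borrow of 3 yields OUT - 4 or OUT - 3,
the address of the terminating zero, without a branch. */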
19005 tmpreg = gen_lowpart (QImode, tmpreg);
19006 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19007 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19008 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19009
19010 emit_label (end_0_label);
19011 }
19012
19013 /* Expand strlen. */
19014
19015 int
19016 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19017 {
19018 rtx addr, scratch1, scratch2, scratch3, scratch4;
19019
19020 /* The generic case of the strlen expander is long. Avoid its
19021 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
19022
19023 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19024 && !TARGET_INLINE_ALL_STRINGOPS
19025 && !optimize_insn_for_size_p ()
19026 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19027 return 0;
19028
19029 addr = force_reg (Pmode, XEXP (src, 0));
19030 scratch1 = gen_reg_rtx (Pmode);
19031
19032 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19033 && !optimize_insn_for_size_p ())
19034 {
19035 /* Well, it seems that some optimizer does not combine a call like
19036 foo(strlen(bar), strlen(bar));
19037 when the move and the subtraction are done here. It does calculate
19038 the length just once when these instructions are done inside of
19039 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
19040 often used and I use one fewer register for the lifetime of
19041 output_strlen_unroll(), this is better. */
19042
19043 emit_move_insn (out, addr);
19044
19045 ix86_expand_strlensi_unroll_1 (out, src, align);
19046
19047 /* strlensi_unroll_1 returns the address of the zero at the end of
19048 the string, like memchr(), so compute the length by subtracting
19049 the start address. */
19050 emit_insn ((*ix86_gen_sub3) (out, out, addr));
19051 }
19052 else
19053 {
19054 rtx unspec;
19055
19056 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19057 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19058 return false;
19059
19060 scratch2 = gen_reg_rtx (Pmode);
19061 scratch3 = gen_reg_rtx (Pmode);
19062 scratch4 = force_reg (Pmode, constm1_rtx);
19063
19064 emit_move_insn (scratch3, addr);
19065 eoschar = force_reg (QImode, eoschar);
19066
19067 src = replace_equiv_address_nv (src, scratch3);
19068
19069 /* If .md starts supporting :P, this can be done in .md. */
19070 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19071 scratch4), UNSPEC_SCAS);
19072 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19073 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19074 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19075 }
19076 return 1;
19077 }
19078
19079 /* For a given symbol (function) construct code to compute the address of its
19080 PLT entry in the large x86-64 PIC model. */
19081 rtx
19082 construct_plt_address (rtx symbol)
19083 {
19084 rtx tmp = gen_reg_rtx (Pmode);
19085 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19086
19087 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19088 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19089
19090 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19091 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
19092 return tmp;
19093 }
19094
19095 void
19096 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19097 rtx callarg2,
19098 rtx pop, int sibcall)
19099 {
19100 rtx use = NULL, call;
19101
19102 if (pop == const0_rtx)
19103 pop = NULL;
19104 gcc_assert (!TARGET_64BIT || !pop);
19105
19106 if (TARGET_MACHO && !TARGET_64BIT)
19107 {
19108 #if TARGET_MACHO
19109 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19110 fnaddr = machopic_indirect_call_target (fnaddr);
19111 #endif
19112 }
19113 else
19114 {
19115 /* Static functions and indirect calls don't need the pic register. */
19116 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19117 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19118 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19119 use_reg (&use, pic_offset_table_rtx);
19120 }
19121
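/* For 64-bit calls, CALLARG2 >= 0 identifies a varargs call; the x86-64
psABI requires AL to hold the number of SSE registers used for argument
passing (an upper bound is permitted), so load it and record the use. */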
19122 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19123 {
19124 rtx al = gen_rtx_REG (QImode, AX_REG);
19125 emit_move_insn (al, callarg2);
19126 use_reg (&use, al);
19127 }
19128
19129 if (ix86_cmodel == CM_LARGE_PIC
19130 && GET_CODE (fnaddr) == MEM
19131 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19132 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19133 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19134 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19135 {
19136 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19137 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19138 }
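/* For a 64-bit sibcall to a non-constant address, force the target
address into R11: it is call-clobbered but not used for argument
passing, so loading it here cannot disturb an outgoing argument
register. */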
19139 if (sibcall && TARGET_64BIT
19140 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19141 {
19142 rtx addr;
19143 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19144 fnaddr = gen_rtx_REG (Pmode, R11_REG);
19145 emit_move_insn (fnaddr, addr);
19146 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19147 }
19148
19149 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19150 if (retval)
19151 call = gen_rtx_SET (VOIDmode, retval, call);
19152 if (pop)
19153 {
19154 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19155 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19156 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19157 }
19158 if (TARGET_64BIT
19159 && ix86_cfun_abi () == MS_ABI
19160 && (!callarg2 || INTVAL (callarg2) != -2))
19161 {
19162 /* We need to represent that SI and DI registers are clobbered
19163 by SYSV calls. */
19164 static int clobbered_registers[] = {
19165 XMM6_REG, XMM7_REG, XMM8_REG,
19166 XMM9_REG, XMM10_REG, XMM11_REG,
19167 XMM12_REG, XMM13_REG, XMM14_REG,
19168 XMM15_REG, SI_REG, DI_REG
19169 };
19170 unsigned int i;
19171 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19172 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19173 UNSPEC_MS_TO_SYSV_CALL);
19174
19175 vec[0] = call;
19176 vec[1] = unspec;
19177 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19178 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19179 ? TImode : DImode,
19180 gen_rtx_REG
19181 (SSE_REGNO_P (clobbered_registers[i])
19182 ? TImode : DImode,
19183 clobbered_registers[i]));
19184
19185 call = gen_rtx_PARALLEL (VOIDmode,
19186 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19187 + 2, vec));
19188 }
19189
19190 call = emit_call_insn (call);
19191 if (use)
19192 CALL_INSN_FUNCTION_USAGE (call) = use;
19193 }
19194
19195 \f
19196 /* Clear stack slot assignments remembered from previous functions.
19197 This is called from INIT_EXPANDERS once before RTL is emitted for each
19198 function. */
19199
19200 static struct machine_function *
19201 ix86_init_machine_status (void)
19202 {
19203 struct machine_function *f;
19204
19205 f = GGC_CNEW (struct machine_function);
19206 f->use_fast_prologue_epilogue_nregs = -1;
19207 f->tls_descriptor_call_expanded_p = 0;
19208 f->call_abi = ix86_abi;
19209
19210 return f;
19211 }
19212
19213 /* Return a MEM corresponding to a stack slot with mode MODE.
19214 Allocate a new slot if necessary.
19215
19216 The RTL for a function can have several slots available: N is
19217 which slot to use. */
19218
19219 rtx
19220 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19221 {
19222 struct stack_local_entry *s;
19223
19224 gcc_assert (n < MAX_386_STACK_LOCALS);
19225
19226 /* Virtual slot is valid only before vregs are instantiated. */
19227 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19228
19229 for (s = ix86_stack_locals; s; s = s->next)
19230 if (s->mode == mode && s->n == n)
19231 return copy_rtx (s->rtl);
19232
19233 s = (struct stack_local_entry *)
19234 ggc_alloc (sizeof (struct stack_local_entry));
19235 s->n = n;
19236 s->mode = mode;
19237 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19238
19239 s->next = ix86_stack_locals;
19240 ix86_stack_locals = s;
19241 return s->rtl;
19242 }
19243
19244 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19245
19246 static GTY(()) rtx ix86_tls_symbol;
19247 rtx
19248 ix86_tls_get_addr (void)
19249 {
19250
19251 if (!ix86_tls_symbol)
19252 {
19253 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19254 (TARGET_ANY_GNU_TLS
19255 && !TARGET_64BIT)
19256 ? "___tls_get_addr"
19257 : "__tls_get_addr");
19258 }
19259
19260 return ix86_tls_symbol;
19261 }
19262
19263 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19264
19265 static GTY(()) rtx ix86_tls_module_base_symbol;
19266 rtx
19267 ix86_tls_module_base (void)
19268 {
19269
19270 if (!ix86_tls_module_base_symbol)
19271 {
19272 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19273 "_TLS_MODULE_BASE_");
19274 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19275 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19276 }
19277
19278 return ix86_tls_module_base_symbol;
19279 }
19280 \f
19281 /* Calculate the length of the memory address in the instruction
19282 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19283
19284 int
19285 memory_address_length (rtx addr)
19286 {
19287 struct ix86_address parts;
19288 rtx base, index, disp;
19289 int len;
19290 int ok;
19291
19292 if (GET_CODE (addr) == PRE_DEC
19293 || GET_CODE (addr) == POST_INC
19294 || GET_CODE (addr) == PRE_MODIFY
19295 || GET_CODE (addr) == POST_MODIFY)
19296 return 0;
19297
19298 ok = ix86_decompose_address (addr, &parts);
19299 gcc_assert (ok);
19300
19301 if (parts.base && GET_CODE (parts.base) == SUBREG)
19302 parts.base = SUBREG_REG (parts.base);
19303 if (parts.index && GET_CODE (parts.index) == SUBREG)
19304 parts.index = SUBREG_REG (parts.index);
19305
19306 base = parts.base;
19307 index = parts.index;
19308 disp = parts.disp;
19309 len = 0;
19310
19311 /* Rule of thumb:
19312 - esp as the base always wants an index,
19313 - ebp as the base always wants a displacement. */
19314
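/* For example, (%ebx) needs no extra bytes beyond the modrm byte,
(%esp) needs one SIB byte, (%ebp) needs one disp8 byte (of zero),
and a bare 32-bit absolute address needs four displacement bytes. */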
19315 /* Register Indirect. */
19316 if (base && !index && !disp)
19317 {
19318 /* esp (for its index) and ebp (for its displacement) need
19319 the two-byte modrm form. */
19320 if (addr == stack_pointer_rtx
19321 || addr == arg_pointer_rtx
19322 || addr == frame_pointer_rtx
19323 || addr == hard_frame_pointer_rtx)
19324 len = 1;
19325 }
19326
19327 /* Direct Addressing. */
19328 else if (disp && !base && !index)
19329 len = 4;
19330
19331 else
19332 {
19333 /* Find the length of the displacement constant. */
19334 if (disp)
19335 {
19336 if (base && satisfies_constraint_K (disp))
19337 len = 1;
19338 else
19339 len = 4;
19340 }
19341 /* ebp always wants a displacement. */
19342 else if (base == hard_frame_pointer_rtx)
19343 len = 1;
19344
19345 /* An index requires the two-byte modrm form.... */
19346 if (index
19347 /* ...like esp, which always wants an index. */
19348 || base == stack_pointer_rtx
19349 || base == arg_pointer_rtx
19350 || base == frame_pointer_rtx)
19351 len += 1;
19352 }
19353
19354 return len;
19355 }
19356
19357 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
19358 is set, expect that the insn has an 8-bit immediate alternative. */
19359 int
19360 ix86_attr_length_immediate_default (rtx insn, int shortform)
19361 {
19362 int len = 0;
19363 int i;
19364 extract_insn_cached (insn);
19365 for (i = recog_data.n_operands - 1; i >= 0; --i)
19366 if (CONSTANT_P (recog_data.operand[i]))
19367 {
19368 gcc_assert (!len);
19369 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19370 len = 1;
19371 else
19372 {
19373 switch (get_attr_mode (insn))
19374 {
19375 case MODE_QI:
19376 len+=1;
19377 break;
19378 case MODE_HI:
19379 len+=2;
19380 break;
19381 case MODE_SI:
19382 len+=4;
19383 break;
19384 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
19385 case MODE_DI:
19386 len+=4;
19387 break;
19388 default:
19389 fatal_insn ("unknown insn mode", insn);
19390 }
19391 }
19392 }
19393 return len;
19394 }
19395 /* Compute default value for "length_address" attribute. */
19396 int
19397 ix86_attr_length_address_default (rtx insn)
19398 {
19399 int i;
19400
19401 if (get_attr_type (insn) == TYPE_LEA)
19402 {
19403 rtx set = PATTERN (insn);
19404
19405 if (GET_CODE (set) == PARALLEL)
19406 set = XVECEXP (set, 0, 0);
19407
19408 gcc_assert (GET_CODE (set) == SET);
19409
19410 return memory_address_length (SET_SRC (set));
19411 }
19412
19413 extract_insn_cached (insn);
19414 for (i = recog_data.n_operands - 1; i >= 0; --i)
19415 if (MEM_P (recog_data.operand[i]))
19417 return memory_address_length (XEXP (recog_data.operand[i], 0));
19420 return 0;
19421 }
19422
19423 /* Compute default value for "length_vex" attribute. It includes
19424 2 or 3 byte VEX prefix and 1 opcode byte. */
19425
19426 int
19427 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19428 int has_vex_w)
19429 {
19430 int i;
19431
19432 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit
19433 requires the 3-byte VEX prefix. */
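/* The 2-byte (C5) form can express only VEX.R; any need for VEX.W, or
for VEX.X/VEX.B to reach the extended registers in a memory operand,
forces the 3-byte (C4) form checked for below. */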
19434 if (!has_0f_opcode || has_vex_w)
19435 return 3 + 1;
19436
19437 /* We can always use 2 byte VEX prefix in 32bit. */
19438 if (!TARGET_64BIT)
19439 return 2 + 1;
19440
19441 extract_insn_cached (insn);
19442
19443 for (i = recog_data.n_operands - 1; i >= 0; --i)
19444 if (REG_P (recog_data.operand[i]))
19445 {
19446 /* REX.W bit uses 3 byte VEX prefix. */
19447 if (GET_MODE (recog_data.operand[i]) == DImode)
19448 return 3 + 1;
19449 }
19450 else
19451 {
19452 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19453 if (MEM_P (recog_data.operand[i])
19454 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19455 return 3 + 1;
19456 }
19457
19458 return 2 + 1;
19459 }
19460 \f
19461 /* Return the maximum number of instructions a cpu can issue. */
19462
19463 static int
19464 ix86_issue_rate (void)
19465 {
19466 switch (ix86_tune)
19467 {
19468 case PROCESSOR_PENTIUM:
19469 case PROCESSOR_ATOM:
19470 case PROCESSOR_K6:
19471 return 2;
19472
19473 case PROCESSOR_PENTIUMPRO:
19474 case PROCESSOR_PENTIUM4:
19475 case PROCESSOR_ATHLON:
19476 case PROCESSOR_K8:
19477 case PROCESSOR_AMDFAM10:
19478 case PROCESSOR_NOCONA:
19479 case PROCESSOR_GENERIC32:
19480 case PROCESSOR_GENERIC64:
19481 return 3;
19482
19483 case PROCESSOR_CORE2:
19484 return 4;
19485
19486 default:
19487 return 1;
19488 }
19489 }
19490
19491 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
19492 set by DEP_INSN and nothing else set by DEP_INSN. */
19493
19494 static int
19495 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19496 {
19497 rtx set, set2;
19498
19499 /* Simplify the test for uninteresting insns. */
19500 if (insn_type != TYPE_SETCC
19501 && insn_type != TYPE_ICMOV
19502 && insn_type != TYPE_FCMOV
19503 && insn_type != TYPE_IBR)
19504 return 0;
19505
19506 if ((set = single_set (dep_insn)) != 0)
19507 {
19508 set = SET_DEST (set);
19509 set2 = NULL_RTX;
19510 }
19511 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19512 && XVECLEN (PATTERN (dep_insn), 0) == 2
19513 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19514 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19515 {
19516 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19517 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19518 }
19519 else
19520 return 0;
19521
19522 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19523 return 0;
19524
19525 /* This test is true if the dependent insn reads the flags but
19526 not any other potentially set register. */
19527 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19528 return 0;
19529
19530 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19531 return 0;
19532
19533 return 1;
19534 }
19535
19536 /* Return true iff USE_INSN has a memory address with operands set by
19537 SET_INSN. */
19538
19539 bool
19540 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19541 {
19542 int i;
19543 extract_insn_cached (use_insn);
19544 for (i = recog_data.n_operands - 1; i >= 0; --i)
19545 if (MEM_P (recog_data.operand[i]))
19546 {
19547 rtx addr = XEXP (recog_data.operand[i], 0);
19548 return modified_in_p (addr, set_insn) != 0;
19549 }
19550 return false;
19551 }
19552
19553 static int
19554 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19555 {
19556 enum attr_type insn_type, dep_insn_type;
19557 enum attr_memory memory;
19558 rtx set, set2;
19559 int dep_insn_code_number;
19560
19561 /* Anti and output dependencies have zero cost on all CPUs. */
19562 if (REG_NOTE_KIND (link) != 0)
19563 return 0;
19564
19565 dep_insn_code_number = recog_memoized (dep_insn);
19566
19567 /* If we can't recognize the insns, we can't really do anything. */
19568 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19569 return cost;
19570
19571 insn_type = get_attr_type (insn);
19572 dep_insn_type = get_attr_type (dep_insn);
19573
19574 switch (ix86_tune)
19575 {
19576 case PROCESSOR_PENTIUM:
19577 /* Address Generation Interlock adds a cycle of latency. */
19578 if (insn_type == TYPE_LEA)
19579 {
19580 rtx addr = PATTERN (insn);
19581
19582 if (GET_CODE (addr) == PARALLEL)
19583 addr = XVECEXP (addr, 0, 0);
19584
19585 gcc_assert (GET_CODE (addr) == SET);
19586
19587 addr = SET_SRC (addr);
19588 if (modified_in_p (addr, dep_insn))
19589 cost += 1;
19590 }
19591 else if (ix86_agi_dependent (dep_insn, insn))
19592 cost += 1;
19593
19594 /* ??? Compares pair with jump/setcc. */
19595 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19596 cost = 0;
19597
19598 /* Floating point stores require value to be ready one cycle earlier. */
19599 if (insn_type == TYPE_FMOV
19600 && get_attr_memory (insn) == MEMORY_STORE
19601 && !ix86_agi_dependent (dep_insn, insn))
19602 cost += 1;
19603 break;
19604
19605 case PROCESSOR_PENTIUMPRO:
19606 memory = get_attr_memory (insn);
19607
19608 /* INT->FP conversion is expensive. */
19609 if (get_attr_fp_int_src (dep_insn))
19610 cost += 5;
19611
19612 /* There is one cycle extra latency between an FP op and a store. */
19613 if (insn_type == TYPE_FMOV
19614 && (set = single_set (dep_insn)) != NULL_RTX
19615 && (set2 = single_set (insn)) != NULL_RTX
19616 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19617 && MEM_P (SET_DEST (set2)))
19618 cost += 1;
19619
19620 /* Show the ability of the reorder buffer to hide the latency of a load by
19621 executing it in parallel with the previous instruction when the
19622 previous instruction is not needed to compute the address. */
19623 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19624 && !ix86_agi_dependent (dep_insn, insn))
19625 {
19626 /* Claim moves to take one cycle, as the core can issue one load
19627 at a time and the next load can start a cycle later. */
19628 if (dep_insn_type == TYPE_IMOV
19629 || dep_insn_type == TYPE_FMOV)
19630 cost = 1;
19631 else if (cost > 1)
19632 cost--;
19633 }
19634 break;
19635
19636 case PROCESSOR_K6:
19637 memory = get_attr_memory (insn);
19638
19639 /* The esp dependency is resolved before the instruction is really
19640 finished. */
19641 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19642 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19643 return 1;
19644
19645 /* INT->FP conversion is expensive. */
19646 if (get_attr_fp_int_src (dep_insn))
19647 cost += 5;
19648
19649 /* Show the ability of the reorder buffer to hide the latency of a load by
19650 executing it in parallel with the previous instruction when the
19651 previous instruction is not needed to compute the address. */
19652 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19653 && !ix86_agi_dependent (dep_insn, insn))
19654 {
19655 /* Claim moves to take one cycle, as the core can issue one load
19656 at a time and the next load can start a cycle later. */
19657 if (dep_insn_type == TYPE_IMOV
19658 || dep_insn_type == TYPE_FMOV)
19659 cost = 1;
19660 else if (cost > 2)
19661 cost -= 2;
19662 else
19663 cost = 1;
19664 }
19665 break;
19666
19667 case PROCESSOR_ATHLON:
19668 case PROCESSOR_K8:
19669 case PROCESSOR_AMDFAM10:
19670 case PROCESSOR_ATOM:
19671 case PROCESSOR_GENERIC32:
19672 case PROCESSOR_GENERIC64:
19673 memory = get_attr_memory (insn);
19674
19675 /* Show the ability of the reorder buffer to hide the latency of a load by
19676 executing it in parallel with the previous instruction when the
19677 previous instruction is not needed to compute the address. */
19678 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19679 && !ix86_agi_dependent (dep_insn, insn))
19680 {
19681 enum attr_unit unit = get_attr_unit (insn);
19682 int loadcost = 3;
19683
19684 /* Because of the difference between the length of integer and
19685 floating unit pipeline preparation stages, the memory operands
19686 for floating point are cheaper.
19687
19688 ??? For Athlon the difference is most probably 2. */
19689 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19690 loadcost = 3;
19691 else
19692 loadcost = TARGET_ATHLON ? 2 : 0;
19693
19694 if (cost >= loadcost)
19695 cost -= loadcost;
19696 else
19697 cost = 0;
19698 }
19699
19700 default:
19701 break;
19702 }
19703
19704 return cost;
19705 }
19706
19707 /* How many alternative schedules to try. This should be as wide as the
19708 scheduling freedom in the DFA, but no wider. Making this value too
19709 large results in extra work for the scheduler. */
19710
19711 static int
19712 ia32_multipass_dfa_lookahead (void)
19713 {
19714 switch (ix86_tune)
19715 {
19716 case PROCESSOR_PENTIUM:
19717 return 2;
19718
19719 case PROCESSOR_PENTIUMPRO:
19720 case PROCESSOR_K6:
19721 return 1;
19722
19723 default:
19724 return 0;
19725 }
19726 }
19727
19728 \f
19729 /* Compute the alignment given to a constant that is being placed in memory.
19730 EXP is the constant and ALIGN is the alignment that the object would
19731 ordinarily have.
19732 The value of this function is used instead of that alignment to align
19733 the object. */
19734
19735 int
19736 ix86_constant_alignment (tree exp, int align)
19737 {
19738 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19739 || TREE_CODE (exp) == INTEGER_CST)
19740 {
19741 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19742 return 64;
19743 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19744 return 128;
19745 }
19746 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19747 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19748 return BITS_PER_WORD;
19749
19750 return align;
19751 }
19752
19753 /* Compute the alignment for a static variable.
19754 TYPE is the data type, and ALIGN is the alignment that
19755 the object would ordinarily have. The value of this function is used
19756 instead of that alignment to align the object. */
19757
19758 int
19759 ix86_data_alignment (tree type, int align)
19760 {
19761 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19762
19763 if (AGGREGATE_TYPE_P (type)
19764 && TYPE_SIZE (type)
19765 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19766 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19767 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19768 && align < max_align)
19769 align = max_align;
19770
19771 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19772 to 16byte boundary. */
19773 if (TARGET_64BIT)
19774 {
19775 if (AGGREGATE_TYPE_P (type)
19776 && TYPE_SIZE (type)
19777 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19778 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19779 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19780 return 128;
19781 }
19782
19783 if (TREE_CODE (type) == ARRAY_TYPE)
19784 {
19785 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19786 return 64;
19787 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19788 return 128;
19789 }
19790 else if (TREE_CODE (type) == COMPLEX_TYPE)
19791 {
19792
19793 if (TYPE_MODE (type) == DCmode && align < 64)
19794 return 64;
19795 if ((TYPE_MODE (type) == XCmode
19796 || TYPE_MODE (type) == TCmode) && align < 128)
19797 return 128;
19798 }
19799 else if ((TREE_CODE (type) == RECORD_TYPE
19800 || TREE_CODE (type) == UNION_TYPE
19801 || TREE_CODE (type) == QUAL_UNION_TYPE)
19802 && TYPE_FIELDS (type))
19803 {
19804 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19805 return 64;
19806 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19807 return 128;
19808 }
19809 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19810 || TREE_CODE (type) == INTEGER_TYPE)
19811 {
19812 if (TYPE_MODE (type) == DFmode && align < 64)
19813 return 64;
19814 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19815 return 128;
19816 }
19817
19818 return align;
19819 }
19820
19821 /* Compute the alignment for a local variable or a stack slot. EXP is
19822 the data type or decl itself, MODE is the widest mode available and
19823 ALIGN is the alignment that the object would ordinarily have. The
19824 value of this macro is used instead of that alignment to align the
19825 object. */
19826
19827 unsigned int
19828 ix86_local_alignment (tree exp, enum machine_mode mode,
19829 unsigned int align)
19830 {
19831 tree type, decl;
19832
19833 if (exp && DECL_P (exp))
19834 {
19835 type = TREE_TYPE (exp);
19836 decl = exp;
19837 }
19838 else
19839 {
19840 type = exp;
19841 decl = NULL;
19842 }
19843
19844 /* Don't do dynamic stack realignment for long long objects with
19845 -mpreferred-stack-boundary=2. */
19846 if (!TARGET_64BIT
19847 && align == 64
19848 && ix86_preferred_stack_boundary < 64
19849 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19850 && (!type || !TYPE_USER_ALIGN (type))
19851 && (!decl || !DECL_USER_ALIGN (decl)))
19852 align = 32;
19853
19854 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19855 register in MODE. We will return the largest alignment of XF
19856 and DF. */
19857 if (!type)
19858 {
19859 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19860 align = GET_MODE_ALIGNMENT (DFmode);
19861 return align;
19862 }
19863
19864 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19865 to 16byte boundary. */
19866 if (TARGET_64BIT)
19867 {
19868 if (AGGREGATE_TYPE_P (type)
19869 && TYPE_SIZE (type)
19870 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19871 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19872 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19873 return 128;
19874 }
19875 if (TREE_CODE (type) == ARRAY_TYPE)
19876 {
19877 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19878 return 64;
19879 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19880 return 128;
19881 }
19882 else if (TREE_CODE (type) == COMPLEX_TYPE)
19883 {
19884 if (TYPE_MODE (type) == DCmode && align < 64)
19885 return 64;
19886 if ((TYPE_MODE (type) == XCmode
19887 || TYPE_MODE (type) == TCmode) && align < 128)
19888 return 128;
19889 }
19890 else if ((TREE_CODE (type) == RECORD_TYPE
19891 || TREE_CODE (type) == UNION_TYPE
19892 || TREE_CODE (type) == QUAL_UNION_TYPE)
19893 && TYPE_FIELDS (type))
19894 {
19895 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19896 return 64;
19897 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19898 return 128;
19899 }
19900 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19901 || TREE_CODE (type) == INTEGER_TYPE)
19902 {
19903
19904 if (TYPE_MODE (type) == DFmode && align < 64)
19905 return 64;
19906 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19907 return 128;
19908 }
19909 return align;
19910 }
19911 \f
19912 /* Emit RTL insns to initialize the variable parts of a trampoline.
19913 FNADDR is an RTX for the address of the function's pure code.
19914 CXT is an RTX for the static chain value for the function. */
19915 void
19916 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19917 {
19918 if (!TARGET_64BIT)
19919 {
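/* The 32-bit trampoline is two instructions:
b9 <cxt> movl $CXT, %ecx
e9 <disp> jmp FNADDR
where <disp> is relative to the end of the 10-byte sequence. */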
19920 /* Compute offset from the end of the jmp to the target function. */
19921 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19922 plus_constant (tramp, 10),
19923 NULL_RTX, 1, OPTAB_DIRECT);
19924 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19925 gen_int_mode (0xb9, QImode));
19926 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19927 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19928 gen_int_mode (0xe9, QImode));
19929 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19930 }
19931 else
19932 {
19933 int offset = 0;
19934 /* Try to load address using shorter movl instead of movabs.
19935 We may want to support movq for kernel mode, but kernel does not use
19936 trampolines at the moment. */
19937 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19938 {
19939 fnaddr = copy_to_mode_reg (DImode, fnaddr);
19940 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19941 gen_int_mode (0xbb41, HImode));
19942 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19943 gen_lowpart (SImode, fnaddr));
19944 offset += 6;
19945 }
19946 else
19947 {
19948 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19949 gen_int_mode (0xbb49, HImode));
19950 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19951 fnaddr);
19952 offset += 10;
19953 }
19954 /* Load static chain using movabs to r10. */
19955 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19956 gen_int_mode (0xba49, HImode));
19957 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19958 cxt);
19959 offset += 10;
19960 /* Jump via r11. */
19961 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19962 gen_int_mode (0xff49, HImode));
19963 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19964 gen_int_mode (0xe3, QImode));
19965 offset += 3;
19966 gcc_assert (offset <= TRAMPOLINE_SIZE);
19967 }
19968
19969 #ifdef ENABLE_EXECUTE_STACK
19970 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19971 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19972 #endif
19973 }
19974 \f
19975 /* Codes for all the SSE/MMX builtins. */
19976 enum ix86_builtins
19977 {
19978 IX86_BUILTIN_ADDPS,
19979 IX86_BUILTIN_ADDSS,
19980 IX86_BUILTIN_DIVPS,
19981 IX86_BUILTIN_DIVSS,
19982 IX86_BUILTIN_MULPS,
19983 IX86_BUILTIN_MULSS,
19984 IX86_BUILTIN_SUBPS,
19985 IX86_BUILTIN_SUBSS,
19986
19987 IX86_BUILTIN_CMPEQPS,
19988 IX86_BUILTIN_CMPLTPS,
19989 IX86_BUILTIN_CMPLEPS,
19990 IX86_BUILTIN_CMPGTPS,
19991 IX86_BUILTIN_CMPGEPS,
19992 IX86_BUILTIN_CMPNEQPS,
19993 IX86_BUILTIN_CMPNLTPS,
19994 IX86_BUILTIN_CMPNLEPS,
19995 IX86_BUILTIN_CMPNGTPS,
19996 IX86_BUILTIN_CMPNGEPS,
19997 IX86_BUILTIN_CMPORDPS,
19998 IX86_BUILTIN_CMPUNORDPS,
19999 IX86_BUILTIN_CMPEQSS,
20000 IX86_BUILTIN_CMPLTSS,
20001 IX86_BUILTIN_CMPLESS,
20002 IX86_BUILTIN_CMPNEQSS,
20003 IX86_BUILTIN_CMPNLTSS,
20004 IX86_BUILTIN_CMPNLESS,
20005 IX86_BUILTIN_CMPNGTSS,
20006 IX86_BUILTIN_CMPNGESS,
20007 IX86_BUILTIN_CMPORDSS,
20008 IX86_BUILTIN_CMPUNORDSS,
20009
20010 IX86_BUILTIN_COMIEQSS,
20011 IX86_BUILTIN_COMILTSS,
20012 IX86_BUILTIN_COMILESS,
20013 IX86_BUILTIN_COMIGTSS,
20014 IX86_BUILTIN_COMIGESS,
20015 IX86_BUILTIN_COMINEQSS,
20016 IX86_BUILTIN_UCOMIEQSS,
20017 IX86_BUILTIN_UCOMILTSS,
20018 IX86_BUILTIN_UCOMILESS,
20019 IX86_BUILTIN_UCOMIGTSS,
20020 IX86_BUILTIN_UCOMIGESS,
20021 IX86_BUILTIN_UCOMINEQSS,
20022
20023 IX86_BUILTIN_CVTPI2PS,
20024 IX86_BUILTIN_CVTPS2PI,
20025 IX86_BUILTIN_CVTSI2SS,
20026 IX86_BUILTIN_CVTSI642SS,
20027 IX86_BUILTIN_CVTSS2SI,
20028 IX86_BUILTIN_CVTSS2SI64,
20029 IX86_BUILTIN_CVTTPS2PI,
20030 IX86_BUILTIN_CVTTSS2SI,
20031 IX86_BUILTIN_CVTTSS2SI64,
20032
20033 IX86_BUILTIN_MAXPS,
20034 IX86_BUILTIN_MAXSS,
20035 IX86_BUILTIN_MINPS,
20036 IX86_BUILTIN_MINSS,
20037
20038 IX86_BUILTIN_LOADUPS,
20039 IX86_BUILTIN_STOREUPS,
20040 IX86_BUILTIN_MOVSS,
20041
20042 IX86_BUILTIN_MOVHLPS,
20043 IX86_BUILTIN_MOVLHPS,
20044 IX86_BUILTIN_LOADHPS,
20045 IX86_BUILTIN_LOADLPS,
20046 IX86_BUILTIN_STOREHPS,
20047 IX86_BUILTIN_STORELPS,
20048
20049 IX86_BUILTIN_MASKMOVQ,
20050 IX86_BUILTIN_MOVMSKPS,
20051 IX86_BUILTIN_PMOVMSKB,
20052
20053 IX86_BUILTIN_MOVNTPS,
20054 IX86_BUILTIN_MOVNTQ,
20055
20056 IX86_BUILTIN_LOADDQU,
20057 IX86_BUILTIN_STOREDQU,
20058
20059 IX86_BUILTIN_PACKSSWB,
20060 IX86_BUILTIN_PACKSSDW,
20061 IX86_BUILTIN_PACKUSWB,
20062
20063 IX86_BUILTIN_PADDB,
20064 IX86_BUILTIN_PADDW,
20065 IX86_BUILTIN_PADDD,
20066 IX86_BUILTIN_PADDQ,
20067 IX86_BUILTIN_PADDSB,
20068 IX86_BUILTIN_PADDSW,
20069 IX86_BUILTIN_PADDUSB,
20070 IX86_BUILTIN_PADDUSW,
20071 IX86_BUILTIN_PSUBB,
20072 IX86_BUILTIN_PSUBW,
20073 IX86_BUILTIN_PSUBD,
20074 IX86_BUILTIN_PSUBQ,
20075 IX86_BUILTIN_PSUBSB,
20076 IX86_BUILTIN_PSUBSW,
20077 IX86_BUILTIN_PSUBUSB,
20078 IX86_BUILTIN_PSUBUSW,
20079
20080 IX86_BUILTIN_PAND,
20081 IX86_BUILTIN_PANDN,
20082 IX86_BUILTIN_POR,
20083 IX86_BUILTIN_PXOR,
20084
20085 IX86_BUILTIN_PAVGB,
20086 IX86_BUILTIN_PAVGW,
20087
20088 IX86_BUILTIN_PCMPEQB,
20089 IX86_BUILTIN_PCMPEQW,
20090 IX86_BUILTIN_PCMPEQD,
20091 IX86_BUILTIN_PCMPGTB,
20092 IX86_BUILTIN_PCMPGTW,
20093 IX86_BUILTIN_PCMPGTD,
20094
20095 IX86_BUILTIN_PMADDWD,
20096
20097 IX86_BUILTIN_PMAXSW,
20098 IX86_BUILTIN_PMAXUB,
20099 IX86_BUILTIN_PMINSW,
20100 IX86_BUILTIN_PMINUB,
20101
20102 IX86_BUILTIN_PMULHUW,
20103 IX86_BUILTIN_PMULHW,
20104 IX86_BUILTIN_PMULLW,
20105
20106 IX86_BUILTIN_PSADBW,
20107 IX86_BUILTIN_PSHUFW,
20108
20109 IX86_BUILTIN_PSLLW,
20110 IX86_BUILTIN_PSLLD,
20111 IX86_BUILTIN_PSLLQ,
20112 IX86_BUILTIN_PSRAW,
20113 IX86_BUILTIN_PSRAD,
20114 IX86_BUILTIN_PSRLW,
20115 IX86_BUILTIN_PSRLD,
20116 IX86_BUILTIN_PSRLQ,
20117 IX86_BUILTIN_PSLLWI,
20118 IX86_BUILTIN_PSLLDI,
20119 IX86_BUILTIN_PSLLQI,
20120 IX86_BUILTIN_PSRAWI,
20121 IX86_BUILTIN_PSRADI,
20122 IX86_BUILTIN_PSRLWI,
20123 IX86_BUILTIN_PSRLDI,
20124 IX86_BUILTIN_PSRLQI,
20125
20126 IX86_BUILTIN_PUNPCKHBW,
20127 IX86_BUILTIN_PUNPCKHWD,
20128 IX86_BUILTIN_PUNPCKHDQ,
20129 IX86_BUILTIN_PUNPCKLBW,
20130 IX86_BUILTIN_PUNPCKLWD,
20131 IX86_BUILTIN_PUNPCKLDQ,
20132
20133 IX86_BUILTIN_SHUFPS,
20134
20135 IX86_BUILTIN_RCPPS,
20136 IX86_BUILTIN_RCPSS,
20137 IX86_BUILTIN_RSQRTPS,
20138 IX86_BUILTIN_RSQRTPS_NR,
20139 IX86_BUILTIN_RSQRTSS,
20140 IX86_BUILTIN_RSQRTF,
20141 IX86_BUILTIN_SQRTPS,
20142 IX86_BUILTIN_SQRTPS_NR,
20143 IX86_BUILTIN_SQRTSS,
20144
20145 IX86_BUILTIN_UNPCKHPS,
20146 IX86_BUILTIN_UNPCKLPS,
20147
20148 IX86_BUILTIN_ANDPS,
20149 IX86_BUILTIN_ANDNPS,
20150 IX86_BUILTIN_ORPS,
20151 IX86_BUILTIN_XORPS,
20152
20153 IX86_BUILTIN_EMMS,
20154 IX86_BUILTIN_LDMXCSR,
20155 IX86_BUILTIN_STMXCSR,
20156 IX86_BUILTIN_SFENCE,
20157
20158 /* 3DNow! Original */
20159 IX86_BUILTIN_FEMMS,
20160 IX86_BUILTIN_PAVGUSB,
20161 IX86_BUILTIN_PF2ID,
20162 IX86_BUILTIN_PFACC,
20163 IX86_BUILTIN_PFADD,
20164 IX86_BUILTIN_PFCMPEQ,
20165 IX86_BUILTIN_PFCMPGE,
20166 IX86_BUILTIN_PFCMPGT,
20167 IX86_BUILTIN_PFMAX,
20168 IX86_BUILTIN_PFMIN,
20169 IX86_BUILTIN_PFMUL,
20170 IX86_BUILTIN_PFRCP,
20171 IX86_BUILTIN_PFRCPIT1,
20172 IX86_BUILTIN_PFRCPIT2,
20173 IX86_BUILTIN_PFRSQIT1,
20174 IX86_BUILTIN_PFRSQRT,
20175 IX86_BUILTIN_PFSUB,
20176 IX86_BUILTIN_PFSUBR,
20177 IX86_BUILTIN_PI2FD,
20178 IX86_BUILTIN_PMULHRW,
20179
20180 /* 3DNow! Athlon Extensions */
20181 IX86_BUILTIN_PF2IW,
20182 IX86_BUILTIN_PFNACC,
20183 IX86_BUILTIN_PFPNACC,
20184 IX86_BUILTIN_PI2FW,
20185 IX86_BUILTIN_PSWAPDSI,
20186 IX86_BUILTIN_PSWAPDSF,
20187
20188 /* SSE2 */
20189 IX86_BUILTIN_ADDPD,
20190 IX86_BUILTIN_ADDSD,
20191 IX86_BUILTIN_DIVPD,
20192 IX86_BUILTIN_DIVSD,
20193 IX86_BUILTIN_MULPD,
20194 IX86_BUILTIN_MULSD,
20195 IX86_BUILTIN_SUBPD,
20196 IX86_BUILTIN_SUBSD,
20197
20198 IX86_BUILTIN_CMPEQPD,
20199 IX86_BUILTIN_CMPLTPD,
20200 IX86_BUILTIN_CMPLEPD,
20201 IX86_BUILTIN_CMPGTPD,
20202 IX86_BUILTIN_CMPGEPD,
20203 IX86_BUILTIN_CMPNEQPD,
20204 IX86_BUILTIN_CMPNLTPD,
20205 IX86_BUILTIN_CMPNLEPD,
20206 IX86_BUILTIN_CMPNGTPD,
20207 IX86_BUILTIN_CMPNGEPD,
20208 IX86_BUILTIN_CMPORDPD,
20209 IX86_BUILTIN_CMPUNORDPD,
20210 IX86_BUILTIN_CMPEQSD,
20211 IX86_BUILTIN_CMPLTSD,
20212 IX86_BUILTIN_CMPLESD,
20213 IX86_BUILTIN_CMPNEQSD,
20214 IX86_BUILTIN_CMPNLTSD,
20215 IX86_BUILTIN_CMPNLESD,
20216 IX86_BUILTIN_CMPORDSD,
20217 IX86_BUILTIN_CMPUNORDSD,
20218
20219 IX86_BUILTIN_COMIEQSD,
20220 IX86_BUILTIN_COMILTSD,
20221 IX86_BUILTIN_COMILESD,
20222 IX86_BUILTIN_COMIGTSD,
20223 IX86_BUILTIN_COMIGESD,
20224 IX86_BUILTIN_COMINEQSD,
20225 IX86_BUILTIN_UCOMIEQSD,
20226 IX86_BUILTIN_UCOMILTSD,
20227 IX86_BUILTIN_UCOMILESD,
20228 IX86_BUILTIN_UCOMIGTSD,
20229 IX86_BUILTIN_UCOMIGESD,
20230 IX86_BUILTIN_UCOMINEQSD,
20231
20232 IX86_BUILTIN_MAXPD,
20233 IX86_BUILTIN_MAXSD,
20234 IX86_BUILTIN_MINPD,
20235 IX86_BUILTIN_MINSD,
20236
20237 IX86_BUILTIN_ANDPD,
20238 IX86_BUILTIN_ANDNPD,
20239 IX86_BUILTIN_ORPD,
20240 IX86_BUILTIN_XORPD,
20241
20242 IX86_BUILTIN_SQRTPD,
20243 IX86_BUILTIN_SQRTSD,
20244
20245 IX86_BUILTIN_UNPCKHPD,
20246 IX86_BUILTIN_UNPCKLPD,
20247
20248 IX86_BUILTIN_SHUFPD,
20249
20250 IX86_BUILTIN_LOADUPD,
20251 IX86_BUILTIN_STOREUPD,
20252 IX86_BUILTIN_MOVSD,
20253
20254 IX86_BUILTIN_LOADHPD,
20255 IX86_BUILTIN_LOADLPD,
20256
20257 IX86_BUILTIN_CVTDQ2PD,
20258 IX86_BUILTIN_CVTDQ2PS,
20259
20260 IX86_BUILTIN_CVTPD2DQ,
20261 IX86_BUILTIN_CVTPD2PI,
20262 IX86_BUILTIN_CVTPD2PS,
20263 IX86_BUILTIN_CVTTPD2DQ,
20264 IX86_BUILTIN_CVTTPD2PI,
20265
20266 IX86_BUILTIN_CVTPI2PD,
20267 IX86_BUILTIN_CVTSI2SD,
20268 IX86_BUILTIN_CVTSI642SD,
20269
20270 IX86_BUILTIN_CVTSD2SI,
20271 IX86_BUILTIN_CVTSD2SI64,
20272 IX86_BUILTIN_CVTSD2SS,
20273 IX86_BUILTIN_CVTSS2SD,
20274 IX86_BUILTIN_CVTTSD2SI,
20275 IX86_BUILTIN_CVTTSD2SI64,
20276
20277 IX86_BUILTIN_CVTPS2DQ,
20278 IX86_BUILTIN_CVTPS2PD,
20279 IX86_BUILTIN_CVTTPS2DQ,
20280
20281 IX86_BUILTIN_MOVNTI,
20282 IX86_BUILTIN_MOVNTPD,
20283 IX86_BUILTIN_MOVNTDQ,
20284
20285 IX86_BUILTIN_MOVQ128,
20286
20287 /* SSE2 MMX */
20288 IX86_BUILTIN_MASKMOVDQU,
20289 IX86_BUILTIN_MOVMSKPD,
20290 IX86_BUILTIN_PMOVMSKB128,
20291
20292 IX86_BUILTIN_PACKSSWB128,
20293 IX86_BUILTIN_PACKSSDW128,
20294 IX86_BUILTIN_PACKUSWB128,
20295
20296 IX86_BUILTIN_PADDB128,
20297 IX86_BUILTIN_PADDW128,
20298 IX86_BUILTIN_PADDD128,
20299 IX86_BUILTIN_PADDQ128,
20300 IX86_BUILTIN_PADDSB128,
20301 IX86_BUILTIN_PADDSW128,
20302 IX86_BUILTIN_PADDUSB128,
20303 IX86_BUILTIN_PADDUSW128,
20304 IX86_BUILTIN_PSUBB128,
20305 IX86_BUILTIN_PSUBW128,
20306 IX86_BUILTIN_PSUBD128,
20307 IX86_BUILTIN_PSUBQ128,
20308 IX86_BUILTIN_PSUBSB128,
20309 IX86_BUILTIN_PSUBSW128,
20310 IX86_BUILTIN_PSUBUSB128,
20311 IX86_BUILTIN_PSUBUSW128,
20312
20313 IX86_BUILTIN_PAND128,
20314 IX86_BUILTIN_PANDN128,
20315 IX86_BUILTIN_POR128,
20316 IX86_BUILTIN_PXOR128,
20317
20318 IX86_BUILTIN_PAVGB128,
20319 IX86_BUILTIN_PAVGW128,
20320
20321 IX86_BUILTIN_PCMPEQB128,
20322 IX86_BUILTIN_PCMPEQW128,
20323 IX86_BUILTIN_PCMPEQD128,
20324 IX86_BUILTIN_PCMPGTB128,
20325 IX86_BUILTIN_PCMPGTW128,
20326 IX86_BUILTIN_PCMPGTD128,
20327
20328 IX86_BUILTIN_PMADDWD128,
20329
20330 IX86_BUILTIN_PMAXSW128,
20331 IX86_BUILTIN_PMAXUB128,
20332 IX86_BUILTIN_PMINSW128,
20333 IX86_BUILTIN_PMINUB128,
20334
20335 IX86_BUILTIN_PMULUDQ,
20336 IX86_BUILTIN_PMULUDQ128,
20337 IX86_BUILTIN_PMULHUW128,
20338 IX86_BUILTIN_PMULHW128,
20339 IX86_BUILTIN_PMULLW128,
20340
20341 IX86_BUILTIN_PSADBW128,
20342 IX86_BUILTIN_PSHUFHW,
20343 IX86_BUILTIN_PSHUFLW,
20344 IX86_BUILTIN_PSHUFD,
20345
20346 IX86_BUILTIN_PSLLDQI128,
20347 IX86_BUILTIN_PSLLWI128,
20348 IX86_BUILTIN_PSLLDI128,
20349 IX86_BUILTIN_PSLLQI128,
20350 IX86_BUILTIN_PSRAWI128,
20351 IX86_BUILTIN_PSRADI128,
20352 IX86_BUILTIN_PSRLDQI128,
20353 IX86_BUILTIN_PSRLWI128,
20354 IX86_BUILTIN_PSRLDI128,
20355 IX86_BUILTIN_PSRLQI128,
20356
20357 IX86_BUILTIN_PSLLDQ128,
20358 IX86_BUILTIN_PSLLW128,
20359 IX86_BUILTIN_PSLLD128,
20360 IX86_BUILTIN_PSLLQ128,
20361 IX86_BUILTIN_PSRAW128,
20362 IX86_BUILTIN_PSRAD128,
20363 IX86_BUILTIN_PSRLW128,
20364 IX86_BUILTIN_PSRLD128,
20365 IX86_BUILTIN_PSRLQ128,
20366
20367 IX86_BUILTIN_PUNPCKHBW128,
20368 IX86_BUILTIN_PUNPCKHWD128,
20369 IX86_BUILTIN_PUNPCKHDQ128,
20370 IX86_BUILTIN_PUNPCKHQDQ128,
20371 IX86_BUILTIN_PUNPCKLBW128,
20372 IX86_BUILTIN_PUNPCKLWD128,
20373 IX86_BUILTIN_PUNPCKLDQ128,
20374 IX86_BUILTIN_PUNPCKLQDQ128,
20375
20376 IX86_BUILTIN_CLFLUSH,
20377 IX86_BUILTIN_MFENCE,
20378 IX86_BUILTIN_LFENCE,
20379
20380 /* SSE3. */
20381 IX86_BUILTIN_ADDSUBPS,
20382 IX86_BUILTIN_HADDPS,
20383 IX86_BUILTIN_HSUBPS,
20384 IX86_BUILTIN_MOVSHDUP,
20385 IX86_BUILTIN_MOVSLDUP,
20386 IX86_BUILTIN_ADDSUBPD,
20387 IX86_BUILTIN_HADDPD,
20388 IX86_BUILTIN_HSUBPD,
20389 IX86_BUILTIN_LDDQU,
20390
20391 IX86_BUILTIN_MONITOR,
20392 IX86_BUILTIN_MWAIT,
20393
20394 /* SSSE3. */
20395 IX86_BUILTIN_PHADDW,
20396 IX86_BUILTIN_PHADDD,
20397 IX86_BUILTIN_PHADDSW,
20398 IX86_BUILTIN_PHSUBW,
20399 IX86_BUILTIN_PHSUBD,
20400 IX86_BUILTIN_PHSUBSW,
20401 IX86_BUILTIN_PMADDUBSW,
20402 IX86_BUILTIN_PMULHRSW,
20403 IX86_BUILTIN_PSHUFB,
20404 IX86_BUILTIN_PSIGNB,
20405 IX86_BUILTIN_PSIGNW,
20406 IX86_BUILTIN_PSIGND,
20407 IX86_BUILTIN_PALIGNR,
20408 IX86_BUILTIN_PABSB,
20409 IX86_BUILTIN_PABSW,
20410 IX86_BUILTIN_PABSD,
20411
20412 IX86_BUILTIN_PHADDW128,
20413 IX86_BUILTIN_PHADDD128,
20414 IX86_BUILTIN_PHADDSW128,
20415 IX86_BUILTIN_PHSUBW128,
20416 IX86_BUILTIN_PHSUBD128,
20417 IX86_BUILTIN_PHSUBSW128,
20418 IX86_BUILTIN_PMADDUBSW128,
20419 IX86_BUILTIN_PMULHRSW128,
20420 IX86_BUILTIN_PSHUFB128,
20421 IX86_BUILTIN_PSIGNB128,
20422 IX86_BUILTIN_PSIGNW128,
20423 IX86_BUILTIN_PSIGND128,
20424 IX86_BUILTIN_PALIGNR128,
20425 IX86_BUILTIN_PABSB128,
20426 IX86_BUILTIN_PABSW128,
20427 IX86_BUILTIN_PABSD128,
20428
20429 /* AMDFAM10 - SSE4A New Instructions. */
20430 IX86_BUILTIN_MOVNTSD,
20431 IX86_BUILTIN_MOVNTSS,
20432 IX86_BUILTIN_EXTRQI,
20433 IX86_BUILTIN_EXTRQ,
20434 IX86_BUILTIN_INSERTQI,
20435 IX86_BUILTIN_INSERTQ,
20436
20437 /* SSE4.1. */
20438 IX86_BUILTIN_BLENDPD,
20439 IX86_BUILTIN_BLENDPS,
20440 IX86_BUILTIN_BLENDVPD,
20441 IX86_BUILTIN_BLENDVPS,
20442 IX86_BUILTIN_PBLENDVB128,
20443 IX86_BUILTIN_PBLENDW128,
20444
20445 IX86_BUILTIN_DPPD,
20446 IX86_BUILTIN_DPPS,
20447
20448 IX86_BUILTIN_INSERTPS128,
20449
20450 IX86_BUILTIN_MOVNTDQA,
20451 IX86_BUILTIN_MPSADBW128,
20452 IX86_BUILTIN_PACKUSDW128,
20453 IX86_BUILTIN_PCMPEQQ,
20454 IX86_BUILTIN_PHMINPOSUW128,
20455
20456 IX86_BUILTIN_PMAXSB128,
20457 IX86_BUILTIN_PMAXSD128,
20458 IX86_BUILTIN_PMAXUD128,
20459 IX86_BUILTIN_PMAXUW128,
20460
20461 IX86_BUILTIN_PMINSB128,
20462 IX86_BUILTIN_PMINSD128,
20463 IX86_BUILTIN_PMINUD128,
20464 IX86_BUILTIN_PMINUW128,
20465
20466 IX86_BUILTIN_PMOVSXBW128,
20467 IX86_BUILTIN_PMOVSXBD128,
20468 IX86_BUILTIN_PMOVSXBQ128,
20469 IX86_BUILTIN_PMOVSXWD128,
20470 IX86_BUILTIN_PMOVSXWQ128,
20471 IX86_BUILTIN_PMOVSXDQ128,
20472
20473 IX86_BUILTIN_PMOVZXBW128,
20474 IX86_BUILTIN_PMOVZXBD128,
20475 IX86_BUILTIN_PMOVZXBQ128,
20476 IX86_BUILTIN_PMOVZXWD128,
20477 IX86_BUILTIN_PMOVZXWQ128,
20478 IX86_BUILTIN_PMOVZXDQ128,
20479
20480 IX86_BUILTIN_PMULDQ128,
20481 IX86_BUILTIN_PMULLD128,
20482
20483 IX86_BUILTIN_ROUNDPD,
20484 IX86_BUILTIN_ROUNDPS,
20485 IX86_BUILTIN_ROUNDSD,
20486 IX86_BUILTIN_ROUNDSS,
20487
20488 IX86_BUILTIN_PTESTZ,
20489 IX86_BUILTIN_PTESTC,
20490 IX86_BUILTIN_PTESTNZC,
20491
20492 IX86_BUILTIN_VEC_INIT_V2SI,
20493 IX86_BUILTIN_VEC_INIT_V4HI,
20494 IX86_BUILTIN_VEC_INIT_V8QI,
20495 IX86_BUILTIN_VEC_EXT_V2DF,
20496 IX86_BUILTIN_VEC_EXT_V2DI,
20497 IX86_BUILTIN_VEC_EXT_V4SF,
20498 IX86_BUILTIN_VEC_EXT_V4SI,
20499 IX86_BUILTIN_VEC_EXT_V8HI,
20500 IX86_BUILTIN_VEC_EXT_V2SI,
20501 IX86_BUILTIN_VEC_EXT_V4HI,
20502 IX86_BUILTIN_VEC_EXT_V16QI,
20503 IX86_BUILTIN_VEC_SET_V2DI,
20504 IX86_BUILTIN_VEC_SET_V4SF,
20505 IX86_BUILTIN_VEC_SET_V4SI,
20506 IX86_BUILTIN_VEC_SET_V8HI,
20507 IX86_BUILTIN_VEC_SET_V4HI,
20508 IX86_BUILTIN_VEC_SET_V16QI,
20509
20510 IX86_BUILTIN_VEC_PACK_SFIX,
20511
20512 /* SSE4.2. */
20513 IX86_BUILTIN_CRC32QI,
20514 IX86_BUILTIN_CRC32HI,
20515 IX86_BUILTIN_CRC32SI,
20516 IX86_BUILTIN_CRC32DI,
20517
20518 IX86_BUILTIN_PCMPESTRI128,
20519 IX86_BUILTIN_PCMPESTRM128,
20520 IX86_BUILTIN_PCMPESTRA128,
20521 IX86_BUILTIN_PCMPESTRC128,
20522 IX86_BUILTIN_PCMPESTRO128,
20523 IX86_BUILTIN_PCMPESTRS128,
20524 IX86_BUILTIN_PCMPESTRZ128,
20525 IX86_BUILTIN_PCMPISTRI128,
20526 IX86_BUILTIN_PCMPISTRM128,
20527 IX86_BUILTIN_PCMPISTRA128,
20528 IX86_BUILTIN_PCMPISTRC128,
20529 IX86_BUILTIN_PCMPISTRO128,
20530 IX86_BUILTIN_PCMPISTRS128,
20531 IX86_BUILTIN_PCMPISTRZ128,
20532
20533 IX86_BUILTIN_PCMPGTQ,
20534
20535 /* AES instructions */
20536 IX86_BUILTIN_AESENC128,
20537 IX86_BUILTIN_AESENCLAST128,
20538 IX86_BUILTIN_AESDEC128,
20539 IX86_BUILTIN_AESDECLAST128,
20540 IX86_BUILTIN_AESIMC128,
20541 IX86_BUILTIN_AESKEYGENASSIST128,
20542
20543 /* PCLMUL instruction */
20544 IX86_BUILTIN_PCLMULQDQ128,
20545
20546 /* AVX */
20547 IX86_BUILTIN_ADDPD256,
20548 IX86_BUILTIN_ADDPS256,
20549 IX86_BUILTIN_ADDSUBPD256,
20550 IX86_BUILTIN_ADDSUBPS256,
20551 IX86_BUILTIN_ANDPD256,
20552 IX86_BUILTIN_ANDPS256,
20553 IX86_BUILTIN_ANDNPD256,
20554 IX86_BUILTIN_ANDNPS256,
20555 IX86_BUILTIN_BLENDPD256,
20556 IX86_BUILTIN_BLENDPS256,
20557 IX86_BUILTIN_BLENDVPD256,
20558 IX86_BUILTIN_BLENDVPS256,
20559 IX86_BUILTIN_DIVPD256,
20560 IX86_BUILTIN_DIVPS256,
20561 IX86_BUILTIN_DPPS256,
20562 IX86_BUILTIN_HADDPD256,
20563 IX86_BUILTIN_HADDPS256,
20564 IX86_BUILTIN_HSUBPD256,
20565 IX86_BUILTIN_HSUBPS256,
20566 IX86_BUILTIN_MAXPD256,
20567 IX86_BUILTIN_MAXPS256,
20568 IX86_BUILTIN_MINPD256,
20569 IX86_BUILTIN_MINPS256,
20570 IX86_BUILTIN_MULPD256,
20571 IX86_BUILTIN_MULPS256,
20572 IX86_BUILTIN_ORPD256,
20573 IX86_BUILTIN_ORPS256,
20574 IX86_BUILTIN_SHUFPD256,
20575 IX86_BUILTIN_SHUFPS256,
20576 IX86_BUILTIN_SUBPD256,
20577 IX86_BUILTIN_SUBPS256,
20578 IX86_BUILTIN_XORPD256,
20579 IX86_BUILTIN_XORPS256,
20580 IX86_BUILTIN_CMPSD,
20581 IX86_BUILTIN_CMPSS,
20582 IX86_BUILTIN_CMPPD,
20583 IX86_BUILTIN_CMPPS,
20584 IX86_BUILTIN_CMPPD256,
20585 IX86_BUILTIN_CMPPS256,
20586 IX86_BUILTIN_CVTDQ2PD256,
20587 IX86_BUILTIN_CVTDQ2PS256,
20588 IX86_BUILTIN_CVTPD2PS256,
20589 IX86_BUILTIN_CVTPS2DQ256,
20590 IX86_BUILTIN_CVTPS2PD256,
20591 IX86_BUILTIN_CVTTPD2DQ256,
20592 IX86_BUILTIN_CVTPD2DQ256,
20593 IX86_BUILTIN_CVTTPS2DQ256,
20594 IX86_BUILTIN_EXTRACTF128PD256,
20595 IX86_BUILTIN_EXTRACTF128PS256,
20596 IX86_BUILTIN_EXTRACTF128SI256,
20597 IX86_BUILTIN_VZEROALL,
20598 IX86_BUILTIN_VZEROUPPER,
20599 IX86_BUILTIN_VZEROUPPER_REX64,
20600 IX86_BUILTIN_VPERMILVARPD,
20601 IX86_BUILTIN_VPERMILVARPS,
20602 IX86_BUILTIN_VPERMILVARPD256,
20603 IX86_BUILTIN_VPERMILVARPS256,
20604 IX86_BUILTIN_VPERMILPD,
20605 IX86_BUILTIN_VPERMILPS,
20606 IX86_BUILTIN_VPERMILPD256,
20607 IX86_BUILTIN_VPERMILPS256,
20608 IX86_BUILTIN_VPERM2F128PD256,
20609 IX86_BUILTIN_VPERM2F128PS256,
20610 IX86_BUILTIN_VPERM2F128SI256,
20611 IX86_BUILTIN_VBROADCASTSS,
20612 IX86_BUILTIN_VBROADCASTSD256,
20613 IX86_BUILTIN_VBROADCASTSS256,
20614 IX86_BUILTIN_VBROADCASTPD256,
20615 IX86_BUILTIN_VBROADCASTPS256,
20616 IX86_BUILTIN_VINSERTF128PD256,
20617 IX86_BUILTIN_VINSERTF128PS256,
20618 IX86_BUILTIN_VINSERTF128SI256,
20619 IX86_BUILTIN_LOADUPD256,
20620 IX86_BUILTIN_LOADUPS256,
20621 IX86_BUILTIN_STOREUPD256,
20622 IX86_BUILTIN_STOREUPS256,
20623 IX86_BUILTIN_LDDQU256,
20624 IX86_BUILTIN_MOVNTDQ256,
20625 IX86_BUILTIN_MOVNTPD256,
20626 IX86_BUILTIN_MOVNTPS256,
20627 IX86_BUILTIN_LOADDQU256,
20628 IX86_BUILTIN_STOREDQU256,
20629 IX86_BUILTIN_MASKLOADPD,
20630 IX86_BUILTIN_MASKLOADPS,
20631 IX86_BUILTIN_MASKSTOREPD,
20632 IX86_BUILTIN_MASKSTOREPS,
20633 IX86_BUILTIN_MASKLOADPD256,
20634 IX86_BUILTIN_MASKLOADPS256,
20635 IX86_BUILTIN_MASKSTOREPD256,
20636 IX86_BUILTIN_MASKSTOREPS256,
20637 IX86_BUILTIN_MOVSHDUP256,
20638 IX86_BUILTIN_MOVSLDUP256,
20639 IX86_BUILTIN_MOVDDUP256,
20640
20641 IX86_BUILTIN_SQRTPD256,
20642 IX86_BUILTIN_SQRTPS256,
20643 IX86_BUILTIN_SQRTPS_NR256,
20644 IX86_BUILTIN_RSQRTPS256,
20645 IX86_BUILTIN_RSQRTPS_NR256,
20646
20647 IX86_BUILTIN_RCPPS256,
20648
20649 IX86_BUILTIN_ROUNDPD256,
20650 IX86_BUILTIN_ROUNDPS256,
20651
20652 IX86_BUILTIN_UNPCKHPD256,
20653 IX86_BUILTIN_UNPCKLPD256,
20654 IX86_BUILTIN_UNPCKHPS256,
20655 IX86_BUILTIN_UNPCKLPS256,
20656
20657 IX86_BUILTIN_SI256_SI,
20658 IX86_BUILTIN_PS256_PS,
20659 IX86_BUILTIN_PD256_PD,
20660 IX86_BUILTIN_SI_SI256,
20661 IX86_BUILTIN_PS_PS256,
20662 IX86_BUILTIN_PD_PD256,
20663
20664 IX86_BUILTIN_VTESTZPD,
20665 IX86_BUILTIN_VTESTCPD,
20666 IX86_BUILTIN_VTESTNZCPD,
20667 IX86_BUILTIN_VTESTZPS,
20668 IX86_BUILTIN_VTESTCPS,
20669 IX86_BUILTIN_VTESTNZCPS,
20670 IX86_BUILTIN_VTESTZPD256,
20671 IX86_BUILTIN_VTESTCPD256,
20672 IX86_BUILTIN_VTESTNZCPD256,
20673 IX86_BUILTIN_VTESTZPS256,
20674 IX86_BUILTIN_VTESTCPS256,
20675 IX86_BUILTIN_VTESTNZCPS256,
20676 IX86_BUILTIN_PTESTZ256,
20677 IX86_BUILTIN_PTESTC256,
20678 IX86_BUILTIN_PTESTNZC256,
20679
20680 IX86_BUILTIN_MOVMSKPD256,
20681 IX86_BUILTIN_MOVMSKPS256,
20682
20683 /* TFmode support builtins. */
20684 IX86_BUILTIN_INFQ,
20685 IX86_BUILTIN_HUGE_VALQ,
20686 IX86_BUILTIN_FABSQ,
20687 IX86_BUILTIN_COPYSIGNQ,
20688
20689 /* SSE5 instructions */
20690 IX86_BUILTIN_FMADDSS,
20691 IX86_BUILTIN_FMADDSD,
20692 IX86_BUILTIN_FMADDPS,
20693 IX86_BUILTIN_FMADDPD,
20694 IX86_BUILTIN_FMSUBSS,
20695 IX86_BUILTIN_FMSUBSD,
20696 IX86_BUILTIN_FMSUBPS,
20697 IX86_BUILTIN_FMSUBPD,
20698 IX86_BUILTIN_FNMADDSS,
20699 IX86_BUILTIN_FNMADDSD,
20700 IX86_BUILTIN_FNMADDPS,
20701 IX86_BUILTIN_FNMADDPD,
20702 IX86_BUILTIN_FNMSUBSS,
20703 IX86_BUILTIN_FNMSUBSD,
20704 IX86_BUILTIN_FNMSUBPS,
20705 IX86_BUILTIN_FNMSUBPD,
20706 IX86_BUILTIN_PCMOV,
20707 IX86_BUILTIN_PCMOV_V2DI,
20708 IX86_BUILTIN_PCMOV_V4SI,
20709 IX86_BUILTIN_PCMOV_V8HI,
20710 IX86_BUILTIN_PCMOV_V16QI,
20711 IX86_BUILTIN_PCMOV_V4SF,
20712 IX86_BUILTIN_PCMOV_V2DF,
20713 IX86_BUILTIN_PPERM,
20714 IX86_BUILTIN_PERMPS,
20715 IX86_BUILTIN_PERMPD,
20716 IX86_BUILTIN_PMACSSWW,
20717 IX86_BUILTIN_PMACSWW,
20718 IX86_BUILTIN_PMACSSWD,
20719 IX86_BUILTIN_PMACSWD,
20720 IX86_BUILTIN_PMACSSDD,
20721 IX86_BUILTIN_PMACSDD,
20722 IX86_BUILTIN_PMACSSDQL,
20723 IX86_BUILTIN_PMACSSDQH,
20724 IX86_BUILTIN_PMACSDQL,
20725 IX86_BUILTIN_PMACSDQH,
20726 IX86_BUILTIN_PMADCSSWD,
20727 IX86_BUILTIN_PMADCSWD,
20728 IX86_BUILTIN_PHADDBW,
20729 IX86_BUILTIN_PHADDBD,
20730 IX86_BUILTIN_PHADDBQ,
20731 IX86_BUILTIN_PHADDWD,
20732 IX86_BUILTIN_PHADDWQ,
20733 IX86_BUILTIN_PHADDDQ,
20734 IX86_BUILTIN_PHADDUBW,
20735 IX86_BUILTIN_PHADDUBD,
20736 IX86_BUILTIN_PHADDUBQ,
20737 IX86_BUILTIN_PHADDUWD,
20738 IX86_BUILTIN_PHADDUWQ,
20739 IX86_BUILTIN_PHADDUDQ,
20740 IX86_BUILTIN_PHSUBBW,
20741 IX86_BUILTIN_PHSUBWD,
20742 IX86_BUILTIN_PHSUBDQ,
20743 IX86_BUILTIN_PROTB,
20744 IX86_BUILTIN_PROTW,
20745 IX86_BUILTIN_PROTD,
20746 IX86_BUILTIN_PROTQ,
20747 IX86_BUILTIN_PROTB_IMM,
20748 IX86_BUILTIN_PROTW_IMM,
20749 IX86_BUILTIN_PROTD_IMM,
20750 IX86_BUILTIN_PROTQ_IMM,
20751 IX86_BUILTIN_PSHLB,
20752 IX86_BUILTIN_PSHLW,
20753 IX86_BUILTIN_PSHLD,
20754 IX86_BUILTIN_PSHLQ,
20755 IX86_BUILTIN_PSHAB,
20756 IX86_BUILTIN_PSHAW,
20757 IX86_BUILTIN_PSHAD,
20758 IX86_BUILTIN_PSHAQ,
20759 IX86_BUILTIN_FRCZSS,
20760 IX86_BUILTIN_FRCZSD,
20761 IX86_BUILTIN_FRCZPS,
20762 IX86_BUILTIN_FRCZPD,
20763 IX86_BUILTIN_CVTPH2PS,
20764 IX86_BUILTIN_CVTPS2PH,
20765
20766 IX86_BUILTIN_COMEQSS,
20767 IX86_BUILTIN_COMNESS,
20768 IX86_BUILTIN_COMLTSS,
20769 IX86_BUILTIN_COMLESS,
20770 IX86_BUILTIN_COMGTSS,
20771 IX86_BUILTIN_COMGESS,
20772 IX86_BUILTIN_COMUEQSS,
20773 IX86_BUILTIN_COMUNESS,
20774 IX86_BUILTIN_COMULTSS,
20775 IX86_BUILTIN_COMULESS,
20776 IX86_BUILTIN_COMUGTSS,
20777 IX86_BUILTIN_COMUGESS,
20778 IX86_BUILTIN_COMORDSS,
20779 IX86_BUILTIN_COMUNORDSS,
20780 IX86_BUILTIN_COMFALSESS,
20781 IX86_BUILTIN_COMTRUESS,
20782
20783 IX86_BUILTIN_COMEQSD,
20784 IX86_BUILTIN_COMNESD,
20785 IX86_BUILTIN_COMLTSD,
20786 IX86_BUILTIN_COMLESD,
20787 IX86_BUILTIN_COMGTSD,
20788 IX86_BUILTIN_COMGESD,
20789 IX86_BUILTIN_COMUEQSD,
20790 IX86_BUILTIN_COMUNESD,
20791 IX86_BUILTIN_COMULTSD,
20792 IX86_BUILTIN_COMULESD,
20793 IX86_BUILTIN_COMUGTSD,
20794 IX86_BUILTIN_COMUGESD,
20795 IX86_BUILTIN_COMORDSD,
20796 IX86_BUILTIN_COMUNORDSD,
20797 IX86_BUILTIN_COMFALSESD,
20798 IX86_BUILTIN_COMTRUESD,
20799
20800 IX86_BUILTIN_COMEQPS,
20801 IX86_BUILTIN_COMNEPS,
20802 IX86_BUILTIN_COMLTPS,
20803 IX86_BUILTIN_COMLEPS,
20804 IX86_BUILTIN_COMGTPS,
20805 IX86_BUILTIN_COMGEPS,
20806 IX86_BUILTIN_COMUEQPS,
20807 IX86_BUILTIN_COMUNEPS,
20808 IX86_BUILTIN_COMULTPS,
20809 IX86_BUILTIN_COMULEPS,
20810 IX86_BUILTIN_COMUGTPS,
20811 IX86_BUILTIN_COMUGEPS,
20812 IX86_BUILTIN_COMORDPS,
20813 IX86_BUILTIN_COMUNORDPS,
20814 IX86_BUILTIN_COMFALSEPS,
20815 IX86_BUILTIN_COMTRUEPS,
20816
20817 IX86_BUILTIN_COMEQPD,
20818 IX86_BUILTIN_COMNEPD,
20819 IX86_BUILTIN_COMLTPD,
20820 IX86_BUILTIN_COMLEPD,
20821 IX86_BUILTIN_COMGTPD,
20822 IX86_BUILTIN_COMGEPD,
20823 IX86_BUILTIN_COMUEQPD,
20824 IX86_BUILTIN_COMUNEPD,
20825 IX86_BUILTIN_COMULTPD,
20826 IX86_BUILTIN_COMULEPD,
20827 IX86_BUILTIN_COMUGTPD,
20828 IX86_BUILTIN_COMUGEPD,
20829 IX86_BUILTIN_COMORDPD,
20830 IX86_BUILTIN_COMUNORDPD,
20831 IX86_BUILTIN_COMFALSEPD,
20832 IX86_BUILTIN_COMTRUEPD,
20833
20834 IX86_BUILTIN_PCOMEQUB,
20835 IX86_BUILTIN_PCOMNEUB,
20836 IX86_BUILTIN_PCOMLTUB,
20837 IX86_BUILTIN_PCOMLEUB,
20838 IX86_BUILTIN_PCOMGTUB,
20839 IX86_BUILTIN_PCOMGEUB,
20840 IX86_BUILTIN_PCOMFALSEUB,
20841 IX86_BUILTIN_PCOMTRUEUB,
20842 IX86_BUILTIN_PCOMEQUW,
20843 IX86_BUILTIN_PCOMNEUW,
20844 IX86_BUILTIN_PCOMLTUW,
20845 IX86_BUILTIN_PCOMLEUW,
20846 IX86_BUILTIN_PCOMGTUW,
20847 IX86_BUILTIN_PCOMGEUW,
20848 IX86_BUILTIN_PCOMFALSEUW,
20849 IX86_BUILTIN_PCOMTRUEUW,
20850 IX86_BUILTIN_PCOMEQUD,
20851 IX86_BUILTIN_PCOMNEUD,
20852 IX86_BUILTIN_PCOMLTUD,
20853 IX86_BUILTIN_PCOMLEUD,
20854 IX86_BUILTIN_PCOMGTUD,
20855 IX86_BUILTIN_PCOMGEUD,
20856 IX86_BUILTIN_PCOMFALSEUD,
20857 IX86_BUILTIN_PCOMTRUEUD,
20858 IX86_BUILTIN_PCOMEQUQ,
20859 IX86_BUILTIN_PCOMNEUQ,
20860 IX86_BUILTIN_PCOMLTUQ,
20861 IX86_BUILTIN_PCOMLEUQ,
20862 IX86_BUILTIN_PCOMGTUQ,
20863 IX86_BUILTIN_PCOMGEUQ,
20864 IX86_BUILTIN_PCOMFALSEUQ,
20865 IX86_BUILTIN_PCOMTRUEUQ,
20866
20867 IX86_BUILTIN_PCOMEQB,
20868 IX86_BUILTIN_PCOMNEB,
20869 IX86_BUILTIN_PCOMLTB,
20870 IX86_BUILTIN_PCOMLEB,
20871 IX86_BUILTIN_PCOMGTB,
20872 IX86_BUILTIN_PCOMGEB,
20873 IX86_BUILTIN_PCOMFALSEB,
20874 IX86_BUILTIN_PCOMTRUEB,
20875 IX86_BUILTIN_PCOMEQW,
20876 IX86_BUILTIN_PCOMNEW,
20877 IX86_BUILTIN_PCOMLTW,
20878 IX86_BUILTIN_PCOMLEW,
20879 IX86_BUILTIN_PCOMGTW,
20880 IX86_BUILTIN_PCOMGEW,
20881 IX86_BUILTIN_PCOMFALSEW,
20882 IX86_BUILTIN_PCOMTRUEW,
20883 IX86_BUILTIN_PCOMEQD,
20884 IX86_BUILTIN_PCOMNED,
20885 IX86_BUILTIN_PCOMLTD,
20886 IX86_BUILTIN_PCOMLED,
20887 IX86_BUILTIN_PCOMGTD,
20888 IX86_BUILTIN_PCOMGED,
20889 IX86_BUILTIN_PCOMFALSED,
20890 IX86_BUILTIN_PCOMTRUED,
20891 IX86_BUILTIN_PCOMEQQ,
20892 IX86_BUILTIN_PCOMNEQ,
20893 IX86_BUILTIN_PCOMLTQ,
20894 IX86_BUILTIN_PCOMLEQ,
20895 IX86_BUILTIN_PCOMGTQ,
20896 IX86_BUILTIN_PCOMGEQ,
20897 IX86_BUILTIN_PCOMFALSEQ,
20898 IX86_BUILTIN_PCOMTRUEQ,
20899
20900 IX86_BUILTIN_MAX
20901 };
20902
20903 /* Table for the ix86 builtin decls. */
20904 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20905
20906 /* Table of all of the builtin functions that are possible with different ISAs
20907    but whose declarations are deferred until a function is declared to use
20908    that ISA. */
20909 struct GTY(()) builtin_isa {
20910 tree type; /* builtin type to use in the declaration */
20911 const char *name; /* function name */
20912 int isa; /* isa_flags this builtin is defined for */
20913 bool const_p; /* true if the declaration is constant */
20914 };
20915
20916 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20917
20918
20919 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save MASK,
20920 * the set of isa_flags bits the builtin needs, in the ix86_builtins_isa
20921 * array.  Stores the function decl in the ix86_builtins array.  Returns the
20922 * function decl, or NULL_TREE if the builtin was not added.
20923 *
20924 * If the front end has a special hook for builtin functions, delay adding
20925 * builtin functions that aren't in the current ISA until the ISA is changed
20926 * with function specific optimization.  Doing so can save about 300K for the
20927 * default compiler.  When the builtin is expanded, check at that time whether
20928 * it is valid.
20929 *
20930 * If the front end doesn't have a special hook, record all builtins, even
20931 * those that aren't in the current ISA, in case the user uses function
20932 * specific options for a different ISA; that way we don't get scope errors
20933 * if a builtin is added in the middle of a function scope. */
20934
20935 static inline tree
20936 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20937 {
20938 tree decl = NULL_TREE;
20939
20940 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20941 {
20942 ix86_builtins_isa[(int) code].isa = mask;
20943
20944 if ((mask & ix86_isa_flags) != 0
20945 || (lang_hooks.builtin_function
20946 == lang_hooks.builtin_function_ext_scope))
20947
20948 {
20949 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20950 NULL_TREE);
20951 ix86_builtins[(int) code] = decl;
20952 ix86_builtins_isa[(int) code].type = NULL_TREE;
20953 }
20954 else
20955 {
20956 ix86_builtins[(int) code] = NULL_TREE;
20957 ix86_builtins_isa[(int) code].const_p = false;
20958 ix86_builtins_isa[(int) code].type = type;
20959 ix86_builtins_isa[(int) code].name = name;
20960 }
20961 }
20962
20963 return decl;
20964 }
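
/* A minimal usage sketch, illustrative only and not code from this file:
   assuming a function-type tree for the V4SF (V4SF, V4SF) signature has been
   built elsewhere (e.g. with build_function_type_list) and is held in a
   hypothetical variable v4sf_ftype_v4sf_v4sf, an SSE builtin could be
   registered as

       decl = def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                           v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   If SSE is enabled, or the front end supports extension-scope builtins, DECL
   is the new function decl; otherwise only the name, type and mask are
   recorded in ix86_builtins_isa so that ix86_add_new_builtins can create the
   decl later.  */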
20965
20966 /* Like def_builtin, but also marks the function decl "const". */
20967
20968 static inline tree
20969 def_builtin_const (int mask, const char *name, tree type,
20970 enum ix86_builtins code)
20971 {
20972 tree decl = def_builtin (mask, name, type, code);
20973 if (decl)
20974 TREE_READONLY (decl) = 1;
20975 else
20976 ix86_builtins_isa[(int) code].const_p = true;
20977
20978 return decl;
20979 }
20980
20981 /* Add any new builtin functions for a given ISA that may not have been
20982    declared.  This saves a bit of space compared to adding all of the
20983    declarations to the tree up front, whether or not they are ever used. */
20984
20985 static void
20986 ix86_add_new_builtins (int isa)
20987 {
20988 int i;
20989 tree decl;
20990
20991 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
20992 {
20993 if ((ix86_builtins_isa[i].isa & isa) != 0
20994 && ix86_builtins_isa[i].type != NULL_TREE)
20995 {
20996 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20997 ix86_builtins_isa[i].type,
20998 i, BUILT_IN_MD, NULL,
20999 NULL_TREE);
21000
21001 ix86_builtins[i] = decl;
21002 ix86_builtins_isa[i].type = NULL_TREE;
21003 if (ix86_builtins_isa[i].const_p)
21004 TREE_READONLY (decl) = 1;
21005 }
21006 }
21007 }
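
/* Illustrative note, stated as an assumption about the callers rather than a
   guarantee: when the active ISA set is extended, for instance while handling
   a function-specific target option, the updated flags would be handed back
   here with a call along the lines of

       ix86_add_new_builtins (ix86_isa_flags);

   so that builtins deferred by def_builtin above become declared before the
   function body that needs them is compiled.  */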
21008
21009 /* Bits for builtin_description.flag. */
21010
21011 /* Set when we don't support the comparison natively, and need to swap the
21012    comparison operands in order to support it. */
21013 #define BUILTIN_DESC_SWAP_OPERANDS 1
21014
21015 struct builtin_description
21016 {
21017 const unsigned int mask;
21018 const enum insn_code icode;
21019 const char *const name;
21020 const enum ix86_builtins code;
21021 const enum rtx_code comparison;
21022 const int flag;
21023 };
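
/* As a concrete reading of one entry, the first bdesc_comi element below,

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
       IX86_BUILTIN_COMIEQSS, UNEQ, 0 },

   says: the builtin requires SSE, is expanded through the sse_comi insn
   pattern, is exposed to users as __builtin_ia32_comieq, is identified
   internally by IX86_BUILTIN_COMIEQSS, implements the UNEQ comparison, and
   needs no extra flag bits.  */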
21024
21025 static const struct builtin_description bdesc_comi[] =
21026 {
21027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21031 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21032 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21036 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21051 };
21052
21053 static const struct builtin_description bdesc_pcmpestr[] =
21054 {
21055 /* SSE4.2 */
21056 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21057 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21058 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21059 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21060 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21061 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21062 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
21063 };
21064
21065 static const struct builtin_description bdesc_pcmpistr[] =
21066 {
21067 /* SSE4.2 */
21068 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21069 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21070 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21071 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21072 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21073 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21074 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21075 };
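
/* Note on the two tables above, an observation from the entries rather than a
   separate specification: for the ...a/...c/...o/...s/...z pcmpestr and
   pcmpistr variants the FLAG field does not hold a BUILTIN_DESC_* bit;
   instead it carries the condition-code mode (CCAmode, CCCmode, CCOmode,
   CCSmode, CCZmode) whose flag the expander tests to produce the builtin's
   integer result.  */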
21076
21077 /* Special builtin types */
21078 enum ix86_special_builtin_type
21079 {
21080 SPECIAL_FTYPE_UNKNOWN,
21081 VOID_FTYPE_VOID,
21082 V32QI_FTYPE_PCCHAR,
21083 V16QI_FTYPE_PCCHAR,
21084 V8SF_FTYPE_PCV4SF,
21085 V8SF_FTYPE_PCFLOAT,
21086 V4DF_FTYPE_PCV2DF,
21087 V4DF_FTYPE_PCDOUBLE,
21088 V4SF_FTYPE_PCFLOAT,
21089 V2DF_FTYPE_PCDOUBLE,
21090 V8SF_FTYPE_PCV8SF_V8SF,
21091 V4DF_FTYPE_PCV4DF_V4DF,
21092 V4SF_FTYPE_V4SF_PCV2SF,
21093 V4SF_FTYPE_PCV4SF_V4SF,
21094 V2DF_FTYPE_V2DF_PCDOUBLE,
21095 V2DF_FTYPE_PCV2DF_V2DF,
21096 V2DI_FTYPE_PV2DI,
21097 VOID_FTYPE_PV2SF_V4SF,
21098 VOID_FTYPE_PV4DI_V4DI,
21099 VOID_FTYPE_PV2DI_V2DI,
21100 VOID_FTYPE_PCHAR_V32QI,
21101 VOID_FTYPE_PCHAR_V16QI,
21102 VOID_FTYPE_PFLOAT_V8SF,
21103 VOID_FTYPE_PFLOAT_V4SF,
21104 VOID_FTYPE_PDOUBLE_V4DF,
21105 VOID_FTYPE_PDOUBLE_V2DF,
21106 VOID_FTYPE_PDI_DI,
21107 VOID_FTYPE_PINT_INT,
21108 VOID_FTYPE_PV8SF_V8SF_V8SF,
21109 VOID_FTYPE_PV4DF_V4DF_V4DF,
21110 VOID_FTYPE_PV4SF_V4SF_V4SF,
21111 VOID_FTYPE_PV2DF_V2DF_V2DF
21112 };
21113
21114 /* Builtin types */
21115 enum ix86_builtin_type
21116 {
21117 FTYPE_UNKNOWN,
21118 FLOAT128_FTYPE_FLOAT128,
21119 FLOAT_FTYPE_FLOAT,
21120 FLOAT128_FTYPE_FLOAT128_FLOAT128,
21121 INT_FTYPE_V8SF_V8SF_PTEST,
21122 INT_FTYPE_V4DI_V4DI_PTEST,
21123 INT_FTYPE_V4DF_V4DF_PTEST,
21124 INT_FTYPE_V4SF_V4SF_PTEST,
21125 INT_FTYPE_V2DI_V2DI_PTEST,
21126 INT_FTYPE_V2DF_V2DF_PTEST,
21127 INT64_FTYPE_V4SF,
21128 INT64_FTYPE_V2DF,
21129 INT_FTYPE_V16QI,
21130 INT_FTYPE_V8QI,
21131 INT_FTYPE_V8SF,
21132 INT_FTYPE_V4DF,
21133 INT_FTYPE_V4SF,
21134 INT_FTYPE_V2DF,
21135 V16QI_FTYPE_V16QI,
21136 V8SI_FTYPE_V8SF,
21137 V8SI_FTYPE_V4SI,
21138 V8HI_FTYPE_V8HI,
21139 V8HI_FTYPE_V16QI,
21140 V8QI_FTYPE_V8QI,
21141 V8SF_FTYPE_V8SF,
21142 V8SF_FTYPE_V8SI,
21143 V8SF_FTYPE_V4SF,
21144 V4SI_FTYPE_V4SI,
21145 V4SI_FTYPE_V16QI,
21146 V4SI_FTYPE_V8SI,
21147 V4SI_FTYPE_V8HI,
21148 V4SI_FTYPE_V4DF,
21149 V4SI_FTYPE_V4SF,
21150 V4SI_FTYPE_V2DF,
21151 V4HI_FTYPE_V4HI,
21152 V4DF_FTYPE_V4DF,
21153 V4DF_FTYPE_V4SI,
21154 V4DF_FTYPE_V4SF,
21155 V4DF_FTYPE_V2DF,
21156 V4SF_FTYPE_V4DF,
21157 V4SF_FTYPE_V4SF,
21158 V4SF_FTYPE_V4SF_VEC_MERGE,
21159 V4SF_FTYPE_V8SF,
21160 V4SF_FTYPE_V4SI,
21161 V4SF_FTYPE_V2DF,
21162 V2DI_FTYPE_V2DI,
21163 V2DI_FTYPE_V16QI,
21164 V2DI_FTYPE_V8HI,
21165 V2DI_FTYPE_V4SI,
21166 V2DF_FTYPE_V2DF,
21167 V2DF_FTYPE_V2DF_VEC_MERGE,
21168 V2DF_FTYPE_V4SI,
21169 V2DF_FTYPE_V4DF,
21170 V2DF_FTYPE_V4SF,
21171 V2DF_FTYPE_V2SI,
21172 V2SI_FTYPE_V2SI,
21173 V2SI_FTYPE_V4SF,
21174 V2SI_FTYPE_V2SF,
21175 V2SI_FTYPE_V2DF,
21176 V2SF_FTYPE_V2SF,
21177 V2SF_FTYPE_V2SI,
21178 V16QI_FTYPE_V16QI_V16QI,
21179 V16QI_FTYPE_V8HI_V8HI,
21180 V8QI_FTYPE_V8QI_V8QI,
21181 V8QI_FTYPE_V4HI_V4HI,
21182 V8HI_FTYPE_V8HI_V8HI,
21183 V8HI_FTYPE_V8HI_V8HI_COUNT,
21184 V8HI_FTYPE_V16QI_V16QI,
21185 V8HI_FTYPE_V4SI_V4SI,
21186 V8HI_FTYPE_V8HI_SI_COUNT,
21187 V8SF_FTYPE_V8SF_V8SF,
21188 V8SF_FTYPE_V8SF_V8SI,
21189 V4SI_FTYPE_V4SI_V4SI,
21190 V4SI_FTYPE_V4SI_V4SI_COUNT,
21191 V4SI_FTYPE_V8HI_V8HI,
21192 V4SI_FTYPE_V4SF_V4SF,
21193 V4SI_FTYPE_V2DF_V2DF,
21194 V4SI_FTYPE_V4SI_SI_COUNT,
21195 V4HI_FTYPE_V4HI_V4HI,
21196 V4HI_FTYPE_V4HI_V4HI_COUNT,
21197 V4HI_FTYPE_V8QI_V8QI,
21198 V4HI_FTYPE_V2SI_V2SI,
21199 V4HI_FTYPE_V4HI_SI_COUNT,
21200 V4DF_FTYPE_V4DF_V4DF,
21201 V4DF_FTYPE_V4DF_V4DI,
21202 V4SF_FTYPE_V4SF_V4SF,
21203 V4SF_FTYPE_V4SF_V4SF_SWAP,
21204 V4SF_FTYPE_V4SF_V4SI,
21205 V4SF_FTYPE_V4SF_V2SI,
21206 V4SF_FTYPE_V4SF_V2DF,
21207 V4SF_FTYPE_V4SF_DI,
21208 V4SF_FTYPE_V4SF_SI,
21209 V2DI_FTYPE_V2DI_V2DI,
21210 V2DI_FTYPE_V2DI_V2DI_COUNT,
21211 V2DI_FTYPE_V16QI_V16QI,
21212 V2DI_FTYPE_V4SI_V4SI,
21213 V2DI_FTYPE_V2DI_V16QI,
21214 V2DI_FTYPE_V2DF_V2DF,
21215 V2DI_FTYPE_V2DI_SI_COUNT,
21216 V2SI_FTYPE_V2SI_V2SI,
21217 V2SI_FTYPE_V2SI_V2SI_COUNT,
21218 V2SI_FTYPE_V4HI_V4HI,
21219 V2SI_FTYPE_V2SF_V2SF,
21220 V2SI_FTYPE_V2SI_SI_COUNT,
21221 V2DF_FTYPE_V2DF_V2DF,
21222 V2DF_FTYPE_V2DF_V2DF_SWAP,
21223 V2DF_FTYPE_V2DF_V4SF,
21224 V2DF_FTYPE_V2DF_V2DI,
21225 V2DF_FTYPE_V2DF_DI,
21226 V2DF_FTYPE_V2DF_SI,
21227 V2SF_FTYPE_V2SF_V2SF,
21228 V1DI_FTYPE_V1DI_V1DI,
21229 V1DI_FTYPE_V1DI_V1DI_COUNT,
21230 V1DI_FTYPE_V8QI_V8QI,
21231 V1DI_FTYPE_V2SI_V2SI,
21232 V1DI_FTYPE_V1DI_SI_COUNT,
21233 UINT64_FTYPE_UINT64_UINT64,
21234 UINT_FTYPE_UINT_UINT,
21235 UINT_FTYPE_UINT_USHORT,
21236 UINT_FTYPE_UINT_UCHAR,
21237 V8HI_FTYPE_V8HI_INT,
21238 V4SI_FTYPE_V4SI_INT,
21239 V4HI_FTYPE_V4HI_INT,
21240 V8SF_FTYPE_V8SF_INT,
21241 V4SI_FTYPE_V8SI_INT,
21242 V4SF_FTYPE_V8SF_INT,
21243 V2DF_FTYPE_V4DF_INT,
21244 V4DF_FTYPE_V4DF_INT,
21245 V4SF_FTYPE_V4SF_INT,
21246 V2DI_FTYPE_V2DI_INT,
21247 V2DI2TI_FTYPE_V2DI_INT,
21248 V2DF_FTYPE_V2DF_INT,
21249 V16QI_FTYPE_V16QI_V16QI_V16QI,
21250 V8SF_FTYPE_V8SF_V8SF_V8SF,
21251 V4DF_FTYPE_V4DF_V4DF_V4DF,
21252 V4SF_FTYPE_V4SF_V4SF_V4SF,
21253 V2DF_FTYPE_V2DF_V2DF_V2DF,
21254 V16QI_FTYPE_V16QI_V16QI_INT,
21255 V8SI_FTYPE_V8SI_V8SI_INT,
21256 V8SI_FTYPE_V8SI_V4SI_INT,
21257 V8HI_FTYPE_V8HI_V8HI_INT,
21258 V8SF_FTYPE_V8SF_V8SF_INT,
21259 V8SF_FTYPE_V8SF_V4SF_INT,
21260 V4SI_FTYPE_V4SI_V4SI_INT,
21261 V4DF_FTYPE_V4DF_V4DF_INT,
21262 V4DF_FTYPE_V4DF_V2DF_INT,
21263 V4SF_FTYPE_V4SF_V4SF_INT,
21264 V2DI_FTYPE_V2DI_V2DI_INT,
21265 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21266 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21267 V2DF_FTYPE_V2DF_V2DF_INT,
21268 V2DI_FTYPE_V2DI_UINT_UINT,
21269 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21270 };
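
/* The names in the two enums above encode signatures: the part before _FTYPE_
   is the return type and the parts after it are the argument types, where for
   example V4SF is a vector of four floats and PCFLOAT a pointer to const
   float, so V4SF_FTYPE_PCFLOAT describes a builtin that reads a V4SF through a
   const float pointer.  Suffixes such as _COUNT or _SWAP mark arguments that
   the expanders treat specially (a shift count, or operands to be swapped).  */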
21271
21272 /* Special builtins with a variable number of arguments. */
21273 static const struct builtin_description bdesc_special_args[] =
21274 {
21275 /* MMX */
21276 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21277
21278 /* 3DNow! */
21279 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21280
21281 /* SSE */
21282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21285
21286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21290
21291 /* SSE or 3DNow!A */
21292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21293 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
21294
21295 /* SSE2 */
21296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21303 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21305
21306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21308
21309 /* SSE3 */
21310 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21311
21312 /* SSE4.1 */
21313 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
21314
21315 /* SSE4A */
21316 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21317 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21318
21319 /* AVX */
21320 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21321 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21322 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21323
21324 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21325 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21326 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21327 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21328 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21329
21330 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21331 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21332 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21333 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21334 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21335 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21336 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21337
21338 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21339 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21340 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21341
21342 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21343 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21344 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21345 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21346 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21347 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21348 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21349 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21350 };
21351
21352 /* Builtins with a variable number of arguments. */
21353 static const struct builtin_description bdesc_args[] =
21354 {
21355 /* MMX */
21356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21362
21363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21371
21372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21374
21375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21377 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21378 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21379
21380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21381 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21382 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21384 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21385 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21386
21387 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21389 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21390 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21391 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21392 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21393
21394 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21395 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21396 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21397
21398 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21399
21400 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21401 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21402 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21403 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21404 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21405 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21406
21407 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21408 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21409 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21410 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21411 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21412 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21413
21414 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21415 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21416 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21417 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21418
21419 /* 3DNow! */
21420 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21421 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21422 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21423 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21424
21425 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21426 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21427 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21428 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21429 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21430 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21431 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21432 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21433 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21434 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21435 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21436 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21437 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21438 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21439 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21440
21441 /* 3DNow!A */
21442 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21443 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21444 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21445 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21446 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21447 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21448
21449 /* SSE */
21450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21452 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21453 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21454 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21458 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21461 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21462
21463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21464
21465 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21466 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21467 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21473
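  /* CMPGT and CMPGE have no dedicated compare predicate; they reuse the
     LT/LE patterns with the operands swapped (the _SWAP suffix), and the
     CMPN* variants map to the inverse codes UNGE/UNGT.  */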
21474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21483 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21484 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21485 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21487 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21489 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21490 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21491 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21492 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21493 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21494 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21495 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21496
21497 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21498 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21499 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21500 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21501
21502 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21503 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21504 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21505 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21506
21507 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21508 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21509 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21510 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21511 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21512
21513 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21514 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21515 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
21516
21517 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21518
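  /* The _VEC_MERGE types take a single V4SF operand; the scalar operation
     is applied to the low element and the remaining elements are copied
     through from the source, matching sqrtss/rsqrtss/rcpss semantics.  */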
21519 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21520 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21521 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21522
21523 /* SSE MMX or 3DNow!A */
21524 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21525 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21526 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21527
21528 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21529 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21530 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21531 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21532
21533 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21534 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21535
21536 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21537
21538 /* SSE2 */
21539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21540
21541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21546
21547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21552
21553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21554
21555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21557 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21558 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21559
21560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21563
21564 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21565 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21566 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21567 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21572
21573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21593
21594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21595 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21598
21599 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21601 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21602 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21603
21604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21607
21608 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21609
21610 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21611 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21612 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21613 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21614 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21615 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21616 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21617 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21618
21619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21626 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21627
21628 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21629 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21630
21631 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21633 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21634 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21635
21636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21638
21639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21645
21646 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21647 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21648 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21650
21651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21659
21660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21663
21664 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21666
21667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21668 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21669
21670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21671
21672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21673 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21676
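  /* The whole-register byte shifts (pslldqi/psrldqi) are expanded as TImode
     shifts of the 128-bit value, hence the V2DI2TI types.  The _SI_COUNT
     forms take the shift count as an integer; the vector _COUNT forms take
     it in the low quadword of a vector register.  */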
21677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21678 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21679 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21680 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21681 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21682 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21683 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21684
21685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21686 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21687 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21688 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21689 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21690 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21691 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21692
21693 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21694 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21695 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21696 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21697
21698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21701
21702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21703
21704 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21705 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21706
21707 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21708
21709 /* SSE2 MMX */
21710 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21711 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21712
21713 /* SSE3 */
21714 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21715 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21716
21717 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21718 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21719 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21720 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21721 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21722 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21723
21724 /* SSSE3 */
21725 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21726 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21727 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21728 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21729 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21730 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21731
21732 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21733 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21734 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21735 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21736 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21737 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21738 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21739 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21740 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21741 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21742 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21743 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21744 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21745 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21746 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21747 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21748 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21749 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21750 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21751 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21752 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21753 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21754 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21755 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21756
21757 /* SSSE3 palignr */
21758 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21759 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21760
21761 /* SSE4.1 */
21762 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21763 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21764 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21765 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21766 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21767 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21768 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21769 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21770 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21771 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21772
21773 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21774 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21775 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21776 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21777 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21778 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21779 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21780 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21781 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21782 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21783 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21784 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21785 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21786
21787 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21788 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21789 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21790 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21791 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21792 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21793 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21794 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21795 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21796 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21797 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21798 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21799
21800 /* SSE4.1 and SSE5 */
21801 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21802 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21803 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21804 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21805
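  /* For the ptest builtins the comparison field selects which flag to
     read back: EQ returns ZF (ptestz), LTU returns CF (ptestc), and GTU
     returns one when neither flag is set (ptestnzc).  */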
21806 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21807 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21808 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21809
21810 /* SSE4.2 */
21811 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21812 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21813 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21814 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21815 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21816
21817 /* SSE4A */
21818 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21819 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21820 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21821 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21822
21823 /* AES */
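  /* The AES and PCLMUL entries have a zero name field; their
     __builtin_ia32_* names are registered elsewhere under the AES/PCLMUL
     ISA masks, and this table only supplies the expansion patterns.  */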
21824 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21825 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21826
21827 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21828 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21829 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21830 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21831
21832 /* PCLMUL */
21833 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
21834
21835 /* AVX */
21836 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21837 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21840 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21841 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21844 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21850 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21851 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21852 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21853 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21854 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21855 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21856 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21857 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21858 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21859 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21860 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21861 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21862
21863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21867
21868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21902
21903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21906
21907 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21909 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21911 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21912
21913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21914
21915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21917
21918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21920 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21921 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21922
21923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21924 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21926 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21928 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
21929
21930 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21931 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21932 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21933 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21934 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21935 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21936 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21937 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21938 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21939 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21940 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21941 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21942 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21943 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21944 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
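/* In the vtest/ptest descriptors above, the comparison-code field selects
   which flags result the builtin returns: EQ corresponds to the "z" (ZF)
   variants, LTU to the "c" (CF) variants and GTU to the "nzc" (neither
   flag) variants, which is how the three forms share one insn pattern.  */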
21945
21946 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21947 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
21948 };
21949
21950 /* SSE5 */
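/* In the enum that follows, MULTI_ARG_<N>_<MODE> roughly encodes the number
   of operands and the vector element mode of an SSE5 builtin.  A second
   mode (e.g. _HI_SI, _SI_DI) names a widening source/destination pair,
   _IMM marks a rotate count taken as an immediate, _CMP a comparison whose
   predicate comes from the descriptor's rtx-code field, _TF the com/pcom
   "false"/"true" forms, and _PH2PS/_PS2PH the half-precision conversions.  */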
21951 enum multi_arg_type {
21952 MULTI_ARG_UNKNOWN,
21953 MULTI_ARG_3_SF,
21954 MULTI_ARG_3_DF,
21955 MULTI_ARG_3_DI,
21956 MULTI_ARG_3_SI,
21957 MULTI_ARG_3_SI_DI,
21958 MULTI_ARG_3_HI,
21959 MULTI_ARG_3_HI_SI,
21960 MULTI_ARG_3_QI,
21961 MULTI_ARG_3_PERMPS,
21962 MULTI_ARG_3_PERMPD,
21963 MULTI_ARG_2_SF,
21964 MULTI_ARG_2_DF,
21965 MULTI_ARG_2_DI,
21966 MULTI_ARG_2_SI,
21967 MULTI_ARG_2_HI,
21968 MULTI_ARG_2_QI,
21969 MULTI_ARG_2_DI_IMM,
21970 MULTI_ARG_2_SI_IMM,
21971 MULTI_ARG_2_HI_IMM,
21972 MULTI_ARG_2_QI_IMM,
21973 MULTI_ARG_2_SF_CMP,
21974 MULTI_ARG_2_DF_CMP,
21975 MULTI_ARG_2_DI_CMP,
21976 MULTI_ARG_2_SI_CMP,
21977 MULTI_ARG_2_HI_CMP,
21978 MULTI_ARG_2_QI_CMP,
21979 MULTI_ARG_2_DI_TF,
21980 MULTI_ARG_2_SI_TF,
21981 MULTI_ARG_2_HI_TF,
21982 MULTI_ARG_2_QI_TF,
21983 MULTI_ARG_2_SF_TF,
21984 MULTI_ARG_2_DF_TF,
21985 MULTI_ARG_1_SF,
21986 MULTI_ARG_1_DF,
21987 MULTI_ARG_1_DI,
21988 MULTI_ARG_1_SI,
21989 MULTI_ARG_1_HI,
21990 MULTI_ARG_1_QI,
21991 MULTI_ARG_1_SI_DI,
21992 MULTI_ARG_1_HI_DI,
21993 MULTI_ARG_1_HI_SI,
21994 MULTI_ARG_1_QI_DI,
21995 MULTI_ARG_1_QI_SI,
21996 MULTI_ARG_1_QI_HI,
21997 MULTI_ARG_1_PH2PS,
21998 MULTI_ARG_1_PS2PH
21999 };
22000
22001 static const struct builtin_description bdesc_multi_arg[] =
22002 {
22003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
22024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
22026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
22029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
22049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
22057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
22063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22078
22079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22095
22096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22109 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22111 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22112
22113 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22120 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22121 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22125 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22127 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22128 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22129
22130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22133 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22137 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22146
22147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22154
22155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22162
22163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22170
22171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22178
22179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22186
22187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22194
22195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22196 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22197 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22198 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22199 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22200 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22201 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22202
22203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22204 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22207 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22208 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22209 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22210
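/* COM_FALSE_S, COM_TRUE_S, COM_FALSE_P, COM_TRUE_P and, further below,
   PCOM_FALSE and PCOM_TRUE are not ordinary rtx comparison codes (hence
   the casts); they are target-specific pseudo codes requesting the
   always-false and always-true [p]com variants.  */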
22211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22212 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22216 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22217 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22218 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22219
22220 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22224 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22226 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22227 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22228
22229 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22232 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22235 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22236 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22237 };
22238
22239 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
22240    not in the current target ISA, so that the user can compile particular
22241    modules with target-specific options that differ from the command-line
22242    options. */
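/* A sketch of the intended use (assuming the "target" function attribute
   and the __m256d type from the AVX intrinsic headers are available):

     __attribute__ ((target ("avx")))
     int
     sign_bits_256 (__m256d x)
     {
       return __builtin_ia32_movmskpd256 (x);
     }

   Even when the rest of the translation unit is not compiled with -mavx,
   the builtin must already be registered for this function to use it; the
   ISA mask recorded in each descriptor is checked when the builtin is
   actually expanded.  */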
22243 static void
22244 ix86_init_mmx_sse_builtins (void)
22245 {
22246 const struct builtin_description * d;
22247 size_t i;
22248
22249 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22250 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22251 tree V1DI_type_node
22252 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22253 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22254 tree V2DI_type_node
22255 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22256 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22257 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22258 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22259 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22260 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22261 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22262
22263 tree pchar_type_node = build_pointer_type (char_type_node);
22264 tree pcchar_type_node
22265 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22266 tree pfloat_type_node = build_pointer_type (float_type_node);
22267 tree pcfloat_type_node
22268 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22269 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22270 tree pcv2sf_type_node
22271 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22272 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22273 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22274
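/* The "<ret>_ftype_<arg1>_..._<argN>" locals that follow are the function
   types for the builtins; each is built with build_function_type_list and
   terminated by NULL_TREE.  */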
22275 /* Comparisons. */
22276 tree int_ftype_v4sf_v4sf
22277 = build_function_type_list (integer_type_node,
22278 V4SF_type_node, V4SF_type_node, NULL_TREE);
22279 tree v4si_ftype_v4sf_v4sf
22280 = build_function_type_list (V4SI_type_node,
22281 V4SF_type_node, V4SF_type_node, NULL_TREE);
22282 /* MMX/SSE/integer conversions. */
22283 tree int_ftype_v4sf
22284 = build_function_type_list (integer_type_node,
22285 V4SF_type_node, NULL_TREE);
22286 tree int64_ftype_v4sf
22287 = build_function_type_list (long_long_integer_type_node,
22288 V4SF_type_node, NULL_TREE);
22289 tree int_ftype_v8qi
22290 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22291 tree v4sf_ftype_v4sf_int
22292 = build_function_type_list (V4SF_type_node,
22293 V4SF_type_node, integer_type_node, NULL_TREE);
22294 tree v4sf_ftype_v4sf_int64
22295 = build_function_type_list (V4SF_type_node,
22296 V4SF_type_node, long_long_integer_type_node,
22297 NULL_TREE);
22298 tree v4sf_ftype_v4sf_v2si
22299 = build_function_type_list (V4SF_type_node,
22300 V4SF_type_node, V2SI_type_node, NULL_TREE);
22301
22302 /* Miscellaneous. */
22303 tree v8qi_ftype_v4hi_v4hi
22304 = build_function_type_list (V8QI_type_node,
22305 V4HI_type_node, V4HI_type_node, NULL_TREE);
22306 tree v4hi_ftype_v2si_v2si
22307 = build_function_type_list (V4HI_type_node,
22308 V2SI_type_node, V2SI_type_node, NULL_TREE);
22309 tree v4sf_ftype_v4sf_v4sf_int
22310 = build_function_type_list (V4SF_type_node,
22311 V4SF_type_node, V4SF_type_node,
22312 integer_type_node, NULL_TREE);
22313 tree v2si_ftype_v4hi_v4hi
22314 = build_function_type_list (V2SI_type_node,
22315 V4HI_type_node, V4HI_type_node, NULL_TREE);
22316 tree v4hi_ftype_v4hi_int
22317 = build_function_type_list (V4HI_type_node,
22318 V4HI_type_node, integer_type_node, NULL_TREE);
22319 tree v2si_ftype_v2si_int
22320 = build_function_type_list (V2SI_type_node,
22321 V2SI_type_node, integer_type_node, NULL_TREE);
22322 tree v1di_ftype_v1di_int
22323 = build_function_type_list (V1DI_type_node,
22324 V1DI_type_node, integer_type_node, NULL_TREE);
22325
22326 tree void_ftype_void
22327 = build_function_type (void_type_node, void_list_node);
22328 tree void_ftype_unsigned
22329 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22330 tree void_ftype_unsigned_unsigned
22331 = build_function_type_list (void_type_node, unsigned_type_node,
22332 unsigned_type_node, NULL_TREE);
22333 tree void_ftype_pcvoid_unsigned_unsigned
22334 = build_function_type_list (void_type_node, const_ptr_type_node,
22335 unsigned_type_node, unsigned_type_node,
22336 NULL_TREE);
22337 tree unsigned_ftype_void
22338 = build_function_type (unsigned_type_node, void_list_node);
22339 tree v2si_ftype_v4sf
22340 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22341 /* Loads/stores. */
22342 tree void_ftype_v8qi_v8qi_pchar
22343 = build_function_type_list (void_type_node,
22344 V8QI_type_node, V8QI_type_node,
22345 pchar_type_node, NULL_TREE);
22346 tree v4sf_ftype_pcfloat
22347 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22348 tree v4sf_ftype_v4sf_pcv2sf
22349 = build_function_type_list (V4SF_type_node,
22350 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22351 tree void_ftype_pv2sf_v4sf
22352 = build_function_type_list (void_type_node,
22353 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22354 tree void_ftype_pfloat_v4sf
22355 = build_function_type_list (void_type_node,
22356 pfloat_type_node, V4SF_type_node, NULL_TREE);
22357 tree void_ftype_pdi_di
22358 = build_function_type_list (void_type_node,
22359 pdi_type_node, long_long_unsigned_type_node,
22360 NULL_TREE);
22361 tree void_ftype_pv2di_v2di
22362 = build_function_type_list (void_type_node,
22363 pv2di_type_node, V2DI_type_node, NULL_TREE);
22364 /* Normal vector unops. */
22365 tree v4sf_ftype_v4sf
22366 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22367 tree v16qi_ftype_v16qi
22368 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22369 tree v8hi_ftype_v8hi
22370 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22371 tree v4si_ftype_v4si
22372 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22373 tree v8qi_ftype_v8qi
22374 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22375 tree v4hi_ftype_v4hi
22376 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22377
22378 /* Normal vector binops. */
22379 tree v4sf_ftype_v4sf_v4sf
22380 = build_function_type_list (V4SF_type_node,
22381 V4SF_type_node, V4SF_type_node, NULL_TREE);
22382 tree v8qi_ftype_v8qi_v8qi
22383 = build_function_type_list (V8QI_type_node,
22384 V8QI_type_node, V8QI_type_node, NULL_TREE);
22385 tree v4hi_ftype_v4hi_v4hi
22386 = build_function_type_list (V4HI_type_node,
22387 V4HI_type_node, V4HI_type_node, NULL_TREE);
22388 tree v2si_ftype_v2si_v2si
22389 = build_function_type_list (V2SI_type_node,
22390 V2SI_type_node, V2SI_type_node, NULL_TREE);
22391 tree v1di_ftype_v1di_v1di
22392 = build_function_type_list (V1DI_type_node,
22393 V1DI_type_node, V1DI_type_node, NULL_TREE);
22394 tree v1di_ftype_v1di_v1di_int
22395 = build_function_type_list (V1DI_type_node,
22396 V1DI_type_node, V1DI_type_node,
22397 integer_type_node, NULL_TREE);
22398 tree v2si_ftype_v2sf
22399 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22400 tree v2sf_ftype_v2si
22401 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22402 tree v2si_ftype_v2si
22403 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22404 tree v2sf_ftype_v2sf
22405 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22406 tree v2sf_ftype_v2sf_v2sf
22407 = build_function_type_list (V2SF_type_node,
22408 V2SF_type_node, V2SF_type_node, NULL_TREE);
22409 tree v2si_ftype_v2sf_v2sf
22410 = build_function_type_list (V2SI_type_node,
22411 V2SF_type_node, V2SF_type_node, NULL_TREE);
22412 tree pint_type_node = build_pointer_type (integer_type_node);
22413 tree pdouble_type_node = build_pointer_type (double_type_node);
22414 tree pcdouble_type_node = build_pointer_type (
22415 build_type_variant (double_type_node, 1, 0));
22416 tree int_ftype_v2df_v2df
22417 = build_function_type_list (integer_type_node,
22418 V2DF_type_node, V2DF_type_node, NULL_TREE);
22419
22420 tree void_ftype_pcvoid
22421 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22422 tree v4sf_ftype_v4si
22423 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22424 tree v4si_ftype_v4sf
22425 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22426 tree v2df_ftype_v4si
22427 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22428 tree v4si_ftype_v2df
22429 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22430 tree v4si_ftype_v2df_v2df
22431 = build_function_type_list (V4SI_type_node,
22432 V2DF_type_node, V2DF_type_node, NULL_TREE);
22433 tree v2si_ftype_v2df
22434 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22435 tree v4sf_ftype_v2df
22436 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22437 tree v2df_ftype_v2si
22438 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22439 tree v2df_ftype_v4sf
22440 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22441 tree int_ftype_v2df
22442 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22443 tree int64_ftype_v2df
22444 = build_function_type_list (long_long_integer_type_node,
22445 V2DF_type_node, NULL_TREE);
22446 tree v2df_ftype_v2df_int
22447 = build_function_type_list (V2DF_type_node,
22448 V2DF_type_node, integer_type_node, NULL_TREE);
22449 tree v2df_ftype_v2df_int64
22450 = build_function_type_list (V2DF_type_node,
22451 V2DF_type_node, long_long_integer_type_node,
22452 NULL_TREE);
22453 tree v4sf_ftype_v4sf_v2df
22454 = build_function_type_list (V4SF_type_node,
22455 V4SF_type_node, V2DF_type_node, NULL_TREE);
22456 tree v2df_ftype_v2df_v4sf
22457 = build_function_type_list (V2DF_type_node,
22458 V2DF_type_node, V4SF_type_node, NULL_TREE);
22459 tree v2df_ftype_v2df_v2df_int
22460 = build_function_type_list (V2DF_type_node,
22461 V2DF_type_node, V2DF_type_node,
22462 integer_type_node,
22463 NULL_TREE);
22464 tree v2df_ftype_v2df_pcdouble
22465 = build_function_type_list (V2DF_type_node,
22466 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22467 tree void_ftype_pdouble_v2df
22468 = build_function_type_list (void_type_node,
22469 pdouble_type_node, V2DF_type_node, NULL_TREE);
22470 tree void_ftype_pint_int
22471 = build_function_type_list (void_type_node,
22472 pint_type_node, integer_type_node, NULL_TREE);
22473 tree void_ftype_v16qi_v16qi_pchar
22474 = build_function_type_list (void_type_node,
22475 V16QI_type_node, V16QI_type_node,
22476 pchar_type_node, NULL_TREE);
22477 tree v2df_ftype_pcdouble
22478 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22479 tree v2df_ftype_v2df_v2df
22480 = build_function_type_list (V2DF_type_node,
22481 V2DF_type_node, V2DF_type_node, NULL_TREE);
22482 tree v16qi_ftype_v16qi_v16qi
22483 = build_function_type_list (V16QI_type_node,
22484 V16QI_type_node, V16QI_type_node, NULL_TREE);
22485 tree v8hi_ftype_v8hi_v8hi
22486 = build_function_type_list (V8HI_type_node,
22487 V8HI_type_node, V8HI_type_node, NULL_TREE);
22488 tree v4si_ftype_v4si_v4si
22489 = build_function_type_list (V4SI_type_node,
22490 V4SI_type_node, V4SI_type_node, NULL_TREE);
22491 tree v2di_ftype_v2di_v2di
22492 = build_function_type_list (V2DI_type_node,
22493 V2DI_type_node, V2DI_type_node, NULL_TREE);
22494 tree v2di_ftype_v2df_v2df
22495 = build_function_type_list (V2DI_type_node,
22496 V2DF_type_node, V2DF_type_node, NULL_TREE);
22497 tree v2df_ftype_v2df
22498 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22499 tree v2di_ftype_v2di_int
22500 = build_function_type_list (V2DI_type_node,
22501 V2DI_type_node, integer_type_node, NULL_TREE);
22502 tree v2di_ftype_v2di_v2di_int
22503 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22504 V2DI_type_node, integer_type_node, NULL_TREE);
22505 tree v4si_ftype_v4si_int
22506 = build_function_type_list (V4SI_type_node,
22507 V4SI_type_node, integer_type_node, NULL_TREE);
22508 tree v8hi_ftype_v8hi_int
22509 = build_function_type_list (V8HI_type_node,
22510 V8HI_type_node, integer_type_node, NULL_TREE);
22511 tree v4si_ftype_v8hi_v8hi
22512 = build_function_type_list (V4SI_type_node,
22513 V8HI_type_node, V8HI_type_node, NULL_TREE);
22514 tree v1di_ftype_v8qi_v8qi
22515 = build_function_type_list (V1DI_type_node,
22516 V8QI_type_node, V8QI_type_node, NULL_TREE);
22517 tree v1di_ftype_v2si_v2si
22518 = build_function_type_list (V1DI_type_node,
22519 V2SI_type_node, V2SI_type_node, NULL_TREE);
22520 tree v2di_ftype_v16qi_v16qi
22521 = build_function_type_list (V2DI_type_node,
22522 V16QI_type_node, V16QI_type_node, NULL_TREE);
22523 tree v2di_ftype_v4si_v4si
22524 = build_function_type_list (V2DI_type_node,
22525 V4SI_type_node, V4SI_type_node, NULL_TREE);
22526 tree int_ftype_v16qi
22527 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22528 tree v16qi_ftype_pcchar
22529 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22530 tree void_ftype_pchar_v16qi
22531 = build_function_type_list (void_type_node,
22532 pchar_type_node, V16QI_type_node, NULL_TREE);
22533
22534 tree v2di_ftype_v2di_unsigned_unsigned
22535 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22536 unsigned_type_node, unsigned_type_node,
22537 NULL_TREE);
22538 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22539 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22540 unsigned_type_node, unsigned_type_node,
22541 NULL_TREE);
22542 tree v2di_ftype_v2di_v16qi
22543 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22544 NULL_TREE);
22545 tree v2df_ftype_v2df_v2df_v2df
22546 = build_function_type_list (V2DF_type_node,
22547 V2DF_type_node, V2DF_type_node,
22548 V2DF_type_node, NULL_TREE);
22549 tree v4sf_ftype_v4sf_v4sf_v4sf
22550 = build_function_type_list (V4SF_type_node,
22551 V4SF_type_node, V4SF_type_node,
22552 V4SF_type_node, NULL_TREE);
22553 tree v8hi_ftype_v16qi
22554 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22555 NULL_TREE);
22556 tree v4si_ftype_v16qi
22557 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22558 NULL_TREE);
22559 tree v2di_ftype_v16qi
22560 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22561 NULL_TREE);
22562 tree v4si_ftype_v8hi
22563 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22564 NULL_TREE);
22565 tree v2di_ftype_v8hi
22566 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22567 NULL_TREE);
22568 tree v2di_ftype_v4si
22569 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22570 NULL_TREE);
22571 tree v2di_ftype_pv2di
22572 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22573 NULL_TREE);
22574 tree v16qi_ftype_v16qi_v16qi_int
22575 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22576 V16QI_type_node, integer_type_node,
22577 NULL_TREE);
22578 tree v16qi_ftype_v16qi_v16qi_v16qi
22579 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22580 V16QI_type_node, V16QI_type_node,
22581 NULL_TREE);
22582 tree v8hi_ftype_v8hi_v8hi_int
22583 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22584 V8HI_type_node, integer_type_node,
22585 NULL_TREE);
22586 tree v4si_ftype_v4si_v4si_int
22587 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22588 V4SI_type_node, integer_type_node,
22589 NULL_TREE);
22590 tree int_ftype_v2di_v2di
22591 = build_function_type_list (integer_type_node,
22592 V2DI_type_node, V2DI_type_node,
22593 NULL_TREE);
22594 tree int_ftype_v16qi_int_v16qi_int_int
22595 = build_function_type_list (integer_type_node,
22596 V16QI_type_node,
22597 integer_type_node,
22598 V16QI_type_node,
22599 integer_type_node,
22600 integer_type_node,
22601 NULL_TREE);
22602 tree v16qi_ftype_v16qi_int_v16qi_int_int
22603 = build_function_type_list (V16QI_type_node,
22604 V16QI_type_node,
22605 integer_type_node,
22606 V16QI_type_node,
22607 integer_type_node,
22608 integer_type_node,
22609 NULL_TREE);
22610 tree int_ftype_v16qi_v16qi_int
22611 = build_function_type_list (integer_type_node,
22612 V16QI_type_node,
22613 V16QI_type_node,
22614 integer_type_node,
22615 NULL_TREE);
22616
22617 /* SSE5 instructions */
22618 tree v2di_ftype_v2di_v2di_v2di
22619 = build_function_type_list (V2DI_type_node,
22620 V2DI_type_node,
22621 V2DI_type_node,
22622 V2DI_type_node,
22623 NULL_TREE);
22624
22625 tree v4si_ftype_v4si_v4si_v4si
22626 = build_function_type_list (V4SI_type_node,
22627 V4SI_type_node,
22628 V4SI_type_node,
22629 V4SI_type_node,
22630 NULL_TREE);
22631
22632 tree v4si_ftype_v4si_v4si_v2di
22633 = build_function_type_list (V4SI_type_node,
22634 V4SI_type_node,
22635 V4SI_type_node,
22636 V2DI_type_node,
22637 NULL_TREE);
22638
22639 tree v8hi_ftype_v8hi_v8hi_v8hi
22640 = build_function_type_list (V8HI_type_node,
22641 V8HI_type_node,
22642 V8HI_type_node,
22643 V8HI_type_node,
22644 NULL_TREE);
22645
22646 tree v8hi_ftype_v8hi_v8hi_v4si
22647 = build_function_type_list (V8HI_type_node,
22648 V8HI_type_node,
22649 V8HI_type_node,
22650 V4SI_type_node,
22651 NULL_TREE);
22652
22653 tree v2df_ftype_v2df_v2df_v16qi
22654 = build_function_type_list (V2DF_type_node,
22655 V2DF_type_node,
22656 V2DF_type_node,
22657 V16QI_type_node,
22658 NULL_TREE);
22659
22660 tree v4sf_ftype_v4sf_v4sf_v16qi
22661 = build_function_type_list (V4SF_type_node,
22662 V4SF_type_node,
22663 V4SF_type_node,
22664 V16QI_type_node,
22665 NULL_TREE);
22666
22667 tree v2di_ftype_v2di_si
22668 = build_function_type_list (V2DI_type_node,
22669 V2DI_type_node,
22670 integer_type_node,
22671 NULL_TREE);
22672
22673 tree v4si_ftype_v4si_si
22674 = build_function_type_list (V4SI_type_node,
22675 V4SI_type_node,
22676 integer_type_node,
22677 NULL_TREE);
22678
22679 tree v8hi_ftype_v8hi_si
22680 = build_function_type_list (V8HI_type_node,
22681 V8HI_type_node,
22682 integer_type_node,
22683 NULL_TREE);
22684
22685 tree v16qi_ftype_v16qi_si
22686 = build_function_type_list (V16QI_type_node,
22687 V16QI_type_node,
22688 integer_type_node,
22689 NULL_TREE);
22690 tree v4sf_ftype_v4hi
22691 = build_function_type_list (V4SF_type_node,
22692 V4HI_type_node,
22693 NULL_TREE);
22694
22695 tree v4hi_ftype_v4sf
22696 = build_function_type_list (V4HI_type_node,
22697 V4SF_type_node,
22698 NULL_TREE);
22699
22700 tree v2di_ftype_v2di
22701 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22702
22703 tree v16qi_ftype_v8hi_v8hi
22704 = build_function_type_list (V16QI_type_node,
22705 V8HI_type_node, V8HI_type_node,
22706 NULL_TREE);
22707 tree v8hi_ftype_v4si_v4si
22708 = build_function_type_list (V8HI_type_node,
22709 V4SI_type_node, V4SI_type_node,
22710 NULL_TREE);
22711 tree v8hi_ftype_v16qi_v16qi
22712 = build_function_type_list (V8HI_type_node,
22713 V16QI_type_node, V16QI_type_node,
22714 NULL_TREE);
22715 tree v4hi_ftype_v8qi_v8qi
22716 = build_function_type_list (V4HI_type_node,
22717 V8QI_type_node, V8QI_type_node,
22718 NULL_TREE);
22719 tree unsigned_ftype_unsigned_uchar
22720 = build_function_type_list (unsigned_type_node,
22721 unsigned_type_node,
22722 unsigned_char_type_node,
22723 NULL_TREE);
22724 tree unsigned_ftype_unsigned_ushort
22725 = build_function_type_list (unsigned_type_node,
22726 unsigned_type_node,
22727 short_unsigned_type_node,
22728 NULL_TREE);
22729 tree unsigned_ftype_unsigned_unsigned
22730 = build_function_type_list (unsigned_type_node,
22731 unsigned_type_node,
22732 unsigned_type_node,
22733 NULL_TREE);
22734 tree uint64_ftype_uint64_uint64
22735 = build_function_type_list (long_long_unsigned_type_node,
22736 long_long_unsigned_type_node,
22737 long_long_unsigned_type_node,
22738 NULL_TREE);
22739 tree float_ftype_float
22740 = build_function_type_list (float_type_node,
22741 float_type_node,
22742 NULL_TREE);
22743
22744 /* AVX builtins */
22745 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22746 V32QImode);
22747 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22748 V8SImode);
22749 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22750 V8SFmode);
22751 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22752 V4DImode);
22753 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22754 V4DFmode);
22755 tree v8sf_ftype_v8sf
22756 = build_function_type_list (V8SF_type_node,
22757 V8SF_type_node,
22758 NULL_TREE);
22759 tree v8si_ftype_v8sf
22760 = build_function_type_list (V8SI_type_node,
22761 V8SF_type_node,
22762 NULL_TREE);
22763 tree v8sf_ftype_v8si
22764 = build_function_type_list (V8SF_type_node,
22765 V8SI_type_node,
22766 NULL_TREE);
22767 tree v4si_ftype_v4df
22768 = build_function_type_list (V4SI_type_node,
22769 V4DF_type_node,
22770 NULL_TREE);
22771 tree v4df_ftype_v4df
22772 = build_function_type_list (V4DF_type_node,
22773 V4DF_type_node,
22774 NULL_TREE);
22775 tree v4df_ftype_v4si
22776 = build_function_type_list (V4DF_type_node,
22777 V4SI_type_node,
22778 NULL_TREE);
22779 tree v4df_ftype_v4sf
22780 = build_function_type_list (V4DF_type_node,
22781 V4SF_type_node,
22782 NULL_TREE);
22783 tree v4sf_ftype_v4df
22784 = build_function_type_list (V4SF_type_node,
22785 V4DF_type_node,
22786 NULL_TREE);
22787 tree v8sf_ftype_v8sf_v8sf
22788 = build_function_type_list (V8SF_type_node,
22789 V8SF_type_node, V8SF_type_node,
22790 NULL_TREE);
22791 tree v4df_ftype_v4df_v4df
22792 = build_function_type_list (V4DF_type_node,
22793 V4DF_type_node, V4DF_type_node,
22794 NULL_TREE);
22795 tree v8sf_ftype_v8sf_int
22796 = build_function_type_list (V8SF_type_node,
22797 V8SF_type_node, integer_type_node,
22798 NULL_TREE);
22799 tree v4si_ftype_v8si_int
22800 = build_function_type_list (V4SI_type_node,
22801 V8SI_type_node, integer_type_node,
22802 NULL_TREE);
22803 tree v4df_ftype_v4df_int
22804 = build_function_type_list (V4DF_type_node,
22805 V4DF_type_node, integer_type_node,
22806 NULL_TREE);
22807 tree v4sf_ftype_v8sf_int
22808 = build_function_type_list (V4SF_type_node,
22809 V8SF_type_node, integer_type_node,
22810 NULL_TREE);
22811 tree v2df_ftype_v4df_int
22812 = build_function_type_list (V2DF_type_node,
22813 V4DF_type_node, integer_type_node,
22814 NULL_TREE);
22815 tree v8sf_ftype_v8sf_v8sf_int
22816 = build_function_type_list (V8SF_type_node,
22817 V8SF_type_node, V8SF_type_node,
22818 integer_type_node,
22819 NULL_TREE);
22820 tree v8sf_ftype_v8sf_v8sf_v8sf
22821 = build_function_type_list (V8SF_type_node,
22822 V8SF_type_node, V8SF_type_node,
22823 V8SF_type_node,
22824 NULL_TREE);
22825 tree v4df_ftype_v4df_v4df_v4df
22826 = build_function_type_list (V4DF_type_node,
22827 V4DF_type_node, V4DF_type_node,
22828 V4DF_type_node,
22829 NULL_TREE);
22830 tree v8si_ftype_v8si_v8si_int
22831 = build_function_type_list (V8SI_type_node,
22832 V8SI_type_node, V8SI_type_node,
22833 integer_type_node,
22834 NULL_TREE);
22835 tree v4df_ftype_v4df_v4df_int
22836 = build_function_type_list (V4DF_type_node,
22837 V4DF_type_node, V4DF_type_node,
22838 integer_type_node,
22839 NULL_TREE);
22840 tree v8sf_ftype_pcfloat
22841 = build_function_type_list (V8SF_type_node,
22842 pcfloat_type_node,
22843 NULL_TREE);
22844 tree v4df_ftype_pcdouble
22845 = build_function_type_list (V4DF_type_node,
22846 pcdouble_type_node,
22847 NULL_TREE);
22848 tree pcv4sf_type_node
22849 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22850 tree pcv2df_type_node
22851 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22852 tree v8sf_ftype_pcv4sf
22853 = build_function_type_list (V8SF_type_node,
22854 pcv4sf_type_node,
22855 NULL_TREE);
22856 tree v4df_ftype_pcv2df
22857 = build_function_type_list (V4DF_type_node,
22858 pcv2df_type_node,
22859 NULL_TREE);
22860 tree v32qi_ftype_pcchar
22861 = build_function_type_list (V32QI_type_node,
22862 pcchar_type_node,
22863 NULL_TREE);
22864 tree void_ftype_pchar_v32qi
22865 = build_function_type_list (void_type_node,
22866 pchar_type_node, V32QI_type_node,
22867 NULL_TREE);
22868 tree v8si_ftype_v8si_v4si_int
22869 = build_function_type_list (V8SI_type_node,
22870 V8SI_type_node, V4SI_type_node,
22871 integer_type_node,
22872 NULL_TREE);
22873 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22874 tree void_ftype_pv4di_v4di
22875 = build_function_type_list (void_type_node,
22876 pv4di_type_node, V4DI_type_node,
22877 NULL_TREE);
22878 tree v8sf_ftype_v8sf_v4sf_int
22879 = build_function_type_list (V8SF_type_node,
22880 V8SF_type_node, V4SF_type_node,
22881 integer_type_node,
22882 NULL_TREE);
22883 tree v4df_ftype_v4df_v2df_int
22884 = build_function_type_list (V4DF_type_node,
22885 V4DF_type_node, V2DF_type_node,
22886 integer_type_node,
22887 NULL_TREE);
22888 tree void_ftype_pfloat_v8sf
22889 = build_function_type_list (void_type_node,
22890 pfloat_type_node, V8SF_type_node,
22891 NULL_TREE);
22892 tree void_ftype_pdouble_v4df
22893 = build_function_type_list (void_type_node,
22894 pdouble_type_node, V4DF_type_node,
22895 NULL_TREE);
22896 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22897 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22898 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22899 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22900 tree pcv8sf_type_node
22901 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22902 tree pcv4df_type_node
22903 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22904 tree v8sf_ftype_pcv8sf_v8sf
22905 = build_function_type_list (V8SF_type_node,
22906 pcv8sf_type_node, V8SF_type_node,
22907 NULL_TREE);
22908 tree v4df_ftype_pcv4df_v4df
22909 = build_function_type_list (V4DF_type_node,
22910 pcv4df_type_node, V4DF_type_node,
22911 NULL_TREE);
22912 tree v4sf_ftype_pcv4sf_v4sf
22913 = build_function_type_list (V4SF_type_node,
22914 pcv4sf_type_node, V4SF_type_node,
22915 NULL_TREE);
22916 tree v2df_ftype_pcv2df_v2df
22917 = build_function_type_list (V2DF_type_node,
22918 pcv2df_type_node, V2DF_type_node,
22919 NULL_TREE);
22920 tree void_ftype_pv8sf_v8sf_v8sf
22921 = build_function_type_list (void_type_node,
22922 pv8sf_type_node, V8SF_type_node,
22923 V8SF_type_node,
22924 NULL_TREE);
22925 tree void_ftype_pv4df_v4df_v4df
22926 = build_function_type_list (void_type_node,
22927 pv4df_type_node, V4DF_type_node,
22928 V4DF_type_node,
22929 NULL_TREE);
22930 tree void_ftype_pv4sf_v4sf_v4sf
22931 = build_function_type_list (void_type_node,
22932 pv4sf_type_node, V4SF_type_node,
22933 V4SF_type_node,
22934 NULL_TREE);
22935 tree void_ftype_pv2df_v2df_v2df
22936 = build_function_type_list (void_type_node,
22937 pv2df_type_node, V2DF_type_node,
22938 V2DF_type_node,
22939 NULL_TREE);
22940 tree v4df_ftype_v2df
22941 = build_function_type_list (V4DF_type_node,
22942 V2DF_type_node,
22943 NULL_TREE);
22944 tree v8sf_ftype_v4sf
22945 = build_function_type_list (V8SF_type_node,
22946 V4SF_type_node,
22947 NULL_TREE);
22948 tree v8si_ftype_v4si
22949 = build_function_type_list (V8SI_type_node,
22950 V4SI_type_node,
22951 NULL_TREE);
22952 tree v2df_ftype_v4df
22953 = build_function_type_list (V2DF_type_node,
22954 V4DF_type_node,
22955 NULL_TREE);
22956 tree v4sf_ftype_v8sf
22957 = build_function_type_list (V4SF_type_node,
22958 V8SF_type_node,
22959 NULL_TREE);
22960 tree v4si_ftype_v8si
22961 = build_function_type_list (V4SI_type_node,
22962 V8SI_type_node,
22963 NULL_TREE);
22964 tree int_ftype_v4df
22965 = build_function_type_list (integer_type_node,
22966 V4DF_type_node,
22967 NULL_TREE);
22968 tree int_ftype_v8sf
22969 = build_function_type_list (integer_type_node,
22970 V8SF_type_node,
22971 NULL_TREE);
22972 tree int_ftype_v8sf_v8sf
22973 = build_function_type_list (integer_type_node,
22974 V8SF_type_node, V8SF_type_node,
22975 NULL_TREE);
22976 tree int_ftype_v4di_v4di
22977 = build_function_type_list (integer_type_node,
22978 V4DI_type_node, V4DI_type_node,
22979 NULL_TREE);
22980 tree int_ftype_v4df_v4df
22981 = build_function_type_list (integer_type_node,
22982 V4DF_type_node, V4DF_type_node,
22983 NULL_TREE);
22984 tree v8sf_ftype_v8sf_v8si
22985 = build_function_type_list (V8SF_type_node,
22986 V8SF_type_node, V8SI_type_node,
22987 NULL_TREE);
22988 tree v4df_ftype_v4df_v4di
22989 = build_function_type_list (V4DF_type_node,
22990 V4DF_type_node, V4DI_type_node,
22991 NULL_TREE);
22992 tree v4sf_ftype_v4sf_v4si
22993 = build_function_type_list (V4SF_type_node,
22994 V4SF_type_node, V4SI_type_node, NULL_TREE);
22995 tree v2df_ftype_v2df_v2di
22996 = build_function_type_list (V2DF_type_node,
22997 V2DF_type_node, V2DI_type_node, NULL_TREE);
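  /* Each of the *_ftype_* trees above encodes a C prototype for a
     builtin: the first argument of build_function_type_list is the
     return type and the remaining ones are the parameter types.  For
     instance v8sf_ftype_v8sf_v8sf describes a function taking and
     returning a 256-bit float vector, roughly

       __v8sf f (__v8sf, __v8sf);

     (illustrative only; the user-visible __v8sf/__m256 typedefs are
     assumed to come from the AVX intrinsics header, not from here).  */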
22998
22999 tree ftype;
23000
23001 /* Add all special builtins with a variable number of operands. */
23002 for (i = 0, d = bdesc_special_args;
23003 i < ARRAY_SIZE (bdesc_special_args);
23004 i++, d++)
23005 {
23006 tree type;
23007
23008 if (d->name == 0)
23009 continue;
23010
23011 switch ((enum ix86_special_builtin_type) d->flag)
23012 {
23013 case VOID_FTYPE_VOID:
23014 type = void_ftype_void;
23015 break;
23016 case V32QI_FTYPE_PCCHAR:
23017 type = v32qi_ftype_pcchar;
23018 break;
23019 case V16QI_FTYPE_PCCHAR:
23020 type = v16qi_ftype_pcchar;
23021 break;
23022 case V8SF_FTYPE_PCV4SF:
23023 type = v8sf_ftype_pcv4sf;
23024 break;
23025 case V8SF_FTYPE_PCFLOAT:
23026 type = v8sf_ftype_pcfloat;
23027 break;
23028 case V4DF_FTYPE_PCV2DF:
23029 type = v4df_ftype_pcv2df;
23030 break;
23031 case V4DF_FTYPE_PCDOUBLE:
23032 type = v4df_ftype_pcdouble;
23033 break;
23034 case V4SF_FTYPE_PCFLOAT:
23035 type = v4sf_ftype_pcfloat;
23036 break;
23037 case V2DI_FTYPE_PV2DI:
23038 type = v2di_ftype_pv2di;
23039 break;
23040 case V2DF_FTYPE_PCDOUBLE:
23041 type = v2df_ftype_pcdouble;
23042 break;
23043 case V8SF_FTYPE_PCV8SF_V8SF:
23044 type = v8sf_ftype_pcv8sf_v8sf;
23045 break;
23046 case V4DF_FTYPE_PCV4DF_V4DF:
23047 type = v4df_ftype_pcv4df_v4df;
23048 break;
23049 case V4SF_FTYPE_V4SF_PCV2SF:
23050 type = v4sf_ftype_v4sf_pcv2sf;
23051 break;
23052 case V4SF_FTYPE_PCV4SF_V4SF:
23053 type = v4sf_ftype_pcv4sf_v4sf;
23054 break;
23055 case V2DF_FTYPE_V2DF_PCDOUBLE:
23056 type = v2df_ftype_v2df_pcdouble;
23057 break;
23058 case V2DF_FTYPE_PCV2DF_V2DF:
23059 type = v2df_ftype_pcv2df_v2df;
23060 break;
23061 case VOID_FTYPE_PV2SF_V4SF:
23062 type = void_ftype_pv2sf_v4sf;
23063 break;
23064 case VOID_FTYPE_PV4DI_V4DI:
23065 type = void_ftype_pv4di_v4di;
23066 break;
23067 case VOID_FTYPE_PV2DI_V2DI:
23068 type = void_ftype_pv2di_v2di;
23069 break;
23070 case VOID_FTYPE_PCHAR_V32QI:
23071 type = void_ftype_pchar_v32qi;
23072 break;
23073 case VOID_FTYPE_PCHAR_V16QI:
23074 type = void_ftype_pchar_v16qi;
23075 break;
23076 case VOID_FTYPE_PFLOAT_V8SF:
23077 type = void_ftype_pfloat_v8sf;
23078 break;
23079 case VOID_FTYPE_PFLOAT_V4SF:
23080 type = void_ftype_pfloat_v4sf;
23081 break;
23082 case VOID_FTYPE_PDOUBLE_V4DF:
23083 type = void_ftype_pdouble_v4df;
23084 break;
23085 case VOID_FTYPE_PDOUBLE_V2DF:
23086 type = void_ftype_pdouble_v2df;
23087 break;
23088 case VOID_FTYPE_PDI_DI:
23089 type = void_ftype_pdi_di;
23090 break;
23091 case VOID_FTYPE_PINT_INT:
23092 type = void_ftype_pint_int;
23093 break;
23094 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23095 type = void_ftype_pv8sf_v8sf_v8sf;
23096 break;
23097 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23098 type = void_ftype_pv4df_v4df_v4df;
23099 break;
23100 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23101 type = void_ftype_pv4sf_v4sf_v4sf;
23102 break;
23103 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23104 type = void_ftype_pv2df_v2df_v2df;
23105 break;
23106 default:
23107 gcc_unreachable ();
23108 }
23109
23110 def_builtin (d->mask, d->name, type, d->code);
23111 }
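  /* As a concrete sketch of the loop above: a bdesc_special_args entry
     flagged V8SF_FTYPE_PCV8SF_V8SF (an AVX masked load, assuming the
     usual table contents) ends up registered as if we had called

       def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_maskloadps256",
		    v8sf_ftype_pcv8sf_v8sf, IX86_BUILTIN_MASKLOADPS256);

     with the mask, name and code all taken from the table entry.  */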
23112
23113 /* Add all builtins with a variable number of operands. */
23114 for (i = 0, d = bdesc_args;
23115 i < ARRAY_SIZE (bdesc_args);
23116 i++, d++)
23117 {
23118 tree type;
23119
23120 if (d->name == 0)
23121 continue;
23122
23123 switch ((enum ix86_builtin_type) d->flag)
23124 {
23125 case FLOAT_FTYPE_FLOAT:
23126 type = float_ftype_float;
23127 break;
23128 case INT_FTYPE_V8SF_V8SF_PTEST:
23129 type = int_ftype_v8sf_v8sf;
23130 break;
23131 case INT_FTYPE_V4DI_V4DI_PTEST:
23132 type = int_ftype_v4di_v4di;
23133 break;
23134 case INT_FTYPE_V4DF_V4DF_PTEST:
23135 type = int_ftype_v4df_v4df;
23136 break;
23137 case INT_FTYPE_V4SF_V4SF_PTEST:
23138 type = int_ftype_v4sf_v4sf;
23139 break;
23140 case INT_FTYPE_V2DI_V2DI_PTEST:
23141 type = int_ftype_v2di_v2di;
23142 break;
23143 case INT_FTYPE_V2DF_V2DF_PTEST:
23144 type = int_ftype_v2df_v2df;
23145 break;
23146 case INT64_FTYPE_V4SF:
23147 type = int64_ftype_v4sf;
23148 break;
23149 case INT64_FTYPE_V2DF:
23150 type = int64_ftype_v2df;
23151 break;
23152 case INT_FTYPE_V16QI:
23153 type = int_ftype_v16qi;
23154 break;
23155 case INT_FTYPE_V8QI:
23156 type = int_ftype_v8qi;
23157 break;
23158 case INT_FTYPE_V8SF:
23159 type = int_ftype_v8sf;
23160 break;
23161 case INT_FTYPE_V4DF:
23162 type = int_ftype_v4df;
23163 break;
23164 case INT_FTYPE_V4SF:
23165 type = int_ftype_v4sf;
23166 break;
23167 case INT_FTYPE_V2DF:
23168 type = int_ftype_v2df;
23169 break;
23170 case V16QI_FTYPE_V16QI:
23171 type = v16qi_ftype_v16qi;
23172 break;
23173 case V8SI_FTYPE_V8SF:
23174 type = v8si_ftype_v8sf;
23175 break;
23176 case V8SI_FTYPE_V4SI:
23177 type = v8si_ftype_v4si;
23178 break;
23179 case V8HI_FTYPE_V8HI:
23180 type = v8hi_ftype_v8hi;
23181 break;
23182 case V8HI_FTYPE_V16QI:
23183 type = v8hi_ftype_v16qi;
23184 break;
23185 case V8QI_FTYPE_V8QI:
23186 type = v8qi_ftype_v8qi;
23187 break;
23188 case V8SF_FTYPE_V8SF:
23189 type = v8sf_ftype_v8sf;
23190 break;
23191 case V8SF_FTYPE_V8SI:
23192 type = v8sf_ftype_v8si;
23193 break;
23194 case V8SF_FTYPE_V4SF:
23195 type = v8sf_ftype_v4sf;
23196 break;
23197 case V4SI_FTYPE_V4DF:
23198 type = v4si_ftype_v4df;
23199 break;
23200 case V4SI_FTYPE_V4SI:
23201 type = v4si_ftype_v4si;
23202 break;
23203 case V4SI_FTYPE_V16QI:
23204 type = v4si_ftype_v16qi;
23205 break;
23206 case V4SI_FTYPE_V8SI:
23207 type = v4si_ftype_v8si;
23208 break;
23209 case V4SI_FTYPE_V8HI:
23210 type = v4si_ftype_v8hi;
23211 break;
23212 case V4SI_FTYPE_V4SF:
23213 type = v4si_ftype_v4sf;
23214 break;
23215 case V4SI_FTYPE_V2DF:
23216 type = v4si_ftype_v2df;
23217 break;
23218 case V4HI_FTYPE_V4HI:
23219 type = v4hi_ftype_v4hi;
23220 break;
23221 case V4DF_FTYPE_V4DF:
23222 type = v4df_ftype_v4df;
23223 break;
23224 case V4DF_FTYPE_V4SI:
23225 type = v4df_ftype_v4si;
23226 break;
23227 case V4DF_FTYPE_V4SF:
23228 type = v4df_ftype_v4sf;
23229 break;
23230 case V4DF_FTYPE_V2DF:
23231 type = v4df_ftype_v2df;
23232 break;
23233 case V4SF_FTYPE_V4SF:
23234 case V4SF_FTYPE_V4SF_VEC_MERGE:
23235 type = v4sf_ftype_v4sf;
23236 break;
23237 case V4SF_FTYPE_V8SF:
23238 type = v4sf_ftype_v8sf;
23239 break;
23240 case V4SF_FTYPE_V4SI:
23241 type = v4sf_ftype_v4si;
23242 break;
23243 case V4SF_FTYPE_V4DF:
23244 type = v4sf_ftype_v4df;
23245 break;
23246 case V4SF_FTYPE_V2DF:
23247 type = v4sf_ftype_v2df;
23248 break;
23249 case V2DI_FTYPE_V2DI:
23250 type = v2di_ftype_v2di;
23251 break;
23252 case V2DI_FTYPE_V16QI:
23253 type = v2di_ftype_v16qi;
23254 break;
23255 case V2DI_FTYPE_V8HI:
23256 type = v2di_ftype_v8hi;
23257 break;
23258 case V2DI_FTYPE_V4SI:
23259 type = v2di_ftype_v4si;
23260 break;
23261 case V2SI_FTYPE_V2SI:
23262 type = v2si_ftype_v2si;
23263 break;
23264 case V2SI_FTYPE_V4SF:
23265 type = v2si_ftype_v4sf;
23266 break;
23267 case V2SI_FTYPE_V2DF:
23268 type = v2si_ftype_v2df;
23269 break;
23270 case V2SI_FTYPE_V2SF:
23271 type = v2si_ftype_v2sf;
23272 break;
23273 case V2DF_FTYPE_V4DF:
23274 type = v2df_ftype_v4df;
23275 break;
23276 case V2DF_FTYPE_V4SF:
23277 type = v2df_ftype_v4sf;
23278 break;
23279 case V2DF_FTYPE_V2DF:
23280 case V2DF_FTYPE_V2DF_VEC_MERGE:
23281 type = v2df_ftype_v2df;
23282 break;
23283 case V2DF_FTYPE_V2SI:
23284 type = v2df_ftype_v2si;
23285 break;
23286 case V2DF_FTYPE_V4SI:
23287 type = v2df_ftype_v4si;
23288 break;
23289 case V2SF_FTYPE_V2SF:
23290 type = v2sf_ftype_v2sf;
23291 break;
23292 case V2SF_FTYPE_V2SI:
23293 type = v2sf_ftype_v2si;
23294 break;
23295 case V16QI_FTYPE_V16QI_V16QI:
23296 type = v16qi_ftype_v16qi_v16qi;
23297 break;
23298 case V16QI_FTYPE_V8HI_V8HI:
23299 type = v16qi_ftype_v8hi_v8hi;
23300 break;
23301 case V8QI_FTYPE_V8QI_V8QI:
23302 type = v8qi_ftype_v8qi_v8qi;
23303 break;
23304 case V8QI_FTYPE_V4HI_V4HI:
23305 type = v8qi_ftype_v4hi_v4hi;
23306 break;
23307 case V8HI_FTYPE_V8HI_V8HI:
23308 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23309 type = v8hi_ftype_v8hi_v8hi;
23310 break;
23311 case V8HI_FTYPE_V16QI_V16QI:
23312 type = v8hi_ftype_v16qi_v16qi;
23313 break;
23314 case V8HI_FTYPE_V4SI_V4SI:
23315 type = v8hi_ftype_v4si_v4si;
23316 break;
23317 case V8HI_FTYPE_V8HI_SI_COUNT:
23318 type = v8hi_ftype_v8hi_int;
23319 break;
23320 case V8SF_FTYPE_V8SF_V8SF:
23321 type = v8sf_ftype_v8sf_v8sf;
23322 break;
23323 case V8SF_FTYPE_V8SF_V8SI:
23324 type = v8sf_ftype_v8sf_v8si;
23325 break;
23326 case V4SI_FTYPE_V4SI_V4SI:
23327 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23328 type = v4si_ftype_v4si_v4si;
23329 break;
23330 case V4SI_FTYPE_V8HI_V8HI:
23331 type = v4si_ftype_v8hi_v8hi;
23332 break;
23333 case V4SI_FTYPE_V4SF_V4SF:
23334 type = v4si_ftype_v4sf_v4sf;
23335 break;
23336 case V4SI_FTYPE_V2DF_V2DF:
23337 type = v4si_ftype_v2df_v2df;
23338 break;
23339 case V4SI_FTYPE_V4SI_SI_COUNT:
23340 type = v4si_ftype_v4si_int;
23341 break;
23342 case V4HI_FTYPE_V4HI_V4HI:
23343 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23344 type = v4hi_ftype_v4hi_v4hi;
23345 break;
23346 case V4HI_FTYPE_V8QI_V8QI:
23347 type = v4hi_ftype_v8qi_v8qi;
23348 break;
23349 case V4HI_FTYPE_V2SI_V2SI:
23350 type = v4hi_ftype_v2si_v2si;
23351 break;
23352 case V4HI_FTYPE_V4HI_SI_COUNT:
23353 type = v4hi_ftype_v4hi_int;
23354 break;
23355 case V4DF_FTYPE_V4DF_V4DF:
23356 type = v4df_ftype_v4df_v4df;
23357 break;
23358 case V4DF_FTYPE_V4DF_V4DI:
23359 type = v4df_ftype_v4df_v4di;
23360 break;
23361 case V4SF_FTYPE_V4SF_V4SF:
23362 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23363 type = v4sf_ftype_v4sf_v4sf;
23364 break;
23365 case V4SF_FTYPE_V4SF_V4SI:
23366 type = v4sf_ftype_v4sf_v4si;
23367 break;
23368 case V4SF_FTYPE_V4SF_V2SI:
23369 type = v4sf_ftype_v4sf_v2si;
23370 break;
23371 case V4SF_FTYPE_V4SF_V2DF:
23372 type = v4sf_ftype_v4sf_v2df;
23373 break;
23374 case V4SF_FTYPE_V4SF_DI:
23375 type = v4sf_ftype_v4sf_int64;
23376 break;
23377 case V4SF_FTYPE_V4SF_SI:
23378 type = v4sf_ftype_v4sf_int;
23379 break;
23380 case V2DI_FTYPE_V2DI_V2DI:
23381 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23382 type = v2di_ftype_v2di_v2di;
23383 break;
23384 case V2DI_FTYPE_V16QI_V16QI:
23385 type = v2di_ftype_v16qi_v16qi;
23386 break;
23387 case V2DI_FTYPE_V4SI_V4SI:
23388 type = v2di_ftype_v4si_v4si;
23389 break;
23390 case V2DI_FTYPE_V2DI_V16QI:
23391 type = v2di_ftype_v2di_v16qi;
23392 break;
23393 case V2DI_FTYPE_V2DF_V2DF:
23394 type = v2di_ftype_v2df_v2df;
23395 break;
23396 case V2DI_FTYPE_V2DI_SI_COUNT:
23397 type = v2di_ftype_v2di_int;
23398 break;
23399 case V2SI_FTYPE_V2SI_V2SI:
23400 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23401 type = v2si_ftype_v2si_v2si;
23402 break;
23403 case V2SI_FTYPE_V4HI_V4HI:
23404 type = v2si_ftype_v4hi_v4hi;
23405 break;
23406 case V2SI_FTYPE_V2SF_V2SF:
23407 type = v2si_ftype_v2sf_v2sf;
23408 break;
23409 case V2SI_FTYPE_V2SI_SI_COUNT:
23410 type = v2si_ftype_v2si_int;
23411 break;
23412 case V2DF_FTYPE_V2DF_V2DF:
23413 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23414 type = v2df_ftype_v2df_v2df;
23415 break;
23416 case V2DF_FTYPE_V2DF_V4SF:
23417 type = v2df_ftype_v2df_v4sf;
23418 break;
23419 case V2DF_FTYPE_V2DF_V2DI:
23420 type = v2df_ftype_v2df_v2di;
23421 break;
23422 case V2DF_FTYPE_V2DF_DI:
23423 type = v2df_ftype_v2df_int64;
23424 break;
23425 case V2DF_FTYPE_V2DF_SI:
23426 type = v2df_ftype_v2df_int;
23427 break;
23428 case V2SF_FTYPE_V2SF_V2SF:
23429 type = v2sf_ftype_v2sf_v2sf;
23430 break;
23431 case V1DI_FTYPE_V1DI_V1DI:
23432 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23433 type = v1di_ftype_v1di_v1di;
23434 break;
23435 case V1DI_FTYPE_V8QI_V8QI:
23436 type = v1di_ftype_v8qi_v8qi;
23437 break;
23438 case V1DI_FTYPE_V2SI_V2SI:
23439 type = v1di_ftype_v2si_v2si;
23440 break;
23441 case V1DI_FTYPE_V1DI_SI_COUNT:
23442 type = v1di_ftype_v1di_int;
23443 break;
23444 case UINT64_FTYPE_UINT64_UINT64:
23445 type = uint64_ftype_uint64_uint64;
23446 break;
23447 case UINT_FTYPE_UINT_UINT:
23448 type = unsigned_ftype_unsigned_unsigned;
23449 break;
23450 case UINT_FTYPE_UINT_USHORT:
23451 type = unsigned_ftype_unsigned_ushort;
23452 break;
23453 case UINT_FTYPE_UINT_UCHAR:
23454 type = unsigned_ftype_unsigned_uchar;
23455 break;
23456 case V8HI_FTYPE_V8HI_INT:
23457 type = v8hi_ftype_v8hi_int;
23458 break;
23459 case V8SF_FTYPE_V8SF_INT:
23460 type = v8sf_ftype_v8sf_int;
23461 break;
23462 case V4SI_FTYPE_V4SI_INT:
23463 type = v4si_ftype_v4si_int;
23464 break;
23465 case V4SI_FTYPE_V8SI_INT:
23466 type = v4si_ftype_v8si_int;
23467 break;
23468 case V4HI_FTYPE_V4HI_INT:
23469 type = v4hi_ftype_v4hi_int;
23470 break;
23471 case V4DF_FTYPE_V4DF_INT:
23472 type = v4df_ftype_v4df_int;
23473 break;
23474 case V4SF_FTYPE_V4SF_INT:
23475 type = v4sf_ftype_v4sf_int;
23476 break;
23477 case V4SF_FTYPE_V8SF_INT:
23478 type = v4sf_ftype_v8sf_int;
23479 break;
23480 case V2DI_FTYPE_V2DI_INT:
23481 case V2DI2TI_FTYPE_V2DI_INT:
23482 type = v2di_ftype_v2di_int;
23483 break;
23484 case V2DF_FTYPE_V2DF_INT:
23485 type = v2df_ftype_v2df_int;
23486 break;
23487 case V2DF_FTYPE_V4DF_INT:
23488 type = v2df_ftype_v4df_int;
23489 break;
23490 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23491 type = v16qi_ftype_v16qi_v16qi_v16qi;
23492 break;
23493 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23494 type = v8sf_ftype_v8sf_v8sf_v8sf;
23495 break;
23496 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23497 type = v4df_ftype_v4df_v4df_v4df;
23498 break;
23499 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23500 type = v4sf_ftype_v4sf_v4sf_v4sf;
23501 break;
23502 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23503 type = v2df_ftype_v2df_v2df_v2df;
23504 break;
23505 case V16QI_FTYPE_V16QI_V16QI_INT:
23506 type = v16qi_ftype_v16qi_v16qi_int;
23507 break;
23508 case V8SI_FTYPE_V8SI_V8SI_INT:
23509 type = v8si_ftype_v8si_v8si_int;
23510 break;
23511 case V8SI_FTYPE_V8SI_V4SI_INT:
23512 type = v8si_ftype_v8si_v4si_int;
23513 break;
23514 case V8HI_FTYPE_V8HI_V8HI_INT:
23515 type = v8hi_ftype_v8hi_v8hi_int;
23516 break;
23517 case V8SF_FTYPE_V8SF_V8SF_INT:
23518 type = v8sf_ftype_v8sf_v8sf_int;
23519 break;
23520 case V8SF_FTYPE_V8SF_V4SF_INT:
23521 type = v8sf_ftype_v8sf_v4sf_int;
23522 break;
23523 case V4SI_FTYPE_V4SI_V4SI_INT:
23524 type = v4si_ftype_v4si_v4si_int;
23525 break;
23526 case V4DF_FTYPE_V4DF_V4DF_INT:
23527 type = v4df_ftype_v4df_v4df_int;
23528 break;
23529 case V4DF_FTYPE_V4DF_V2DF_INT:
23530 type = v4df_ftype_v4df_v2df_int;
23531 break;
23532 case V4SF_FTYPE_V4SF_V4SF_INT:
23533 type = v4sf_ftype_v4sf_v4sf_int;
23534 break;
23535 case V2DI_FTYPE_V2DI_V2DI_INT:
23536 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23537 type = v2di_ftype_v2di_v2di_int;
23538 break;
23539 case V2DF_FTYPE_V2DF_V2DF_INT:
23540 type = v2df_ftype_v2df_v2df_int;
23541 break;
23542 case V2DI_FTYPE_V2DI_UINT_UINT:
23543 type = v2di_ftype_v2di_unsigned_unsigned;
23544 break;
23545 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23546 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23547 break;
23548 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23549 type = v1di_ftype_v1di_v1di_int;
23550 break;
23551 default:
23552 gcc_unreachable ();
23553 }
23554
23555 def_builtin_const (d->mask, d->name, type, d->code);
23556 }
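  /* Likewise for the loop above: an entry in bdesc_args flagged
     V2DF_FTYPE_V2DF_V2DF (for example the SSE2 addpd builtin, assuming
     the usual table contents) is registered essentially as

       def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_addpd",
			  v2df_ftype_v2df_v2df, IX86_BUILTIN_ADDPD);  */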
23557
23558 /* pcmpestr[im] insns. */
23559 for (i = 0, d = bdesc_pcmpestr;
23560 i < ARRAY_SIZE (bdesc_pcmpestr);
23561 i++, d++)
23562 {
23563 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23564 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23565 else
23566 ftype = int_ftype_v16qi_int_v16qi_int_int;
23567 def_builtin_const (d->mask, d->name, ftype, d->code);
23568 }
23569
23570 /* pcmpistr[im] insns. */
23571 for (i = 0, d = bdesc_pcmpistr;
23572 i < ARRAY_SIZE (bdesc_pcmpistr);
23573 i++, d++)
23574 {
23575 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23576 ftype = v16qi_ftype_v16qi_v16qi_int;
23577 else
23578 ftype = int_ftype_v16qi_v16qi_int;
23579 def_builtin_const (d->mask, d->name, ftype, d->code);
23580 }
23581
23582 /* comi/ucomi insns. */
23583 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23584 if (d->mask == OPTION_MASK_ISA_SSE2)
23585 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23586 else
23587 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23588
23589 /* SSE */
23590 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23591 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23592
23593 /* SSE or 3DNow!A */
23594 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23595
23596 /* SSE2 */
23597 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23598
23599 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23600 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23601
23602 /* SSE3. */
23603 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23604 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23605
23606 /* AES */
23607 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23608 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23609 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23610 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23611 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23612 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
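  /* Usage sketch: the AES builtins above back the wmmintrin.h wrappers,
     e.g. (illustrative only; __m128i/__v2di are assumed to come from the
     intrinsics headers)

       static __inline __m128i
       my_aesenc (__m128i state, __m128i key)
       {
	 return (__m128i) __builtin_ia32_aesenc128 ((__v2di) state,
						    (__v2di) key);
       }  */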
23613
23614 /* PCLMUL */
23615 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23616
23617 /* AVX */
23618 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23619 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23620
23621 /* Access to the vec_init patterns. */
23622 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23623 integer_type_node, NULL_TREE);
23624 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23625
23626 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23627 short_integer_type_node,
23628 short_integer_type_node,
23629 short_integer_type_node, NULL_TREE);
23630 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23631
23632 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23633 char_type_node, char_type_node,
23634 char_type_node, char_type_node,
23635 char_type_node, char_type_node,
23636 char_type_node, NULL_TREE);
23637 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
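  /* A minimal usage sketch for the vec_init builtins just defined: the
     MMX intrinsics wrap them roughly as

       static __inline __m64
       my_set_pi32 (int hi, int lo)
       {
	 return (__m64) __builtin_ia32_vec_init_v2si (lo, hi);
       }

     (illustrative; the real wrappers live in mmintrin.h and may order
     their arguments differently).  */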
23638
23639 /* Access to the vec_extract patterns. */
23640 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23641 integer_type_node, NULL_TREE);
23642 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23643
23644 ftype = build_function_type_list (long_long_integer_type_node,
23645 V2DI_type_node, integer_type_node,
23646 NULL_TREE);
23647 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23648
23649 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23650 integer_type_node, NULL_TREE);
23651 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23652
23653 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23654 integer_type_node, NULL_TREE);
23655 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23656
23657 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23658 integer_type_node, NULL_TREE);
23659 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23660
23661 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23662 integer_type_node, NULL_TREE);
23663 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23664
23665 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23666 integer_type_node, NULL_TREE);
23667 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23668
23669 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23670 integer_type_node, NULL_TREE);
23671 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
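  /* Usage sketch for the vec_ext builtins: the SSE intrinsics extract
     scalar elements through them, e.g.

       static __inline float
       my_first_element (__m128 v)
       {
	 return __builtin_ia32_vec_ext_v4sf ((__v4sf) v, 0);
       }

     (illustrative only; __m128/__v4sf are assumed to come from
     xmmintrin.h).  */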
23672
23673 /* Access to the vec_set patterns. */
23674 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23675 intDI_type_node,
23676 integer_type_node, NULL_TREE);
23677 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23678
23679 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23680 float_type_node,
23681 integer_type_node, NULL_TREE);
23682 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23683
23684 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23685 intSI_type_node,
23686 integer_type_node, NULL_TREE);
23687 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23688
23689 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23690 intHI_type_node,
23691 integer_type_node, NULL_TREE);
23692 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23693
23694 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23695 intHI_type_node,
23696 integer_type_node, NULL_TREE);
23697 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23698
23699 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23700 intQI_type_node,
23701 integer_type_node, NULL_TREE);
23702 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
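  /* Usage sketch for the vec_set builtins: inserting a scalar into one
     element of a vector, e.g.

       static __inline __m128i
       my_insert_epi16 (__m128i v, int val, const int pos)
       {
	 return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) v, val, pos);
       }

     (illustrative; the element index must be a constant in range, as in
     the real _mm_insert_epi16 wrapper in emmintrin.h).  */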
23703
23704 /* Add the SSE5 multi-argument builtins. */
23705 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23706 {
23707 tree mtype = NULL_TREE;
23708
23709 if (d->name == 0)
23710 continue;
23711
23712 switch ((enum multi_arg_type)d->flag)
23713 {
23714 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23715 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23716 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23717 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23718 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23719 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23720 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23721 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23722 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23723 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23724 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23725 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23726 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23727 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23728 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23729 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23730 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23731 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23732 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23733 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23734 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23735 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23736 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23737 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23738 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23739 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23740 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23741 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23742 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23743 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23744 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23745 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23746 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23747 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23748 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23749 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23750 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23751 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23752 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23753 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23754 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23755 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23756 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23757 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23758 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23759 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23760 case MULTI_ARG_UNKNOWN:
23761 default:
23762 gcc_unreachable ();
23763 }
23764
23765 if (mtype)
23766 def_builtin_const (d->mask, d->name, mtype, d->code);
23767 }
23768 }
23769
23770 /* Internal helper for ix86_init_builtins: register the ms_abi and sysv_abi va_list builtins. */
23771
23772 static void
23773 ix86_init_builtins_va_builtins_abi (void)
23774 {
23775 tree ms_va_ref, sysv_va_ref;
23776 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23777 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23778 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23779 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23780
23781 if (!TARGET_64BIT)
23782 return;
23783 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23784 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23785 ms_va_ref = build_reference_type (ms_va_list_type_node);
23786 sysv_va_ref =
23787 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23788
23789 fnvoid_va_end_ms =
23790 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23791 fnvoid_va_start_ms =
23792 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23793 fnvoid_va_end_sysv =
23794 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23795 fnvoid_va_start_sysv =
23796 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23797 NULL_TREE);
23798 fnvoid_va_copy_ms =
23799 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23800 NULL_TREE);
23801 fnvoid_va_copy_sysv =
23802 build_function_type_list (void_type_node, sysv_va_ref,
23803 sysv_va_ref, NULL_TREE);
23804
23805 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23806 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23807 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23808 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23809 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23810 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23811 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23812 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23813 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23814 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23815 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23816 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23817 }
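/* With the builtins above registered, 64-bit code can manipulate the
   variable arguments of a function declared for the opposite ABI, e.g.
   (illustrative sketch):

     int __attribute__ ((ms_abi))
     sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
	 s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }  */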
23818
23819 static void
23820 ix86_init_builtins (void)
23821 {
23822 tree float128_type_node = make_node (REAL_TYPE);
23823 tree ftype, decl;
23824
23825 /* The __float80 type. */
23826 if (TYPE_MODE (long_double_type_node) == XFmode)
23827 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23828 "__float80");
23829 else
23830 {
23831 /* Otherwise build a distinct 80-bit type for __float80. */
23832 tree float80_type_node = make_node (REAL_TYPE);
23833
23834 TYPE_PRECISION (float80_type_node) = 80;
23835 layout_type (float80_type_node);
23836 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23837 "__float80");
23838 }
23839
23840 /* The __float128 type. */
23841 TYPE_PRECISION (float128_type_node) = 128;
23842 layout_type (float128_type_node);
23843 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23844 "__float128");
23845
23846 /* TFmode support builtins. */
23847 ftype = build_function_type (float128_type_node, void_list_node);
23848 decl = add_builtin_function ("__builtin_infq", ftype,
23849 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23850 NULL, NULL_TREE);
23851 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23852
23853 decl = add_builtin_function ("__builtin_huge_valq", ftype,
23854 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
23855 NULL, NULL_TREE);
23856 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
23857
23858 /* We will expand these into normal calls if SSE2 isn't available,
23859 since they are used by libgcc. */
23860 ftype = build_function_type_list (float128_type_node,
23861 float128_type_node,
23862 NULL_TREE);
23863 decl = add_builtin_function ("__builtin_fabsq", ftype,
23864 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23865 "__fabstf2", NULL_TREE);
23866 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
23867 TREE_READONLY (decl) = 1;
23868
23869 ftype = build_function_type_list (float128_type_node,
23870 float128_type_node,
23871 float128_type_node,
23872 NULL_TREE);
23873 decl = add_builtin_function ("__builtin_copysignq", ftype,
23874 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23875 "__copysigntf3", NULL_TREE);
23876 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23877 TREE_READONLY (decl) = 1;
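  /* With the declarations above, user code can write, for instance,

       __float128 q = __builtin_copysignq (__builtin_infq (), -1.0);
       q = __builtin_fabsq (q);

     and, when the operations are not expanded inline, the fabsq and
     copysignq builtins fall back to the libgcc routines __fabstf2 and
     __copysigntf3 named above (illustrative sketch).  */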
23878
23879 ix86_init_mmx_sse_builtins ();
23880 if (TARGET_64BIT)
23881 ix86_init_builtins_va_builtins_abi ();
23882 }
23883
23884 /* Errors in the source file can cause expand_expr to return const0_rtx
23885 where we expect a vector. To avoid crashing, use one of the vector
23886 clear instructions. */
23887 static rtx
23888 safe_vector_operand (rtx x, enum machine_mode mode)
23889 {
23890 if (x == const0_rtx)
23891 x = CONST0_RTX (mode);
23892 return x;
23893 }
23894
23895 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
23896
23897 static rtx
23898 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23899 {
23900 rtx pat;
23901 tree arg0 = CALL_EXPR_ARG (exp, 0);
23902 tree arg1 = CALL_EXPR_ARG (exp, 1);
23903 rtx op0 = expand_normal (arg0);
23904 rtx op1 = expand_normal (arg1);
23905 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23906 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23907 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23908
23909 if (VECTOR_MODE_P (mode0))
23910 op0 = safe_vector_operand (op0, mode0);
23911 if (VECTOR_MODE_P (mode1))
23912 op1 = safe_vector_operand (op1, mode1);
23913
23914 if (optimize || !target
23915 || GET_MODE (target) != tmode
23916 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23917 target = gen_reg_rtx (tmode);
23918
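  /* Some insn patterns expect their second operand in TImode while the
     argument was expanded as a plain 32-bit integer; in that case load
     the value into an SSE register via sse2_loadd and view that register
     in TImode so the operand predicate can be satisfied.  */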
23919 if (GET_MODE (op1) == SImode && mode1 == TImode)
23920 {
23921 rtx x = gen_reg_rtx (V4SImode);
23922 emit_insn (gen_sse2_loadd (x, op1));
23923 op1 = gen_lowpart (TImode, x);
23924 }
23925
23926 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23927 op0 = copy_to_mode_reg (mode0, op0);
23928 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23929 op1 = copy_to_mode_reg (mode1, op1);
23930
23931 pat = GEN_FCN (icode) (target, op0, op1);
23932 if (! pat)
23933 return 0;
23934
23935 emit_insn (pat);
23936
23937 return target;
23938 }
23939
23940 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
23941
23942 static rtx
23943 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23944 enum multi_arg_type m_type,
23945 enum rtx_code sub_code)
23946 {
23947 rtx pat;
23948 int i;
23949 int nargs;
23950 bool comparison_p = false;
23951 bool tf_p = false;
23952 bool last_arg_constant = false;
23953 int num_memory = 0;
23954 struct {
23955 rtx op;
23956 enum machine_mode mode;
23957 } args[4];
23958
23959 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23960
23961 switch (m_type)
23962 {
23963 case MULTI_ARG_3_SF:
23964 case MULTI_ARG_3_DF:
23965 case MULTI_ARG_3_DI:
23966 case MULTI_ARG_3_SI:
23967 case MULTI_ARG_3_SI_DI:
23968 case MULTI_ARG_3_HI:
23969 case MULTI_ARG_3_HI_SI:
23970 case MULTI_ARG_3_QI:
23971 case MULTI_ARG_3_PERMPS:
23972 case MULTI_ARG_3_PERMPD:
23973 nargs = 3;
23974 break;
23975
23976 case MULTI_ARG_2_SF:
23977 case MULTI_ARG_2_DF:
23978 case MULTI_ARG_2_DI:
23979 case MULTI_ARG_2_SI:
23980 case MULTI_ARG_2_HI:
23981 case MULTI_ARG_2_QI:
23982 nargs = 2;
23983 break;
23984
23985 case MULTI_ARG_2_DI_IMM:
23986 case MULTI_ARG_2_SI_IMM:
23987 case MULTI_ARG_2_HI_IMM:
23988 case MULTI_ARG_2_QI_IMM:
23989 nargs = 2;
23990 last_arg_constant = true;
23991 break;
23992
23993 case MULTI_ARG_1_SF:
23994 case MULTI_ARG_1_DF:
23995 case MULTI_ARG_1_DI:
23996 case MULTI_ARG_1_SI:
23997 case MULTI_ARG_1_HI:
23998 case MULTI_ARG_1_QI:
23999 case MULTI_ARG_1_SI_DI:
24000 case MULTI_ARG_1_HI_DI:
24001 case MULTI_ARG_1_HI_SI:
24002 case MULTI_ARG_1_QI_DI:
24003 case MULTI_ARG_1_QI_SI:
24004 case MULTI_ARG_1_QI_HI:
24005 case MULTI_ARG_1_PH2PS:
24006 case MULTI_ARG_1_PS2PH:
24007 nargs = 1;
24008 break;
24009
24010 case MULTI_ARG_2_SF_CMP:
24011 case MULTI_ARG_2_DF_CMP:
24012 case MULTI_ARG_2_DI_CMP:
24013 case MULTI_ARG_2_SI_CMP:
24014 case MULTI_ARG_2_HI_CMP:
24015 case MULTI_ARG_2_QI_CMP:
24016 nargs = 2;
24017 comparison_p = true;
24018 break;
24019
24020 case MULTI_ARG_2_SF_TF:
24021 case MULTI_ARG_2_DF_TF:
24022 case MULTI_ARG_2_DI_TF:
24023 case MULTI_ARG_2_SI_TF:
24024 case MULTI_ARG_2_HI_TF:
24025 case MULTI_ARG_2_QI_TF:
24026 nargs = 2;
24027 tf_p = true;
24028 break;
24029
24030 case MULTI_ARG_UNKNOWN:
24031 default:
24032 gcc_unreachable ();
24033 }
24034
24035 if (optimize || !target
24036 || GET_MODE (target) != tmode
24037 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24038 target = gen_reg_rtx (tmode);
24039
24040 gcc_assert (nargs <= 4);
24041
24042 for (i = 0; i < nargs; i++)
24043 {
24044 tree arg = CALL_EXPR_ARG (exp, i);
24045 rtx op = expand_normal (arg);
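      /* For the comparison forms the insn pattern carries the comparison
	 operator itself as an extra operand between the target and the
	 arguments, so argument I corresponds to operand I + 2 rather
	 than I + 1.  */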
24046 int adjust = (comparison_p) ? 1 : 0;
24047 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24048
24049 if (last_arg_constant && i == nargs-1)
24050 {
24051 if (GET_CODE (op) != CONST_INT)
24052 {
24053 error ("last argument must be an immediate");
24054 return gen_reg_rtx (tmode);
24055 }
24056 }
24057 else
24058 {
24059 if (VECTOR_MODE_P (mode))
24060 op = safe_vector_operand (op, mode);
24061
24062 /* If we aren't optimizing, only allow one memory operand to be
24063 generated. */
24064 if (memory_operand (op, mode))
24065 num_memory++;
24066
24067 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24068
24069 if (optimize
24070 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24071 || num_memory > 1)
24072 op = force_reg (mode, op);
24073 }
24074
24075 args[i].op = op;
24076 args[i].mode = mode;
24077 }
24078
24079 switch (nargs)
24080 {
24081 case 1:
24082 pat = GEN_FCN (icode) (target, args[0].op);
24083 break;
24084
24085 case 2:
24086 if (tf_p)
24087 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24088 GEN_INT ((int)sub_code));
24089 else if (! comparison_p)
24090 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24091 else
24092 {
24093 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24094 args[0].op,
24095 args[1].op);
24096
24097 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24098 }
24099 break;
24100
24101 case 3:
24102 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24103 break;
24104
24105 default:
24106 gcc_unreachable ();
24107 }
24108
24109 if (! pat)
24110 return 0;
24111
24112 emit_insn (pat);
24113 return target;
24114 }
24115
24116 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24117 insns with vec_merge. */
24118
24119 static rtx
24120 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24121 rtx target)
24122 {
24123 rtx pat;
24124 tree arg0 = CALL_EXPR_ARG (exp, 0);
24125 rtx op1, op0 = expand_normal (arg0);
24126 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24127 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24128
24129 if (optimize || !target
24130 || GET_MODE (target) != tmode
24131 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24132 target = gen_reg_rtx (tmode);
24133
24134 if (VECTOR_MODE_P (mode0))
24135 op0 = safe_vector_operand (op0, mode0);
24136
24137 if ((optimize && !register_operand (op0, mode0))
24138 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24139 op0 = copy_to_mode_reg (mode0, op0);
24140
24141 op1 = op0;
24142 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24143 op1 = copy_to_mode_reg (mode0, op1);
24144
24145 pat = GEN_FCN (icode) (target, op0, op1);
24146 if (! pat)
24147 return 0;
24148 emit_insn (pat);
24149 return target;
24150 }
24151
24152 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24153
24154 static rtx
24155 ix86_expand_sse_compare (const struct builtin_description *d,
24156 tree exp, rtx target, bool swap)
24157 {
24158 rtx pat;
24159 tree arg0 = CALL_EXPR_ARG (exp, 0);
24160 tree arg1 = CALL_EXPR_ARG (exp, 1);
24161 rtx op0 = expand_normal (arg0);
24162 rtx op1 = expand_normal (arg1);
24163 rtx op2;
24164 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24165 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24166 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24167 enum rtx_code comparison = d->comparison;
24168
24169 if (VECTOR_MODE_P (mode0))
24170 op0 = safe_vector_operand (op0, mode0);
24171 if (VECTOR_MODE_P (mode1))
24172 op1 = safe_vector_operand (op1, mode1);
24173
24174 /* Swap operands if we have a comparison that isn't available in
24175 hardware. */
24176 if (swap)
24177 {
24178 rtx tmp = gen_reg_rtx (mode1);
24179 emit_move_insn (tmp, op1);
24180 op1 = op0;
24181 op0 = tmp;
24182 }
24183
24184 if (optimize || !target
24185 || GET_MODE (target) != tmode
24186 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24187 target = gen_reg_rtx (tmode);
24188
24189 if ((optimize && !register_operand (op0, mode0))
24190 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24191 op0 = copy_to_mode_reg (mode0, op0);
24192 if ((optimize && !register_operand (op1, mode1))
24193 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24194 op1 = copy_to_mode_reg (mode1, op1);
24195
24196 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24197 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24198 if (! pat)
24199 return 0;
24200 emit_insn (pat);
24201 return target;
24202 }
24203
24204 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24205
24206 static rtx
24207 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24208 rtx target)
24209 {
24210 rtx pat;
24211 tree arg0 = CALL_EXPR_ARG (exp, 0);
24212 tree arg1 = CALL_EXPR_ARG (exp, 1);
24213 rtx op0 = expand_normal (arg0);
24214 rtx op1 = expand_normal (arg1);
24215 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24216 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24217 enum rtx_code comparison = d->comparison;
24218
24219 if (VECTOR_MODE_P (mode0))
24220 op0 = safe_vector_operand (op0, mode0);
24221 if (VECTOR_MODE_P (mode1))
24222 op1 = safe_vector_operand (op1, mode1);
24223
24224 /* Swap operands if we have a comparison that isn't available in
24225 hardware. */
24226 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24227 {
24228 rtx tmp = op1;
24229 op1 = op0;
24230 op0 = tmp;
24231 }
24232
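  /* The comi/ucomi patterns only set the flags register; materialize the
     boolean result by zeroing an SImode pseudo and, after emitting the
     comparison below, setting its low byte from the flags.  */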
24233 target = gen_reg_rtx (SImode);
24234 emit_move_insn (target, const0_rtx);
24235 target = gen_rtx_SUBREG (QImode, target, 0);
24236
24237 if ((optimize && !register_operand (op0, mode0))
24238 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24239 op0 = copy_to_mode_reg (mode0, op0);
24240 if ((optimize && !register_operand (op1, mode1))
24241 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24242 op1 = copy_to_mode_reg (mode1, op1);
24243
24244 pat = GEN_FCN (d->icode) (op0, op1);
24245 if (! pat)
24246 return 0;
24247 emit_insn (pat);
24248 emit_insn (gen_rtx_SET (VOIDmode,
24249 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24250 gen_rtx_fmt_ee (comparison, QImode,
24251 SET_DEST (pat),
24252 const0_rtx)));
24253
24254 return SUBREG_REG (target);
24255 }
24256
24257 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24258
24259 static rtx
24260 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24261 rtx target)
24262 {
24263 rtx pat;
24264 tree arg0 = CALL_EXPR_ARG (exp, 0);
24265 tree arg1 = CALL_EXPR_ARG (exp, 1);
24266 rtx op0 = expand_normal (arg0);
24267 rtx op1 = expand_normal (arg1);
24268 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24269 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24270 enum rtx_code comparison = d->comparison;
24271
24272 if (VECTOR_MODE_P (mode0))
24273 op0 = safe_vector_operand (op0, mode0);
24274 if (VECTOR_MODE_P (mode1))
24275 op1 = safe_vector_operand (op1, mode1);
24276
24277 target = gen_reg_rtx (SImode);
24278 emit_move_insn (target, const0_rtx);
24279 target = gen_rtx_SUBREG (QImode, target, 0);
24280
24281 if ((optimize && !register_operand (op0, mode0))
24282 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24283 op0 = copy_to_mode_reg (mode0, op0);
24284 if ((optimize && !register_operand (op1, mode1))
24285 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24286 op1 = copy_to_mode_reg (mode1, op1);
24287
24288 pat = GEN_FCN (d->icode) (op0, op1);
24289 if (! pat)
24290 return 0;
24291 emit_insn (pat);
24292 emit_insn (gen_rtx_SET (VOIDmode,
24293 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24294 gen_rtx_fmt_ee (comparison, QImode,
24295 SET_DEST (pat),
24296 const0_rtx)));
24297
24298 return SUBREG_REG (target);
24299 }
24300
24301 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24302
24303 static rtx
24304 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24305 tree exp, rtx target)
24306 {
24307 rtx pat;
24308 tree arg0 = CALL_EXPR_ARG (exp, 0);
24309 tree arg1 = CALL_EXPR_ARG (exp, 1);
24310 tree arg2 = CALL_EXPR_ARG (exp, 2);
24311 tree arg3 = CALL_EXPR_ARG (exp, 3);
24312 tree arg4 = CALL_EXPR_ARG (exp, 4);
24313 rtx scratch0, scratch1;
24314 rtx op0 = expand_normal (arg0);
24315 rtx op1 = expand_normal (arg1);
24316 rtx op2 = expand_normal (arg2);
24317 rtx op3 = expand_normal (arg3);
24318 rtx op4 = expand_normal (arg4);
24319 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24320
24321 tmode0 = insn_data[d->icode].operand[0].mode;
24322 tmode1 = insn_data[d->icode].operand[1].mode;
24323 modev2 = insn_data[d->icode].operand[2].mode;
24324 modei3 = insn_data[d->icode].operand[3].mode;
24325 modev4 = insn_data[d->icode].operand[4].mode;
24326 modei5 = insn_data[d->icode].operand[5].mode;
24327 modeimm = insn_data[d->icode].operand[6].mode;
24328
24329 if (VECTOR_MODE_P (modev2))
24330 op0 = safe_vector_operand (op0, modev2);
24331 if (VECTOR_MODE_P (modev4))
24332 op2 = safe_vector_operand (op2, modev4);
24333
24334 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24335 op0 = copy_to_mode_reg (modev2, op0);
24336 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24337 op1 = copy_to_mode_reg (modei3, op1);
24338 if ((optimize && !register_operand (op2, modev4))
24339 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24340 op2 = copy_to_mode_reg (modev4, op2);
24341 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24342 op3 = copy_to_mode_reg (modei5, op3);
24343
24344 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24345 {
24346 error ("the fifth argument must be an 8-bit immediate");
24347 return const0_rtx;
24348 }
24349
24350 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24351 {
24352 if (optimize || !target
24353 || GET_MODE (target) != tmode0
24354 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24355 target = gen_reg_rtx (tmode0);
24356
24357 scratch1 = gen_reg_rtx (tmode1);
24358
24359 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24360 }
24361 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24362 {
24363 if (optimize || !target
24364 || GET_MODE (target) != tmode1
24365 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24366 target = gen_reg_rtx (tmode1);
24367
24368 scratch0 = gen_reg_rtx (tmode0);
24369
24370 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24371 }
24372 else
24373 {
24374 gcc_assert (d->flag);
24375
24376 scratch0 = gen_reg_rtx (tmode0);
24377 scratch1 = gen_reg_rtx (tmode1);
24378
24379 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24380 }
24381
24382 if (! pat)
24383 return 0;
24384
24385 emit_insn (pat);
24386
24387 if (d->flag)
24388 {
24389 target = gen_reg_rtx (SImode);
24390 emit_move_insn (target, const0_rtx);
24391 target = gen_rtx_SUBREG (QImode, target, 0);
24392
24393 emit_insn
24394 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24395 gen_rtx_fmt_ee (EQ, QImode,
24396 gen_rtx_REG ((enum machine_mode) d->flag,
24397 FLAGS_REG),
24398 const0_rtx)));
24399 return SUBREG_REG (target);
24400 }
24401 else
24402 return target;
24403 }
24404
24405
24406 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24407
24408 static rtx
24409 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24410 tree exp, rtx target)
24411 {
24412 rtx pat;
24413 tree arg0 = CALL_EXPR_ARG (exp, 0);
24414 tree arg1 = CALL_EXPR_ARG (exp, 1);
24415 tree arg2 = CALL_EXPR_ARG (exp, 2);
24416 rtx scratch0, scratch1;
24417 rtx op0 = expand_normal (arg0);
24418 rtx op1 = expand_normal (arg1);
24419 rtx op2 = expand_normal (arg2);
24420 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24421
24422 tmode0 = insn_data[d->icode].operand[0].mode;
24423 tmode1 = insn_data[d->icode].operand[1].mode;
24424 modev2 = insn_data[d->icode].operand[2].mode;
24425 modev3 = insn_data[d->icode].operand[3].mode;
24426 modeimm = insn_data[d->icode].operand[4].mode;
24427
24428 if (VECTOR_MODE_P (modev2))
24429 op0 = safe_vector_operand (op0, modev2);
24430 if (VECTOR_MODE_P (modev3))
24431 op1 = safe_vector_operand (op1, modev3);
24432
24433 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24434 op0 = copy_to_mode_reg (modev2, op0);
24435 if ((optimize && !register_operand (op1, modev3))
24436 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24437 op1 = copy_to_mode_reg (modev3, op1);
24438
24439 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24440 {
24441 error ("the third argument must be an 8-bit immediate");
24442 return const0_rtx;
24443 }
24444
24445 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24446 {
24447 if (optimize || !target
24448 || GET_MODE (target) != tmode0
24449 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24450 target = gen_reg_rtx (tmode0);
24451
24452 scratch1 = gen_reg_rtx (tmode1);
24453
24454 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24455 }
24456 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24457 {
24458 if (optimize || !target
24459 || GET_MODE (target) != tmode1
24460 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24461 target = gen_reg_rtx (tmode1);
24462
24463 scratch0 = gen_reg_rtx (tmode0);
24464
24465 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24466 }
24467 else
24468 {
24469 gcc_assert (d->flag);
24470
24471 scratch0 = gen_reg_rtx (tmode0);
24472 scratch1 = gen_reg_rtx (tmode1);
24473
24474 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24475 }
24476
24477 if (! pat)
24478 return 0;
24479
24480 emit_insn (pat);
24481
24482 if (d->flag)
24483 {
24484 target = gen_reg_rtx (SImode);
24485 emit_move_insn (target, const0_rtx);
24486 target = gen_rtx_SUBREG (QImode, target, 0);
24487
24488 emit_insn
24489 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24490 gen_rtx_fmt_ee (EQ, QImode,
24491 gen_rtx_REG ((enum machine_mode) d->flag,
24492 FLAGS_REG),
24493 const0_rtx)));
24494 return SUBREG_REG (target);
24495 }
24496 else
24497 return target;
24498 }
24499
24500 /* Subroutine of ix86_expand_builtin to take care of insns with
24501 a variable number of operands. */
24502
24503 static rtx
24504 ix86_expand_args_builtin (const struct builtin_description *d,
24505 tree exp, rtx target)
24506 {
24507 rtx pat, real_target;
24508 unsigned int i, nargs;
24509 unsigned int nargs_constant = 0;
24510 int num_memory = 0;
24511 struct
24512 {
24513 rtx op;
24514 enum machine_mode mode;
24515 } args[4];
24516 bool last_arg_count = false;
24517 enum insn_code icode = d->icode;
24518 const struct insn_data *insn_p = &insn_data[icode];
24519 enum machine_mode tmode = insn_p->operand[0].mode;
24520 enum machine_mode rmode = VOIDmode;
24521 bool swap = false;
24522 enum rtx_code comparison = d->comparison;
24523
24524 switch ((enum ix86_builtin_type) d->flag)
24525 {
24526 case INT_FTYPE_V8SF_V8SF_PTEST:
24527 case INT_FTYPE_V4DI_V4DI_PTEST:
24528 case INT_FTYPE_V4DF_V4DF_PTEST:
24529 case INT_FTYPE_V4SF_V4SF_PTEST:
24530 case INT_FTYPE_V2DI_V2DI_PTEST:
24531 case INT_FTYPE_V2DF_V2DF_PTEST:
24532 return ix86_expand_sse_ptest (d, exp, target);
24533 case FLOAT128_FTYPE_FLOAT128:
24534 case FLOAT_FTYPE_FLOAT:
24535 case INT64_FTYPE_V4SF:
24536 case INT64_FTYPE_V2DF:
24537 case INT_FTYPE_V16QI:
24538 case INT_FTYPE_V8QI:
24539 case INT_FTYPE_V8SF:
24540 case INT_FTYPE_V4DF:
24541 case INT_FTYPE_V4SF:
24542 case INT_FTYPE_V2DF:
24543 case V16QI_FTYPE_V16QI:
24544 case V8SI_FTYPE_V8SF:
24545 case V8SI_FTYPE_V4SI:
24546 case V8HI_FTYPE_V8HI:
24547 case V8HI_FTYPE_V16QI:
24548 case V8QI_FTYPE_V8QI:
24549 case V8SF_FTYPE_V8SF:
24550 case V8SF_FTYPE_V8SI:
24551 case V8SF_FTYPE_V4SF:
24552 case V4SI_FTYPE_V4SI:
24553 case V4SI_FTYPE_V16QI:
24554 case V4SI_FTYPE_V4SF:
24555 case V4SI_FTYPE_V8SI:
24556 case V4SI_FTYPE_V8HI:
24557 case V4SI_FTYPE_V4DF:
24558 case V4SI_FTYPE_V2DF:
24559 case V4HI_FTYPE_V4HI:
24560 case V4DF_FTYPE_V4DF:
24561 case V4DF_FTYPE_V4SI:
24562 case V4DF_FTYPE_V4SF:
24563 case V4DF_FTYPE_V2DF:
24564 case V4SF_FTYPE_V4SF:
24565 case V4SF_FTYPE_V4SI:
24566 case V4SF_FTYPE_V8SF:
24567 case V4SF_FTYPE_V4DF:
24568 case V4SF_FTYPE_V2DF:
24569 case V2DI_FTYPE_V2DI:
24570 case V2DI_FTYPE_V16QI:
24571 case V2DI_FTYPE_V8HI:
24572 case V2DI_FTYPE_V4SI:
24573 case V2DF_FTYPE_V2DF:
24574 case V2DF_FTYPE_V4SI:
24575 case V2DF_FTYPE_V4DF:
24576 case V2DF_FTYPE_V4SF:
24577 case V2DF_FTYPE_V2SI:
24578 case V2SI_FTYPE_V2SI:
24579 case V2SI_FTYPE_V4SF:
24580 case V2SI_FTYPE_V2SF:
24581 case V2SI_FTYPE_V2DF:
24582 case V2SF_FTYPE_V2SF:
24583 case V2SF_FTYPE_V2SI:
24584 nargs = 1;
24585 break;
24586 case V4SF_FTYPE_V4SF_VEC_MERGE:
24587 case V2DF_FTYPE_V2DF_VEC_MERGE:
24588 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24589 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24590 case V16QI_FTYPE_V16QI_V16QI:
24591 case V16QI_FTYPE_V8HI_V8HI:
24592 case V8QI_FTYPE_V8QI_V8QI:
24593 case V8QI_FTYPE_V4HI_V4HI:
24594 case V8HI_FTYPE_V8HI_V8HI:
24595 case V8HI_FTYPE_V16QI_V16QI:
24596 case V8HI_FTYPE_V4SI_V4SI:
24597 case V8SF_FTYPE_V8SF_V8SF:
24598 case V8SF_FTYPE_V8SF_V8SI:
24599 case V4SI_FTYPE_V4SI_V4SI:
24600 case V4SI_FTYPE_V8HI_V8HI:
24601 case V4SI_FTYPE_V4SF_V4SF:
24602 case V4SI_FTYPE_V2DF_V2DF:
24603 case V4HI_FTYPE_V4HI_V4HI:
24604 case V4HI_FTYPE_V8QI_V8QI:
24605 case V4HI_FTYPE_V2SI_V2SI:
24606 case V4DF_FTYPE_V4DF_V4DF:
24607 case V4DF_FTYPE_V4DF_V4DI:
24608 case V4SF_FTYPE_V4SF_V4SF:
24609 case V4SF_FTYPE_V4SF_V4SI:
24610 case V4SF_FTYPE_V4SF_V2SI:
24611 case V4SF_FTYPE_V4SF_V2DF:
24612 case V4SF_FTYPE_V4SF_DI:
24613 case V4SF_FTYPE_V4SF_SI:
24614 case V2DI_FTYPE_V2DI_V2DI:
24615 case V2DI_FTYPE_V16QI_V16QI:
24616 case V2DI_FTYPE_V4SI_V4SI:
24617 case V2DI_FTYPE_V2DI_V16QI:
24618 case V2DI_FTYPE_V2DF_V2DF:
24619 case V2SI_FTYPE_V2SI_V2SI:
24620 case V2SI_FTYPE_V4HI_V4HI:
24621 case V2SI_FTYPE_V2SF_V2SF:
24622 case V2DF_FTYPE_V2DF_V2DF:
24623 case V2DF_FTYPE_V2DF_V4SF:
24624 case V2DF_FTYPE_V2DF_V2DI:
24625 case V2DF_FTYPE_V2DF_DI:
24626 case V2DF_FTYPE_V2DF_SI:
24627 case V2SF_FTYPE_V2SF_V2SF:
24628 case V1DI_FTYPE_V1DI_V1DI:
24629 case V1DI_FTYPE_V8QI_V8QI:
24630 case V1DI_FTYPE_V2SI_V2SI:
24631 if (comparison == UNKNOWN)
24632 return ix86_expand_binop_builtin (icode, exp, target);
24633 nargs = 2;
24634 break;
24635 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24636 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24637 gcc_assert (comparison != UNKNOWN);
24638 nargs = 2;
24639 swap = true;
24640 break;
24641 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24642 case V8HI_FTYPE_V8HI_SI_COUNT:
24643 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24644 case V4SI_FTYPE_V4SI_SI_COUNT:
24645 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24646 case V4HI_FTYPE_V4HI_SI_COUNT:
24647 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24648 case V2DI_FTYPE_V2DI_SI_COUNT:
24649 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24650 case V2SI_FTYPE_V2SI_SI_COUNT:
24651 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24652 case V1DI_FTYPE_V1DI_SI_COUNT:
24653 nargs = 2;
24654 last_arg_count = true;
24655 break;
24656 case UINT64_FTYPE_UINT64_UINT64:
24657 case UINT_FTYPE_UINT_UINT:
24658 case UINT_FTYPE_UINT_USHORT:
24659 case UINT_FTYPE_UINT_UCHAR:
24660 nargs = 2;
24661 break;
24662 case V2DI2TI_FTYPE_V2DI_INT:
24663 nargs = 2;
24664 rmode = V2DImode;
24665 nargs_constant = 1;
24666 break;
24667 case V8HI_FTYPE_V8HI_INT:
24668 case V8SF_FTYPE_V8SF_INT:
24669 case V4SI_FTYPE_V4SI_INT:
24670 case V4SI_FTYPE_V8SI_INT:
24671 case V4HI_FTYPE_V4HI_INT:
24672 case V4DF_FTYPE_V4DF_INT:
24673 case V4SF_FTYPE_V4SF_INT:
24674 case V4SF_FTYPE_V8SF_INT:
24675 case V2DI_FTYPE_V2DI_INT:
24676 case V2DF_FTYPE_V2DF_INT:
24677 case V2DF_FTYPE_V4DF_INT:
24678 nargs = 2;
24679 nargs_constant = 1;
24680 break;
24681 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24682 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24683 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24684 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24685 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24686 nargs = 3;
24687 break;
24688 case V16QI_FTYPE_V16QI_V16QI_INT:
24689 case V8HI_FTYPE_V8HI_V8HI_INT:
24690 case V8SI_FTYPE_V8SI_V8SI_INT:
24691 case V8SI_FTYPE_V8SI_V4SI_INT:
24692 case V8SF_FTYPE_V8SF_V8SF_INT:
24693 case V8SF_FTYPE_V8SF_V4SF_INT:
24694 case V4SI_FTYPE_V4SI_V4SI_INT:
24695 case V4DF_FTYPE_V4DF_V4DF_INT:
24696 case V4DF_FTYPE_V4DF_V2DF_INT:
24697 case V4SF_FTYPE_V4SF_V4SF_INT:
24698 case V2DI_FTYPE_V2DI_V2DI_INT:
24699 case V2DF_FTYPE_V2DF_V2DF_INT:
24700 nargs = 3;
24701 nargs_constant = 1;
24702 break;
24703 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24704 nargs = 3;
24705 rmode = V2DImode;
24706 nargs_constant = 1;
24707 break;
24708 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24709 nargs = 3;
24710 rmode = DImode;
24711 nargs_constant = 1;
24712 break;
24713 case V2DI_FTYPE_V2DI_UINT_UINT:
24714 nargs = 3;
24715 nargs_constant = 2;
24716 break;
24717 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24718 nargs = 4;
24719 nargs_constant = 2;
24720 break;
24721 default:
24722 gcc_unreachable ();
24723 }
24724
24725 gcc_assert (nargs <= ARRAY_SIZE (args));
24726
24727 if (comparison != UNKNOWN)
24728 {
24729 gcc_assert (nargs == 2);
24730 return ix86_expand_sse_compare (d, exp, target, swap);
24731 }
24732
24733 if (rmode == VOIDmode || rmode == tmode)
24734 {
24735 if (optimize
24736 || target == 0
24737 || GET_MODE (target) != tmode
24738 || ! (*insn_p->operand[0].predicate) (target, tmode))
24739 target = gen_reg_rtx (tmode);
24740 real_target = target;
24741 }
24742 else
24743 {
24744 target = gen_reg_rtx (rmode);
24745 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24746 }
24747
24748 for (i = 0; i < nargs; i++)
24749 {
24750 tree arg = CALL_EXPR_ARG (exp, i);
24751 rtx op = expand_normal (arg);
24752 enum machine_mode mode = insn_p->operand[i + 1].mode;
24753 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24754
24755 if (last_arg_count && (i + 1) == nargs)
24756 {
24757 	  /* SIMD shift insns take either an 8-bit immediate or a
24758 	     register as the count.  But the builtin functions take an int
24759 	     as the count.  If the count doesn't match, put it in a register.  */
24760 if (!match)
24761 {
24762 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24763 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24764 op = copy_to_reg (op);
24765 }
24766 }
24767 else if ((nargs - i) <= nargs_constant)
24768 {
24769 if (!match)
24770 switch (icode)
24771 {
24772 case CODE_FOR_sse4_1_roundpd:
24773 case CODE_FOR_sse4_1_roundps:
24774 case CODE_FOR_sse4_1_roundsd:
24775 case CODE_FOR_sse4_1_roundss:
24776 case CODE_FOR_sse4_1_blendps:
24777 case CODE_FOR_avx_blendpd256:
24778 case CODE_FOR_avx_vpermilv4df:
24779 case CODE_FOR_avx_roundpd256:
24780 case CODE_FOR_avx_roundps256:
24781 error ("the last argument must be a 4-bit immediate");
24782 return const0_rtx;
24783
24784 case CODE_FOR_sse4_1_blendpd:
24785 case CODE_FOR_avx_vpermilv2df:
24786 error ("the last argument must be a 2-bit immediate");
24787 return const0_rtx;
24788
24789 case CODE_FOR_avx_vextractf128v4df:
24790 case CODE_FOR_avx_vextractf128v8sf:
24791 case CODE_FOR_avx_vextractf128v8si:
24792 case CODE_FOR_avx_vinsertf128v4df:
24793 case CODE_FOR_avx_vinsertf128v8sf:
24794 case CODE_FOR_avx_vinsertf128v8si:
24795 error ("the last argument must be a 1-bit immediate");
24796 return const0_rtx;
24797
24798 case CODE_FOR_avx_cmpsdv2df3:
24799 case CODE_FOR_avx_cmpssv4sf3:
24800 case CODE_FOR_avx_cmppdv2df3:
24801 case CODE_FOR_avx_cmppsv4sf3:
24802 case CODE_FOR_avx_cmppdv4df3:
24803 case CODE_FOR_avx_cmppsv8sf3:
24804 error ("the last argument must be a 5-bit immediate");
24805 return const0_rtx;
24806
24807 default:
24808 switch (nargs_constant)
24809 {
24810 case 2:
24811 if ((nargs - i) == nargs_constant)
24812 {
24813 error ("the next to last argument must be an 8-bit immediate");
24814 break;
24815 }
24816 case 1:
24817 error ("the last argument must be an 8-bit immediate");
24818 break;
24819 default:
24820 gcc_unreachable ();
24821 }
24822 return const0_rtx;
24823 }
24824 }
24825 else
24826 {
24827 if (VECTOR_MODE_P (mode))
24828 op = safe_vector_operand (op, mode);
24829
24830 /* If we aren't optimizing, only allow one memory operand to
24831 be generated. */
24832 if (memory_operand (op, mode))
24833 num_memory++;
24834
24835 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24836 {
24837 if (optimize || !match || num_memory > 1)
24838 op = copy_to_mode_reg (mode, op);
24839 }
24840 else
24841 {
24842 op = copy_to_reg (op);
24843 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24844 }
24845 }
24846
24847 args[i].op = op;
24848 args[i].mode = mode;
24849 }
24850
24851 switch (nargs)
24852 {
24853 case 1:
24854 pat = GEN_FCN (icode) (real_target, args[0].op);
24855 break;
24856 case 2:
24857 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24858 break;
24859 case 3:
24860 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24861 args[2].op);
24862 break;
24863 case 4:
24864 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24865 args[2].op, args[3].op);
24866 break;
24867 default:
24868 gcc_unreachable ();
24869 }
24870
24871 if (! pat)
24872 return 0;
24873
24874 emit_insn (pat);
24875 return target;
24876 }
24877
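/* For illustration, the nargs_constant handling above is what rejects an
   out-of-form immediate at the source level.  A minimal sketch, assuming
   <smmintrin.h> and -msse4.1:

     #include <smmintrin.h>

     __m128d
     round_down (__m128d x)
     {
       return _mm_round_pd (x, _MM_FROUND_FLOOR);
     }

   _MM_FROUND_FLOOR is a compile-time constant that satisfies the operand
   predicate, so the call expands directly to roundpd.  Passing a
   non-constant rounding mode instead reaches the CODE_FOR_sse4_1_roundpd
   case above and produces "the last argument must be a 4-bit
   immediate".  */
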
24878 /* Subroutine of ix86_expand_builtin to take care of special insns
24879    with a variable number of operands.  */
24880
24881 static rtx
24882 ix86_expand_special_args_builtin (const struct builtin_description *d,
24883 tree exp, rtx target)
24884 {
24885 tree arg;
24886 rtx pat, op;
24887 unsigned int i, nargs, arg_adjust, memory;
24888 struct
24889 {
24890 rtx op;
24891 enum machine_mode mode;
24892 } args[2];
24893 enum insn_code icode = d->icode;
24894 bool last_arg_constant = false;
24895 const struct insn_data *insn_p = &insn_data[icode];
24896 enum machine_mode tmode = insn_p->operand[0].mode;
24897 enum { load, store } klass;
24898
24899 switch ((enum ix86_special_builtin_type) d->flag)
24900 {
24901 case VOID_FTYPE_VOID:
24902 emit_insn (GEN_FCN (icode) (target));
24903 return 0;
24904 case V2DI_FTYPE_PV2DI:
24905 case V32QI_FTYPE_PCCHAR:
24906 case V16QI_FTYPE_PCCHAR:
24907 case V8SF_FTYPE_PCV4SF:
24908 case V8SF_FTYPE_PCFLOAT:
24909 case V4SF_FTYPE_PCFLOAT:
24910 case V4DF_FTYPE_PCV2DF:
24911 case V4DF_FTYPE_PCDOUBLE:
24912 case V2DF_FTYPE_PCDOUBLE:
24913 nargs = 1;
24914 klass = load;
24915 memory = 0;
24916 break;
24917 case VOID_FTYPE_PV2SF_V4SF:
24918 case VOID_FTYPE_PV4DI_V4DI:
24919 case VOID_FTYPE_PV2DI_V2DI:
24920 case VOID_FTYPE_PCHAR_V32QI:
24921 case VOID_FTYPE_PCHAR_V16QI:
24922 case VOID_FTYPE_PFLOAT_V8SF:
24923 case VOID_FTYPE_PFLOAT_V4SF:
24924 case VOID_FTYPE_PDOUBLE_V4DF:
24925 case VOID_FTYPE_PDOUBLE_V2DF:
24926 case VOID_FTYPE_PDI_DI:
24927 case VOID_FTYPE_PINT_INT:
24928 nargs = 1;
24929 klass = store;
24930 /* Reserve memory operand for target. */
24931 memory = ARRAY_SIZE (args);
24932 break;
24933 case V4SF_FTYPE_V4SF_PCV2SF:
24934 case V2DF_FTYPE_V2DF_PCDOUBLE:
24935 nargs = 2;
24936 klass = load;
24937 memory = 1;
24938 break;
24939 case V8SF_FTYPE_PCV8SF_V8SF:
24940 case V4DF_FTYPE_PCV4DF_V4DF:
24941 case V4SF_FTYPE_PCV4SF_V4SF:
24942 case V2DF_FTYPE_PCV2DF_V2DF:
24943 nargs = 2;
24944 klass = load;
24945 memory = 0;
24946 break;
24947 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24948 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24949 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24950 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24951 nargs = 2;
24952 klass = store;
24953 /* Reserve memory operand for target. */
24954 memory = ARRAY_SIZE (args);
24955 break;
24956 default:
24957 gcc_unreachable ();
24958 }
24959
24960 gcc_assert (nargs <= ARRAY_SIZE (args));
24961
24962 if (klass == store)
24963 {
24964 arg = CALL_EXPR_ARG (exp, 0);
24965 op = expand_normal (arg);
24966 gcc_assert (target == 0);
24967 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24968 arg_adjust = 1;
24969 }
24970 else
24971 {
24972 arg_adjust = 0;
24973 if (optimize
24974 || target == 0
24975 || GET_MODE (target) != tmode
24976 || ! (*insn_p->operand[0].predicate) (target, tmode))
24977 target = gen_reg_rtx (tmode);
24978 }
24979
24980 for (i = 0; i < nargs; i++)
24981 {
24982 enum machine_mode mode = insn_p->operand[i + 1].mode;
24983 bool match;
24984
24985 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24986 op = expand_normal (arg);
24987 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24988
24989 if (last_arg_constant && (i + 1) == nargs)
24990 {
24991 if (!match)
24992 switch (icode)
24993 {
24994 default:
24995 error ("the last argument must be an 8-bit immediate");
24996 return const0_rtx;
24997 }
24998 }
24999 else
25000 {
25001 if (i == memory)
25002 {
25003 /* This must be the memory operand. */
25004 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25005 gcc_assert (GET_MODE (op) == mode
25006 || GET_MODE (op) == VOIDmode);
25007 }
25008 else
25009 {
25010 	      /* This must be a register.  */
25011 if (VECTOR_MODE_P (mode))
25012 op = safe_vector_operand (op, mode);
25013
25014 gcc_assert (GET_MODE (op) == mode
25015 || GET_MODE (op) == VOIDmode);
25016 op = copy_to_mode_reg (mode, op);
25017 }
25018 }
25019
25020 args[i].op = op;
25021 args[i].mode = mode;
25022 }
25023
25024 switch (nargs)
25025 {
25026 case 1:
25027 pat = GEN_FCN (icode) (target, args[0].op);
25028 break;
25029 case 2:
25030 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25031 break;
25032 default:
25033 gcc_unreachable ();
25034 }
25035
25036 if (! pat)
25037 return 0;
25038 emit_insn (pat);
25039 return klass == store ? 0 : target;
25040 }
25041
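/* For illustration, the load/store classification above is what lets the
   unaligned-access intrinsics take plain pointers.  A minimal sketch,
   assuming <emmintrin.h> and -msse2:

     #include <emmintrin.h>

     void
     copy16 (char *dst, const char *src)
     {
       __m128i v = _mm_loadu_si128 ((const __m128i *) src);
       _mm_storeu_si128 ((__m128i *) dst, v);
     }

   The load goes through a klass == load case (V16QI_FTYPE_PCCHAR), where
   the pointer argument becomes the single memory operand, and the store
   through a klass == store case (VOID_FTYPE_PCHAR_V16QI), where the
   target itself is rewritten into a MEM and no value is returned.  */
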
25042 /* Return the integer constant in ARG. Constrain it to be in the range
25043 of the subparts of VEC_TYPE; issue an error if not. */
25044
25045 static int
25046 get_element_number (tree vec_type, tree arg)
25047 {
25048 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25049
25050 if (!host_integerp (arg, 1)
25051 || (elt = tree_low_cst (arg, 1), elt > max))
25052 {
25053 error ("selector must be an integer constant in the range 0..%wi", max);
25054 return 0;
25055 }
25056
25057 return elt;
25058 }
25059
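/* For illustration, a short worked example of the range check above: for
   a V8HI vector TYPE_VECTOR_SUBPARTS is 8, so MAX is 7 and a call such as
   __builtin_ia32_vec_ext_v8hi (v, 8) is diagnosed with "selector must be
   an integer constant in the range 0..7", after which element 0 is used
   so that expansion can continue.  */
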
25060 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25061 ix86_expand_vector_init. We DO have language-level syntax for this, in
25062 the form of (type){ init-list }. Except that since we can't place emms
25063 instructions from inside the compiler, we can't allow the use of MMX
25064 registers unless the user explicitly asks for it. So we do *not* define
25065 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25066    we have builtins invoked by mmintrin.h that give us license to emit
25067 these sorts of instructions. */
25068
25069 static rtx
25070 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25071 {
25072 enum machine_mode tmode = TYPE_MODE (type);
25073 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25074 int i, n_elt = GET_MODE_NUNITS (tmode);
25075 rtvec v = rtvec_alloc (n_elt);
25076
25077 gcc_assert (VECTOR_MODE_P (tmode));
25078 gcc_assert (call_expr_nargs (exp) == n_elt);
25079
25080 for (i = 0; i < n_elt; ++i)
25081 {
25082 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25083 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25084 }
25085
25086 if (!target || !register_operand (target, tmode))
25087 target = gen_reg_rtx (tmode);
25088
25089 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25090 return target;
25091 }
25092
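/* For illustration, a minimal sketch of how mmintrin.h reaches this
   wrapper (assuming -mmmx):

     #include <mmintrin.h>

     __m64
     make_v4hi (short a, short b, short c, short d)
     {
       return _mm_set_pi16 (a, b, c, d);
     }

   _mm_set_pi16 is implemented in terms of __builtin_ia32_vec_init_v4hi,
   i.e. IX86_BUILTIN_VEC_INIT_V4HI below, so the initializer is expanded
   here, which gives the compiler explicit license to use MMX registers as
   described in the comment above.  */
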
25093 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25094 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25095 had a language-level syntax for referencing vector elements. */
25096
25097 static rtx
25098 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25099 {
25100 enum machine_mode tmode, mode0;
25101 tree arg0, arg1;
25102 int elt;
25103 rtx op0;
25104
25105 arg0 = CALL_EXPR_ARG (exp, 0);
25106 arg1 = CALL_EXPR_ARG (exp, 1);
25107
25108 op0 = expand_normal (arg0);
25109 elt = get_element_number (TREE_TYPE (arg0), arg1);
25110
25111 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25112 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25113 gcc_assert (VECTOR_MODE_P (mode0));
25114
25115 op0 = force_reg (mode0, op0);
25116
25117 if (optimize || !target || !register_operand (target, tmode))
25118 target = gen_reg_rtx (tmode);
25119
25120 ix86_expand_vector_extract (true, target, op0, elt);
25121
25122 return target;
25123 }
25124
25125 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25126 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25127 a language-level syntax for referencing vector elements. */
25128
25129 static rtx
25130 ix86_expand_vec_set_builtin (tree exp)
25131 {
25132 enum machine_mode tmode, mode1;
25133 tree arg0, arg1, arg2;
25134 int elt;
25135 rtx op0, op1, target;
25136
25137 arg0 = CALL_EXPR_ARG (exp, 0);
25138 arg1 = CALL_EXPR_ARG (exp, 1);
25139 arg2 = CALL_EXPR_ARG (exp, 2);
25140
25141 tmode = TYPE_MODE (TREE_TYPE (arg0));
25142 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25143 gcc_assert (VECTOR_MODE_P (tmode));
25144
25145 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25146 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25147 elt = get_element_number (TREE_TYPE (arg0), arg2);
25148
25149 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25150 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25151
25152 op0 = force_reg (tmode, op0);
25153 op1 = force_reg (mode1, op1);
25154
25155 /* OP0 is the source of these builtin functions and shouldn't be
25156 modified. Create a copy, use it and return it as target. */
25157 target = gen_reg_rtx (tmode);
25158 emit_move_insn (target, op0);
25159 ix86_expand_vector_set (true, target, op1, elt);
25160
25161 return target;
25162 }
25163
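/* For illustration, a minimal sketch of the source-level forms that reach
   the vec_ext/vec_set wrappers above, assuming <emmintrin.h> and -msse2:

     #include <emmintrin.h>

     int
     get_lane3 (__m128i v)
     {
       return _mm_extract_epi16 (v, 3);
     }

     __m128i
     set_lane3 (__m128i v, int x)
     {
       return _mm_insert_epi16 (v, x, 3);
     }

   The extract goes through IX86_BUILTIN_VEC_EXT_V8HI and the insert
   through IX86_BUILTIN_VEC_SET_V8HI; note that ix86_expand_vec_set_builtin
   copies the source vector into a fresh register first, so the intrinsic
   keeps its value semantics and the original operand is never clobbered.  */
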
25164 /* Expand an expression EXP that calls a built-in function,
25165 with result going to TARGET if that's convenient
25166 (and in mode MODE if that's convenient).
25167 SUBTARGET may be used as the target for computing one of EXP's operands.
25168 IGNORE is nonzero if the value is to be ignored. */
25169
25170 static rtx
25171 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25172 enum machine_mode mode ATTRIBUTE_UNUSED,
25173 int ignore ATTRIBUTE_UNUSED)
25174 {
25175 const struct builtin_description *d;
25176 size_t i;
25177 enum insn_code icode;
25178 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25179 tree arg0, arg1, arg2;
25180 rtx op0, op1, op2, pat;
25181 enum machine_mode mode0, mode1, mode2;
25182 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25183
25184 /* Determine whether the builtin function is available under the current ISA.
25185 Originally the builtin was not created if it wasn't applicable to the
25186 current ISA based on the command line switches. With function specific
25187 options, we need to check in the context of the function making the call
25188 whether it is supported. */
25189 if (ix86_builtins_isa[fcode].isa
25190 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25191 {
25192 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25193 NULL, NULL, false);
25194
25195 if (!opts)
25196 error ("%qE needs unknown isa option", fndecl);
25197 else
25198 {
25199 gcc_assert (opts != NULL);
25200 error ("%qE needs isa option %s", fndecl, opts);
25201 free (opts);
25202 }
25203 return const0_rtx;
25204 }
25205
25206 switch (fcode)
25207 {
25208 case IX86_BUILTIN_MASKMOVQ:
25209 case IX86_BUILTIN_MASKMOVDQU:
25210 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25211 ? CODE_FOR_mmx_maskmovq
25212 : CODE_FOR_sse2_maskmovdqu);
25213 /* Note the arg order is different from the operand order. */
25214 arg1 = CALL_EXPR_ARG (exp, 0);
25215 arg2 = CALL_EXPR_ARG (exp, 1);
25216 arg0 = CALL_EXPR_ARG (exp, 2);
25217 op0 = expand_normal (arg0);
25218 op1 = expand_normal (arg1);
25219 op2 = expand_normal (arg2);
25220 mode0 = insn_data[icode].operand[0].mode;
25221 mode1 = insn_data[icode].operand[1].mode;
25222 mode2 = insn_data[icode].operand[2].mode;
25223
25224 op0 = force_reg (Pmode, op0);
25225 op0 = gen_rtx_MEM (mode1, op0);
25226
25227 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25228 op0 = copy_to_mode_reg (mode0, op0);
25229 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25230 op1 = copy_to_mode_reg (mode1, op1);
25231 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25232 op2 = copy_to_mode_reg (mode2, op2);
25233 pat = GEN_FCN (icode) (op0, op1, op2);
25234 if (! pat)
25235 return 0;
25236 emit_insn (pat);
25237 return 0;
25238
25239 case IX86_BUILTIN_LDMXCSR:
25240 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25241 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25242 emit_move_insn (target, op0);
25243 emit_insn (gen_sse_ldmxcsr (target));
25244 return 0;
25245
25246 case IX86_BUILTIN_STMXCSR:
25247 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25248 emit_insn (gen_sse_stmxcsr (target));
25249 return copy_to_mode_reg (SImode, target);
25250
25251 case IX86_BUILTIN_CLFLUSH:
25252 arg0 = CALL_EXPR_ARG (exp, 0);
25253 op0 = expand_normal (arg0);
25254 icode = CODE_FOR_sse2_clflush;
25255 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25256 op0 = copy_to_mode_reg (Pmode, op0);
25257
25258 emit_insn (gen_sse2_clflush (op0));
25259 return 0;
25260
25261 case IX86_BUILTIN_MONITOR:
25262 arg0 = CALL_EXPR_ARG (exp, 0);
25263 arg1 = CALL_EXPR_ARG (exp, 1);
25264 arg2 = CALL_EXPR_ARG (exp, 2);
25265 op0 = expand_normal (arg0);
25266 op1 = expand_normal (arg1);
25267 op2 = expand_normal (arg2);
25268 if (!REG_P (op0))
25269 op0 = copy_to_mode_reg (Pmode, op0);
25270 if (!REG_P (op1))
25271 op1 = copy_to_mode_reg (SImode, op1);
25272 if (!REG_P (op2))
25273 op2 = copy_to_mode_reg (SImode, op2);
25274 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25275 return 0;
25276
25277 case IX86_BUILTIN_MWAIT:
25278 arg0 = CALL_EXPR_ARG (exp, 0);
25279 arg1 = CALL_EXPR_ARG (exp, 1);
25280 op0 = expand_normal (arg0);
25281 op1 = expand_normal (arg1);
25282 if (!REG_P (op0))
25283 op0 = copy_to_mode_reg (SImode, op0);
25284 if (!REG_P (op1))
25285 op1 = copy_to_mode_reg (SImode, op1);
25286 emit_insn (gen_sse3_mwait (op0, op1));
25287 return 0;
25288
25289 case IX86_BUILTIN_VEC_INIT_V2SI:
25290 case IX86_BUILTIN_VEC_INIT_V4HI:
25291 case IX86_BUILTIN_VEC_INIT_V8QI:
25292 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25293
25294 case IX86_BUILTIN_VEC_EXT_V2DF:
25295 case IX86_BUILTIN_VEC_EXT_V2DI:
25296 case IX86_BUILTIN_VEC_EXT_V4SF:
25297 case IX86_BUILTIN_VEC_EXT_V4SI:
25298 case IX86_BUILTIN_VEC_EXT_V8HI:
25299 case IX86_BUILTIN_VEC_EXT_V2SI:
25300 case IX86_BUILTIN_VEC_EXT_V4HI:
25301 case IX86_BUILTIN_VEC_EXT_V16QI:
25302 return ix86_expand_vec_ext_builtin (exp, target);
25303
25304 case IX86_BUILTIN_VEC_SET_V2DI:
25305 case IX86_BUILTIN_VEC_SET_V4SF:
25306 case IX86_BUILTIN_VEC_SET_V4SI:
25307 case IX86_BUILTIN_VEC_SET_V8HI:
25308 case IX86_BUILTIN_VEC_SET_V4HI:
25309 case IX86_BUILTIN_VEC_SET_V16QI:
25310 return ix86_expand_vec_set_builtin (exp);
25311
25312 case IX86_BUILTIN_INFQ:
25313 case IX86_BUILTIN_HUGE_VALQ:
25314 {
25315 REAL_VALUE_TYPE inf;
25316 rtx tmp;
25317
25318 real_inf (&inf);
25319 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25320
25321 tmp = validize_mem (force_const_mem (mode, tmp));
25322
25323 if (target == 0)
25324 target = gen_reg_rtx (mode);
25325
25326 emit_move_insn (target, tmp);
25327 return target;
25328 }
25329
25330 default:
25331 break;
25332 }
25333
25334 for (i = 0, d = bdesc_special_args;
25335 i < ARRAY_SIZE (bdesc_special_args);
25336 i++, d++)
25337 if (d->code == fcode)
25338 return ix86_expand_special_args_builtin (d, exp, target);
25339
25340 for (i = 0, d = bdesc_args;
25341 i < ARRAY_SIZE (bdesc_args);
25342 i++, d++)
25343 if (d->code == fcode)
25344 switch (fcode)
25345 {
25346 case IX86_BUILTIN_FABSQ:
25347 case IX86_BUILTIN_COPYSIGNQ:
25348 if (!TARGET_SSE2)
25349 /* Emit a normal call if SSE2 isn't available. */
25350 return expand_call (exp, target, ignore);
25351 default:
25352 return ix86_expand_args_builtin (d, exp, target);
25353 }
25354
25355 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25356 if (d->code == fcode)
25357 return ix86_expand_sse_comi (d, exp, target);
25358
25359 for (i = 0, d = bdesc_pcmpestr;
25360 i < ARRAY_SIZE (bdesc_pcmpestr);
25361 i++, d++)
25362 if (d->code == fcode)
25363 return ix86_expand_sse_pcmpestr (d, exp, target);
25364
25365 for (i = 0, d = bdesc_pcmpistr;
25366 i < ARRAY_SIZE (bdesc_pcmpistr);
25367 i++, d++)
25368 if (d->code == fcode)
25369 return ix86_expand_sse_pcmpistr (d, exp, target);
25370
25371 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25372 if (d->code == fcode)
25373 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25374 (enum multi_arg_type)d->flag,
25375 d->comparison);
25376
25377 gcc_unreachable ();
25378 }
25379
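/* For illustration, the ISA check at the top of ix86_expand_builtin is
   what makes per-function ISA selection work.  A minimal sketch, assuming
   the target attribute is available:

     __attribute__ ((target ("sse4.2")))
     unsigned int
     crc_step (unsigned int crc, unsigned char byte)
     {
       return __builtin_ia32_crc32qi (crc, byte);
     }

   Inside crc_step the SSE4.2 bit is set in ix86_isa_flags, so the builtin
   expands normally.  Calling the same builtin from a function compiled
   without SSE4.2 enabled takes the error path above and reports the
   missing isa option (here -msse4.2) instead of generating code.  */
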
25380 /* Returns a function decl for a vectorized version of the builtin function
25381    with builtin function code FN, result vector type TYPE_OUT and argument
25382    vector type TYPE_IN, or NULL_TREE if it is not available.  */
25383
25384 static tree
25385 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25386 tree type_in)
25387 {
25388 enum machine_mode in_mode, out_mode;
25389 int in_n, out_n;
25390
25391 if (TREE_CODE (type_out) != VECTOR_TYPE
25392 || TREE_CODE (type_in) != VECTOR_TYPE)
25393 return NULL_TREE;
25394
25395 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25396 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25397 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25398 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25399
25400 switch (fn)
25401 {
25402 case BUILT_IN_SQRT:
25403 if (out_mode == DFmode && out_n == 2
25404 && in_mode == DFmode && in_n == 2)
25405 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25406 break;
25407
25408 case BUILT_IN_SQRTF:
25409 if (out_mode == SFmode && out_n == 4
25410 && in_mode == SFmode && in_n == 4)
25411 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25412 break;
25413
25414 case BUILT_IN_LRINT:
25415 if (out_mode == SImode && out_n == 4
25416 && in_mode == DFmode && in_n == 2)
25417 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25418 break;
25419
25420 case BUILT_IN_LRINTF:
25421 if (out_mode == SImode && out_n == 4
25422 && in_mode == SFmode && in_n == 4)
25423 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25424 break;
25425
25426 default:
25427 ;
25428 }
25429
25430 /* Dispatch to a handler for a vectorization library. */
25431 if (ix86_veclib_handler)
25432 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25433 type_in);
25434
25435 return NULL_TREE;
25436 }
25437
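/* For illustration, this hook is how the vectorizer replaces a scalar
   math call with an SSE builtin.  A minimal sketch, assuming -O3 -msse2
   and -ffast-math (so the sqrt call is considered vectorizable):

     #include <math.h>

     void
     vec_sqrt (double *out, const double *in, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         out[i] = sqrt (in[i]);
     }

   For a V2DF vector type both element modes are DFmode with two subparts,
   so the BUILT_IN_SQRT case returns IX86_BUILTIN_SQRTPD and the loop body
   is emitted as sqrtpd instructions.  */
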
25438 /* Handler for an SVML-style interface to
25439 a library with vectorized intrinsics. */
25440
25441 static tree
25442 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25443 {
25444 char name[20];
25445 tree fntype, new_fndecl, args;
25446 unsigned arity;
25447 const char *bname;
25448 enum machine_mode el_mode, in_mode;
25449 int n, in_n;
25450
25451 /* The SVML is suitable for unsafe math only. */
25452 if (!flag_unsafe_math_optimizations)
25453 return NULL_TREE;
25454
25455 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25456 n = TYPE_VECTOR_SUBPARTS (type_out);
25457 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25458 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25459 if (el_mode != in_mode
25460 || n != in_n)
25461 return NULL_TREE;
25462
25463 switch (fn)
25464 {
25465 case BUILT_IN_EXP:
25466 case BUILT_IN_LOG:
25467 case BUILT_IN_LOG10:
25468 case BUILT_IN_POW:
25469 case BUILT_IN_TANH:
25470 case BUILT_IN_TAN:
25471 case BUILT_IN_ATAN:
25472 case BUILT_IN_ATAN2:
25473 case BUILT_IN_ATANH:
25474 case BUILT_IN_CBRT:
25475 case BUILT_IN_SINH:
25476 case BUILT_IN_SIN:
25477 case BUILT_IN_ASINH:
25478 case BUILT_IN_ASIN:
25479 case BUILT_IN_COSH:
25480 case BUILT_IN_COS:
25481 case BUILT_IN_ACOSH:
25482 case BUILT_IN_ACOS:
25483 if (el_mode != DFmode || n != 2)
25484 return NULL_TREE;
25485 break;
25486
25487 case BUILT_IN_EXPF:
25488 case BUILT_IN_LOGF:
25489 case BUILT_IN_LOG10F:
25490 case BUILT_IN_POWF:
25491 case BUILT_IN_TANHF:
25492 case BUILT_IN_TANF:
25493 case BUILT_IN_ATANF:
25494 case BUILT_IN_ATAN2F:
25495 case BUILT_IN_ATANHF:
25496 case BUILT_IN_CBRTF:
25497 case BUILT_IN_SINHF:
25498 case BUILT_IN_SINF:
25499 case BUILT_IN_ASINHF:
25500 case BUILT_IN_ASINF:
25501 case BUILT_IN_COSHF:
25502 case BUILT_IN_COSF:
25503 case BUILT_IN_ACOSHF:
25504 case BUILT_IN_ACOSF:
25505 if (el_mode != SFmode || n != 4)
25506 return NULL_TREE;
25507 break;
25508
25509 default:
25510 return NULL_TREE;
25511 }
25512
25513 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25514
25515 if (fn == BUILT_IN_LOGF)
25516 strcpy (name, "vmlsLn4");
25517 else if (fn == BUILT_IN_LOG)
25518 strcpy (name, "vmldLn2");
25519 else if (n == 4)
25520 {
25521 sprintf (name, "vmls%s", bname+10);
25522 name[strlen (name)-1] = '4';
25523 }
25524 else
25525 sprintf (name, "vmld%s2", bname+10);
25526
25527 /* Convert to uppercase. */
25528 name[4] &= ~0x20;
25529
25530 arity = 0;
25531 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25532 args = TREE_CHAIN (args))
25533 arity++;
25534
25535 if (arity == 1)
25536 fntype = build_function_type_list (type_out, type_in, NULL);
25537 else
25538 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25539
25540 /* Build a function declaration for the vectorized function. */
25541 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25542 TREE_PUBLIC (new_fndecl) = 1;
25543 DECL_EXTERNAL (new_fndecl) = 1;
25544 DECL_IS_NOVOPS (new_fndecl) = 1;
25545 TREE_READONLY (new_fndecl) = 1;
25546
25547 return new_fndecl;
25548 }
25549
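/* For illustration, a worked example of the SVML name construction above
   (relying on the DECL_NAME of the implicit builtin having the form
   "__builtin_<name>", which is why bname+10 skips the prefix):

     BUILT_IN_SINF, V4SF:  "sinf" -> "vmlssinf" -> last char forced to '4'
                           -> "vmlssin4" -> name[4] uppercased -> "vmlsSin4"

     BUILT_IN_SIN,  V2DF:  "sin"  -> "vmldsin2" -> name[4] uppercased
                           -> "vmldSin2"

   BUILT_IN_LOG and BUILT_IN_LOGF are special-cased up front because the
   SVML routines are named after "Ln" rather than "log".  */
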
25550 /* Handler for an ACML-style interface to
25551 a library with vectorized intrinsics. */
25552
25553 static tree
25554 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25555 {
25556 char name[20] = "__vr.._";
25557 tree fntype, new_fndecl, args;
25558 unsigned arity;
25559 const char *bname;
25560 enum machine_mode el_mode, in_mode;
25561 int n, in_n;
25562
25563   /* The ACML is 64-bit only and suitable for unsafe math only, as
25564      it does not correctly support parts of IEEE arithmetic with the
25565      required precision, such as denormals.  */
25566 if (!TARGET_64BIT
25567 || !flag_unsafe_math_optimizations)
25568 return NULL_TREE;
25569
25570 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25571 n = TYPE_VECTOR_SUBPARTS (type_out);
25572 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25573 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25574 if (el_mode != in_mode
25575 || n != in_n)
25576 return NULL_TREE;
25577
25578 switch (fn)
25579 {
25580 case BUILT_IN_SIN:
25581 case BUILT_IN_COS:
25582 case BUILT_IN_EXP:
25583 case BUILT_IN_LOG:
25584 case BUILT_IN_LOG2:
25585 case BUILT_IN_LOG10:
25586 name[4] = 'd';
25587 name[5] = '2';
25588 if (el_mode != DFmode
25589 || n != 2)
25590 return NULL_TREE;
25591 break;
25592
25593 case BUILT_IN_SINF:
25594 case BUILT_IN_COSF:
25595 case BUILT_IN_EXPF:
25596 case BUILT_IN_POWF:
25597 case BUILT_IN_LOGF:
25598 case BUILT_IN_LOG2F:
25599 case BUILT_IN_LOG10F:
25600 name[4] = 's';
25601 name[5] = '4';
25602 if (el_mode != SFmode
25603 || n != 4)
25604 return NULL_TREE;
25605 break;
25606
25607 default:
25608 return NULL_TREE;
25609 }
25610
25611 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25612 sprintf (name + 7, "%s", bname+10);
25613
25614 arity = 0;
25615 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25616 args = TREE_CHAIN (args))
25617 arity++;
25618
25619 if (arity == 1)
25620 fntype = build_function_type_list (type_out, type_in, NULL);
25621 else
25622 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25623
25624 /* Build a function declaration for the vectorized function. */
25625 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25626 TREE_PUBLIC (new_fndecl) = 1;
25627 DECL_EXTERNAL (new_fndecl) = 1;
25628 DECL_IS_NOVOPS (new_fndecl) = 1;
25629 TREE_READONLY (new_fndecl) = 1;
25630
25631 return new_fndecl;
25632 }
25633
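/* For illustration, a worked example of the ACML name construction above:
   the template is "__vr.._", the two dots are patched to "d2" for V2DF or
   "s4" for V4SF, and the scalar builtin name (past the "__builtin_"
   prefix, hence bname+10) is appended after the trailing underscore:

     BUILT_IN_SIN,   V2DF  ->  "__vrd2_sin"
     BUILT_IN_SINF,  V4SF  ->  "__vrs4_sinf"
     BUILT_IN_LOG2,  V2DF  ->  "__vrd2_log2"  */
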
25634
25635 /* Returns a decl of a function that implements conversion of an integer vector
25636 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25637 side of the conversion.
25638 Return NULL_TREE if it is not available. */
25639
25640 static tree
25641 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25642 {
25643 if (TREE_CODE (type) != VECTOR_TYPE
25644 /* There are only conversions from/to signed integers. */
25645 || TYPE_UNSIGNED (TREE_TYPE (type)))
25646 return NULL_TREE;
25647
25648 switch (code)
25649 {
25650 case FLOAT_EXPR:
25651 switch (TYPE_MODE (type))
25652 {
25653 case V4SImode:
25654 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
25655 default:
25656 return NULL_TREE;
25657 }
25658
25659 case FIX_TRUNC_EXPR:
25660 switch (TYPE_MODE (type))
25661 {
25662 case V4SImode:
25663 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25664 default:
25665 return NULL_TREE;
25666 }
25667 default:
25668 return NULL_TREE;
25669
25670 }
25671 }
25672
25673 /* Returns a decl for a target-specific builtin that implements the
25674    reciprocal of the function FN, or NULL_TREE if it is not available.  */
25675
25676 static tree
25677 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25678 bool sqrt ATTRIBUTE_UNUSED)
25679 {
25680 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25681 && flag_finite_math_only && !flag_trapping_math
25682 && flag_unsafe_math_optimizations))
25683 return NULL_TREE;
25684
25685 if (md_fn)
25686 /* Machine dependent builtins. */
25687 switch (fn)
25688 {
25689 /* Vectorized version of sqrt to rsqrt conversion. */
25690 case IX86_BUILTIN_SQRTPS_NR:
25691 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25692
25693 default:
25694 return NULL_TREE;
25695 }
25696 else
25697 /* Normal builtins. */
25698 switch (fn)
25699 {
25700 /* Sqrt to rsqrt conversion. */
25701 case BUILT_IN_SQRTF:
25702 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25703
25704 default:
25705 return NULL_TREE;
25706 }
25707 }
25708
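/* For illustration, a minimal sketch of what the reciprocal hook above
   enables, assuming SSE math plus -mrecip and -ffast-math (which provide
   the TARGET_RECIP and unsafe/finite/non-trapping conditions tested
   above):

     float
     inv_norm (float x)
     {
       return 1.0f / __builtin_sqrtf (x);
     }

   sqrtf is mapped to IX86_BUILTIN_RSQRTF here, so the reciprocal pass can
   emit an RSQRTSS estimate refined by a Newton-Raphson step instead of a
   full SQRTSS followed by a division.  */
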
25709 /* Store OPERAND to memory after reload has completed.  This means
25710    that we can't easily use assign_stack_local.  */
25711 rtx
25712 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25713 {
25714 rtx result;
25715
25716 gcc_assert (reload_completed);
25717 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25718 {
25719 result = gen_rtx_MEM (mode,
25720 gen_rtx_PLUS (Pmode,
25721 stack_pointer_rtx,
25722 GEN_INT (-RED_ZONE_SIZE)));
25723 emit_move_insn (result, operand);
25724 }
25725 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25726 {
25727 switch (mode)
25728 {
25729 case HImode:
25730 case SImode:
25731 operand = gen_lowpart (DImode, operand);
25732 /* FALLTHRU */
25733 case DImode:
25734 emit_insn (
25735 gen_rtx_SET (VOIDmode,
25736 gen_rtx_MEM (DImode,
25737 gen_rtx_PRE_DEC (DImode,
25738 stack_pointer_rtx)),
25739 operand));
25740 break;
25741 default:
25742 gcc_unreachable ();
25743 }
25744 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25745 }
25746 else
25747 {
25748 switch (mode)
25749 {
25750 case DImode:
25751 {
25752 rtx operands[2];
25753 split_di (&operand, 1, operands, operands + 1);
25754 emit_insn (
25755 gen_rtx_SET (VOIDmode,
25756 gen_rtx_MEM (SImode,
25757 gen_rtx_PRE_DEC (Pmode,
25758 stack_pointer_rtx)),
25759 operands[1]));
25760 emit_insn (
25761 gen_rtx_SET (VOIDmode,
25762 gen_rtx_MEM (SImode,
25763 gen_rtx_PRE_DEC (Pmode,
25764 stack_pointer_rtx)),
25765 operands[0]));
25766 }
25767 break;
25768 case HImode:
25769 /* Store HImodes as SImodes. */
25770 operand = gen_lowpart (SImode, operand);
25771 /* FALLTHRU */
25772 case SImode:
25773 emit_insn (
25774 gen_rtx_SET (VOIDmode,
25775 gen_rtx_MEM (GET_MODE (operand),
25776 gen_rtx_PRE_DEC (SImode,
25777 stack_pointer_rtx)),
25778 operand));
25779 break;
25780 default:
25781 gcc_unreachable ();
25782 }
25783 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25784 }
25785 return result;
25786 }
25787
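/* For illustration, a sketch of the three cases above on concrete
   targets:

     64-bit with a red zone:  the value is stored at sp - RED_ZONE_SIZE
                              (e.g. movq %rax, -128(%rsp)) and no stack
                              adjustment is emitted;
     64-bit without red zone: a pushq via the PRE_DEC of the stack pointer;
     32-bit:                  one or two pushl instructions, DImode being
                              split and pushed high word first so the low
                              word ends up at the lower address.

   In every case the returned MEM addresses the freshly created slot, and
   ix86_free_from_memory below releases it again.  */
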
25788 /* Free the operand from memory.  */
25789 void
25790 ix86_free_from_memory (enum machine_mode mode)
25791 {
25792 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25793 {
25794 int size;
25795
25796 if (mode == DImode || TARGET_64BIT)
25797 size = 8;
25798 else
25799 size = 4;
25800       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
25801 	 to a pop or add instruction if registers are available.  */
25802 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25803 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25804 GEN_INT (size))));
25805 }
25806 }
25807
25808 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25809 QImode must go into class Q_REGS.
25810 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25811 movdf to do mem-to-mem moves through integer regs. */
25812 enum reg_class
25813 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25814 {
25815 enum machine_mode mode = GET_MODE (x);
25816
25817 /* We're only allowed to return a subclass of CLASS. Many of the
25818 following checks fail for NO_REGS, so eliminate that early. */
25819 if (regclass == NO_REGS)
25820 return NO_REGS;
25821
25822 /* All classes can load zeros. */
25823 if (x == CONST0_RTX (mode))
25824 return regclass;
25825
25826 /* Force constants into memory if we are loading a (nonzero) constant into
25827 an MMX or SSE register. This is because there are no MMX/SSE instructions
25828 to load from a constant. */
25829 if (CONSTANT_P (x)
25830 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25831 return NO_REGS;
25832
25833 /* Prefer SSE regs only, if we can use them for math. */
25834 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25835 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25836
25837 /* Floating-point constants need more complex checks. */
25838 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25839 {
25840 /* General regs can load everything. */
25841 if (reg_class_subset_p (regclass, GENERAL_REGS))
25842 return regclass;
25843
25844 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25845 zero above. We only want to wind up preferring 80387 registers if
25846 we plan on doing computation with them. */
25847 if (TARGET_80387
25848 && standard_80387_constant_p (x))
25849 {
25850 /* Limit class to non-sse. */
25851 if (regclass == FLOAT_SSE_REGS)
25852 return FLOAT_REGS;
25853 if (regclass == FP_TOP_SSE_REGS)
25854 return FP_TOP_REG;
25855 if (regclass == FP_SECOND_SSE_REGS)
25856 return FP_SECOND_REG;
25857 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25858 return regclass;
25859 }
25860
25861 return NO_REGS;
25862 }
25863
25864   /* Generally when we see PLUS here, it's the function invariant
25865      (plus soft-fp const_int), which can only be computed into general
25866      regs.  */
25867 if (GET_CODE (x) == PLUS)
25868 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25869
25870 /* QImode constants are easy to load, but non-constant QImode data
25871 must go into Q_REGS. */
25872 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25873 {
25874 if (reg_class_subset_p (regclass, Q_REGS))
25875 return regclass;
25876 if (reg_class_subset_p (Q_REGS, regclass))
25877 return Q_REGS;
25878 return NO_REGS;
25879 }
25880
25881 return regclass;
25882 }
25883
25884 /* Discourage putting floating-point values in SSE registers unless
25885 SSE math is being used, and likewise for the 387 registers. */
25886 enum reg_class
25887 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25888 {
25889 enum machine_mode mode = GET_MODE (x);
25890
25891 /* Restrict the output reload class to the register bank that we are doing
25892 math on. If we would like not to return a subset of CLASS, reject this
25893 alternative: if reload cannot do this, it will still use its choice. */
25895 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25896 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25897
25898 if (X87_FLOAT_MODE_P (mode))
25899 {
25900 if (regclass == FP_TOP_SSE_REGS)
25901 return FP_TOP_REG;
25902 else if (regclass == FP_SECOND_SSE_REGS)
25903 return FP_SECOND_REG;
25904 else
25905 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25906 }
25907
25908 return regclass;
25909 }
25910
25911 static enum reg_class
25912 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25913 enum machine_mode mode,
25914 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25915 {
25916   /* QImode spills from non-QI registers require an
25917      intermediate register on 32-bit targets.  */
25918 if (!in_p && mode == QImode && !TARGET_64BIT
25919 && (rclass == GENERAL_REGS
25920 || rclass == LEGACY_REGS
25921 || rclass == INDEX_REGS))
25922 {
25923 int regno;
25924
25925 if (REG_P (x))
25926 regno = REGNO (x);
25927 else
25928 regno = -1;
25929
25930 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25931 regno = true_regnum (x);
25932
25933 /* Return Q_REGS if the operand is in memory. */
25934 if (regno == -1)
25935 return Q_REGS;
25936 }
25937
25938 return NO_REGS;
25939 }
25940
25941 /* If we are copying between general and FP registers, we need a memory
25942 location. The same is true for SSE and MMX registers.
25943
25944    To optimize register_move_cost performance, allow the inline variant.
25945 
25946    The macro can't work reliably when one of the CLASSES is a class containing
25947    registers from multiple units (SSE, MMX, integer).  We avoid this by never
25948    combining those units in a single alternative in the machine description.
25949    Ensure that this constraint holds to avoid unexpected surprises.
25950
25951 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25952 enforce these sanity checks. */
25953
25954 static inline int
25955 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25956 enum machine_mode mode, int strict)
25957 {
25958 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25959 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25960 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25961 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25962 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25963 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25964 {
25965 gcc_assert (!strict);
25966 return true;
25967 }
25968
25969 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25970 return true;
25971
25972   /* ??? This is a lie.  We do have moves between mmx/general and between
25973      mmx/sse2.  But by saying we need secondary memory we discourage the
25974      register allocator from using the mmx registers unless needed.  */
25975 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25976 return true;
25977
25978 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25979 {
25980 /* SSE1 doesn't have any direct moves from other classes. */
25981 if (!TARGET_SSE2)
25982 return true;
25983
25984 /* If the target says that inter-unit moves are more expensive
25985 than moving through memory, then don't generate them. */
25986 if (!TARGET_INTER_UNIT_MOVES)
25987 return true;
25988
25989 /* Between SSE and general, we have moves no larger than word size. */
25990 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
25991 return true;
25992 }
25993
25994 return false;
25995 }
25996
25997 int
25998 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25999 enum machine_mode mode, int strict)
26000 {
26001 return inline_secondary_memory_needed (class1, class2, mode, strict);
26002 }
26003
26004 /* Return true if the registers in CLASS cannot represent the change from
26005 modes FROM to TO. */
26006
26007 bool
26008 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26009 enum reg_class regclass)
26010 {
26011 if (from == to)
26012 return false;
26013
26014 /* x87 registers can't do subreg at all, as all values are reformatted
26015 to extended precision. */
26016 if (MAYBE_FLOAT_CLASS_P (regclass))
26017 return true;
26018
26019 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26020 {
26021 /* Vector registers do not support QI or HImode loads. If we don't
26022 disallow a change to these modes, reload will assume it's ok to
26023 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26024 the vec_dupv4hi pattern. */
26025 if (GET_MODE_SIZE (from) < 4)
26026 return true;
26027
26028 /* Vector registers do not support subreg with nonzero offsets, which
26029 are otherwise valid for integer registers. Since we can't see
26030 whether we have a nonzero offset from here, prohibit all
26031 nonparadoxical subregs changing size. */
26032 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26033 return true;
26034 }
26035
26036 return false;
26037 }
26038
26039 /* Return the cost of moving data of mode M between a
26040 register and memory. A value of 2 is the default; this cost is
26041 relative to those in `REGISTER_MOVE_COST'.
26042
26043    This function is used extensively by register_move_cost, which is used to
26044    build tables at startup, so make it inline in that case.
26045    When IN is 2, return the maximum of the in and out move costs.
26046
26047 If moving between registers and memory is more expensive than
26048 between two registers, you should define this macro to express the
26049 relative cost.
26050
26051    Also model the increased cost of moving QImode registers in
26052    non-Q_REGS classes.
26053 */
26054 static inline int
26055 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26056 int in)
26057 {
26058 int cost;
26059 if (FLOAT_CLASS_P (regclass))
26060 {
26061 int index;
26062 switch (mode)
26063 {
26064 case SFmode:
26065 index = 0;
26066 break;
26067 case DFmode:
26068 index = 1;
26069 break;
26070 case XFmode:
26071 index = 2;
26072 break;
26073 default:
26074 return 100;
26075 }
26076 if (in == 2)
26077 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26078 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26079 }
26080 if (SSE_CLASS_P (regclass))
26081 {
26082 int index;
26083 switch (GET_MODE_SIZE (mode))
26084 {
26085 case 4:
26086 index = 0;
26087 break;
26088 case 8:
26089 index = 1;
26090 break;
26091 case 16:
26092 index = 2;
26093 break;
26094 default:
26095 return 100;
26096 }
26097 if (in == 2)
26098 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26099 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26100 }
26101 if (MMX_CLASS_P (regclass))
26102 {
26103 int index;
26104 switch (GET_MODE_SIZE (mode))
26105 {
26106 case 4:
26107 index = 0;
26108 break;
26109 case 8:
26110 index = 1;
26111 break;
26112 default:
26113 return 100;
26114 }
26115       if (in == 2)
26116 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26117 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26118 }
26119 switch (GET_MODE_SIZE (mode))
26120 {
26121 case 1:
26122 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26123 {
26124 if (!in)
26125 return ix86_cost->int_store[0];
26126 if (TARGET_PARTIAL_REG_DEPENDENCY
26127 && optimize_function_for_speed_p (cfun))
26128 cost = ix86_cost->movzbl_load;
26129 else
26130 cost = ix86_cost->int_load[0];
26131 if (in == 2)
26132 return MAX (cost, ix86_cost->int_store[0]);
26133 return cost;
26134 }
26135 else
26136 {
26137 if (in == 2)
26138 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26139 if (in)
26140 return ix86_cost->movzbl_load;
26141 else
26142 return ix86_cost->int_store[0] + 4;
26143 }
26144 break;
26145 case 2:
26146 if (in == 2)
26147 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26148 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26149 default:
26150 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26151 if (mode == TFmode)
26152 mode = XFmode;
26153 if (in == 2)
26154 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26155 else if (in)
26156 cost = ix86_cost->int_load[2];
26157 else
26158 cost = ix86_cost->int_store[2];
26159 return (cost * (((int) GET_MODE_SIZE (mode)
26160 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26161 }
26162 }
26163
26164 int
26165 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26166 {
26167 return inline_memory_move_cost (mode, regclass, in);
26168 }
26169
26170
26171 /* Return the cost of moving data from a register in class CLASS1 to
26172 one in class CLASS2.
26173
26174 It is not required that the cost always equal 2 when FROM is the same as TO;
26175 on some machines it is expensive to move between registers if they are not
26176 general registers. */
26177
26178 int
26179 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26180 enum reg_class class2)
26181 {
26182 /* In case we require secondary memory, compute cost of the store followed
26183 by load. In order to avoid bad register allocation choices, we need
26184 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26185
26186 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26187 {
26188 int cost = 1;
26189
26190 cost += inline_memory_move_cost (mode, class1, 2);
26191 cost += inline_memory_move_cost (mode, class2, 2);
26192
26193       /* In case of copying from a general purpose register we may emit multiple
26194          stores followed by a single load, causing a memory size mismatch stall.
26195          Count this as an arbitrarily high cost of 20.  */
26196 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26197 cost += 20;
26198
26199 /* In the case of FP/MMX moves, the registers actually overlap, and we
26200 have to switch modes in order to treat them differently. */
26201 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26202 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26203 cost += 20;
26204
26205 return cost;
26206 }
26207
26208 /* Moves between SSE/MMX and integer unit are expensive. */
26209 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26210 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26211
26212     /* ??? By keeping the returned value relatively high, we limit the number
26213        of moves between integer and MMX/SSE registers for all targets.
26214        Additionally, a high value prevents a problem with x86_modes_tieable_p(),
26215        where integer modes in MMX/SSE registers are not tieable
26216        because of missing QImode and HImode moves to, from or between
26217        MMX/SSE registers.  */
26218 return MAX (8, ix86_cost->mmxsse_to_integer);
26219
26220 if (MAYBE_FLOAT_CLASS_P (class1))
26221 return ix86_cost->fp_move;
26222 if (MAYBE_SSE_CLASS_P (class1))
26223 return ix86_cost->sse_move;
26224 if (MAYBE_MMX_CLASS_P (class1))
26225 return ix86_cost->mmx_move;
26226 return 2;
26227 }
26228
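/* For illustration, a worked example of the secondary-memory branch
   above: moving an SImode value between GENERAL_REGS and SSE_REGS on a
   target where inline_secondary_memory_needed returns true costs

     1 + inline_memory_move_cost (SImode, GENERAL_REGS, 2)
       + inline_memory_move_cost (SImode, SSE_REGS, 2)

   i.e. a store on one side plus a load on the other, with the two +20
   penalties added only in the multi-store/single-load and FP/MMX overlap
   situations described in the comments above.  */
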
26229 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26230
26231 bool
26232 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26233 {
26234   /* Flags, and only flags, can hold CCmode values.  */
26235 if (CC_REGNO_P (regno))
26236 return GET_MODE_CLASS (mode) == MODE_CC;
26237 if (GET_MODE_CLASS (mode) == MODE_CC
26238 || GET_MODE_CLASS (mode) == MODE_RANDOM
26239 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26240 return 0;
26241 if (FP_REGNO_P (regno))
26242 return VALID_FP_MODE_P (mode);
26243 if (SSE_REGNO_P (regno))
26244 {
26245 /* We implement the move patterns for all vector modes into and
26246 out of SSE registers, even when no operation instructions
26247 are available. OImode move is available only when AVX is
26248 enabled. */
26249 return ((TARGET_AVX && mode == OImode)
26250 || VALID_AVX256_REG_MODE (mode)
26251 || VALID_SSE_REG_MODE (mode)
26252 || VALID_SSE2_REG_MODE (mode)
26253 || VALID_MMX_REG_MODE (mode)
26254 || VALID_MMX_REG_MODE_3DNOW (mode));
26255 }
26256 if (MMX_REGNO_P (regno))
26257 {
26258 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26259 so if the register is available at all, then we can move data of
26260 the given mode into or out of it. */
26261 return (VALID_MMX_REG_MODE (mode)
26262 || VALID_MMX_REG_MODE_3DNOW (mode));
26263 }
26264
26265 if (mode == QImode)
26266 {
26267       /* Take care of QImode values - they can live in non-QI regs,
26268 	 but then they do cause partial register stalls.  */
26269 if (regno <= BX_REG || TARGET_64BIT)
26270 return 1;
26271 if (!TARGET_PARTIAL_REG_STALL)
26272 return 1;
26273 return reload_in_progress || reload_completed;
26274 }
26275 /* We handle both integer and floats in the general purpose registers. */
26276 else if (VALID_INT_MODE_P (mode))
26277 return 1;
26278 else if (VALID_FP_MODE_P (mode))
26279 return 1;
26280 else if (VALID_DFP_MODE_P (mode))
26281 return 1;
26282 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26283 on to use that value in smaller contexts, this can easily force a
26284 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26285 supporting DImode, allow it. */
26286 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26287 return 1;
26288
26289 return 0;
26290 }
26291
26292 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26293 tieable integer mode. */
26294
26295 static bool
26296 ix86_tieable_integer_mode_p (enum machine_mode mode)
26297 {
26298 switch (mode)
26299 {
26300 case HImode:
26301 case SImode:
26302 return true;
26303
26304 case QImode:
26305 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26306
26307 case DImode:
26308 return TARGET_64BIT;
26309
26310 default:
26311 return false;
26312 }
26313 }
26314
26315 /* Return true if MODE1 is accessible in a register that can hold MODE2
26316 without copying. That is, all register classes that can hold MODE2
26317 can also hold MODE1. */
26318
26319 bool
26320 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26321 {
26322 if (mode1 == mode2)
26323 return true;
26324
26325 if (ix86_tieable_integer_mode_p (mode1)
26326 && ix86_tieable_integer_mode_p (mode2))
26327 return true;
26328
26329 /* MODE2 being XFmode implies fp stack or general regs, which means we
26330 can tie any smaller floating point modes to it. Note that we do not
26331 tie this with TFmode. */
26332 if (mode2 == XFmode)
26333 return mode1 == SFmode || mode1 == DFmode;
26334
26335 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26336 that we can tie it with SFmode. */
26337 if (mode2 == DFmode)
26338 return mode1 == SFmode;
26339
26340 /* If MODE2 is only appropriate for an SSE register, then tie with
26341 any other mode acceptable to SSE registers. */
26342 if (GET_MODE_SIZE (mode2) == 16
26343 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26344 return (GET_MODE_SIZE (mode1) == 16
26345 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26346
26347 /* If MODE2 is appropriate for an MMX register, then tie
26348 with any other mode acceptable to MMX registers. */
26349 if (GET_MODE_SIZE (mode2) == 8
26350 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26351 return (GET_MODE_SIZE (mode1) == 8
26352 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26353
26354 return false;
26355 }
26356
26357 /* Compute a (partial) cost for rtx X. Return true if the complete
26358 cost has been computed, and false if subexpressions should be
26359 scanned. In either case, *TOTAL contains the cost result. */
26360
26361 static bool
26362 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26363 {
26364 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26365 enum machine_mode mode = GET_MODE (x);
26366 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26367
26368 switch (code)
26369 {
26370 case CONST_INT:
26371 case CONST:
26372 case LABEL_REF:
26373 case SYMBOL_REF:
26374 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26375 *total = 3;
26376 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26377 *total = 2;
26378 else if (flag_pic && SYMBOLIC_CONST (x)
26379 && (!TARGET_64BIT
26380 	       || (GET_CODE (x) != LABEL_REF
26381 && (GET_CODE (x) != SYMBOL_REF
26382 || !SYMBOL_REF_LOCAL_P (x)))))
26383 *total = 1;
26384 else
26385 *total = 0;
26386 return true;
26387
26388 case CONST_DOUBLE:
26389 if (mode == VOIDmode)
26390 *total = 0;
26391 else
26392 switch (standard_80387_constant_p (x))
26393 {
26394 case 1: /* 0.0 */
26395 *total = 1;
26396 break;
26397 default: /* Other constants */
26398 *total = 2;
26399 break;
26400 case 0:
26401 case -1:
26402 /* Start with (MEM (SYMBOL_REF)), since that's where
26403 it'll probably end up. Add a penalty for size. */
26404 *total = (COSTS_N_INSNS (1)
26405 + (flag_pic != 0 && !TARGET_64BIT)
26406 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26407 break;
26408 }
26409 return true;
26410
26411 case ZERO_EXTEND:
26412 /* The zero extension is often completely free on x86_64, so make
26413 it as cheap as possible. */
26414 if (TARGET_64BIT && mode == DImode
26415 && GET_MODE (XEXP (x, 0)) == SImode)
26416 *total = 1;
26417 else if (TARGET_ZERO_EXTEND_WITH_AND)
26418 *total = cost->add;
26419 else
26420 *total = cost->movzx;
26421 return false;
26422
26423 case SIGN_EXTEND:
26424 *total = cost->movsx;
26425 return false;
26426
26427 case ASHIFT:
26428 if (CONST_INT_P (XEXP (x, 1))
26429 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26430 {
26431 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26432 if (value == 1)
26433 {
26434 *total = cost->add;
26435 return false;
26436 }
26437 if ((value == 2 || value == 3)
26438 && cost->lea <= cost->shift_const)
26439 {
26440 *total = cost->lea;
26441 return false;
26442 }
26443 }
26444 /* FALLTHRU */
26445
26446 case ROTATE:
26447 case ASHIFTRT:
26448 case LSHIFTRT:
26449 case ROTATERT:
26450 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26451 {
26452 if (CONST_INT_P (XEXP (x, 1)))
26453 {
26454 if (INTVAL (XEXP (x, 1)) > 32)
26455 *total = cost->shift_const + COSTS_N_INSNS (2);
26456 else
26457 *total = cost->shift_const * 2;
26458 }
26459 else
26460 {
26461 if (GET_CODE (XEXP (x, 1)) == AND)
26462 *total = cost->shift_var * 2;
26463 else
26464 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26465 }
26466 }
26467 else
26468 {
26469 if (CONST_INT_P (XEXP (x, 1)))
26470 *total = cost->shift_const;
26471 else
26472 *total = cost->shift_var;
26473 }
26474 return false;
26475
26476 case MULT:
26477 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26478 {
26479 /* ??? SSE scalar cost should be used here. */
26480 *total = cost->fmul;
26481 return false;
26482 }
26483 else if (X87_FLOAT_MODE_P (mode))
26484 {
26485 *total = cost->fmul;
26486 return false;
26487 }
26488 else if (FLOAT_MODE_P (mode))
26489 {
26490 /* ??? SSE vector cost should be used here. */
26491 *total = cost->fmul;
26492 return false;
26493 }
26494 else
26495 {
26496 rtx op0 = XEXP (x, 0);
26497 rtx op1 = XEXP (x, 1);
26498 int nbits;
26499 if (CONST_INT_P (XEXP (x, 1)))
26500 {
26501 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26502 for (nbits = 0; value != 0; value &= value - 1)
26503 nbits++;
26504 }
26505 else
26506 /* This is arbitrary. */
26507 nbits = 7;
26508
26509 /* Compute costs correctly for widening multiplication. */
26510 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26511 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26512 == GET_MODE_SIZE (mode))
26513 {
26514 int is_mulwiden = 0;
26515 enum machine_mode inner_mode = GET_MODE (op0);
26516
26517 if (GET_CODE (op0) == GET_CODE (op1))
26518 is_mulwiden = 1, op1 = XEXP (op1, 0);
26519 else if (CONST_INT_P (op1))
26520 {
26521 if (GET_CODE (op0) == SIGN_EXTEND)
26522 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26523 == INTVAL (op1);
26524 else
26525 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26526 }
26527
26528 if (is_mulwiden)
26529 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26530 }
26531
26532 *total = (cost->mult_init[MODE_INDEX (mode)]
26533 + nbits * cost->mult_bit
26534 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26535
26536 return true;
26537 }
26538
26539 case DIV:
26540 case UDIV:
26541 case MOD:
26542 case UMOD:
26543 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26544 /* ??? SSE cost should be used here. */
26545 *total = cost->fdiv;
26546 else if (X87_FLOAT_MODE_P (mode))
26547 *total = cost->fdiv;
26548 else if (FLOAT_MODE_P (mode))
26549 /* ??? SSE vector cost should be used here. */
26550 *total = cost->fdiv;
26551 else
26552 *total = cost->divide[MODE_INDEX (mode)];
26553 return false;
26554
26555 case PLUS:
26556 if (GET_MODE_CLASS (mode) == MODE_INT
26557 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26558 {
26559 if (GET_CODE (XEXP (x, 0)) == PLUS
26560 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26561 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26562 && CONSTANT_P (XEXP (x, 1)))
26563 {
26564 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26565 if (val == 2 || val == 4 || val == 8)
26566 {
26567 *total = cost->lea;
26568 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26569 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26570 outer_code, speed);
26571 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26572 return true;
26573 }
26574 }
26575 else if (GET_CODE (XEXP (x, 0)) == MULT
26576 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26577 {
26578 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26579 if (val == 2 || val == 4 || val == 8)
26580 {
26581 *total = cost->lea;
26582 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26583 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26584 return true;
26585 }
26586 }
26587 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26588 {
26589 *total = cost->lea;
26590 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26591 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26592 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26593 return true;
26594 }
26595 }
26596 /* FALLTHRU */
26597
26598 case MINUS:
26599 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26600 {
26601 /* ??? SSE cost should be used here. */
26602 *total = cost->fadd;
26603 return false;
26604 }
26605 else if (X87_FLOAT_MODE_P (mode))
26606 {
26607 *total = cost->fadd;
26608 return false;
26609 }
26610 else if (FLOAT_MODE_P (mode))
26611 {
26612 /* ??? SSE vector cost should be used here. */
26613 *total = cost->fadd;
26614 return false;
26615 }
26616 /* FALLTHRU */
26617
26618 case AND:
26619 case IOR:
26620 case XOR:
26621 if (!TARGET_64BIT && mode == DImode)
26622 {
26623 *total = (cost->add * 2
26624 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26625 << (GET_MODE (XEXP (x, 0)) != DImode))
26626 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26627 << (GET_MODE (XEXP (x, 1)) != DImode)));
26628 return true;
26629 }
26630 /* FALLTHRU */
26631
26632 case NEG:
26633 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26634 {
26635 /* ??? SSE cost should be used here. */
26636 *total = cost->fchs;
26637 return false;
26638 }
26639 else if (X87_FLOAT_MODE_P (mode))
26640 {
26641 *total = cost->fchs;
26642 return false;
26643 }
26644 else if (FLOAT_MODE_P (mode))
26645 {
26646 /* ??? SSE vector cost should be used here. */
26647 *total = cost->fchs;
26648 return false;
26649 }
26650 /* FALLTHRU */
26651
26652 case NOT:
26653 if (!TARGET_64BIT && mode == DImode)
26654 *total = cost->add * 2;
26655 else
26656 *total = cost->add;
26657 return false;
26658
26659 case COMPARE:
26660 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26661 && XEXP (XEXP (x, 0), 1) == const1_rtx
26662 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26663 && XEXP (x, 1) == const0_rtx)
26664 {
26665 /* This kind of construct is implemented using test[bwl].
26666 Treat it as if we had an AND. */
26667 *total = (cost->add
26668 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26669 + rtx_cost (const1_rtx, outer_code, speed));
26670 return true;
26671 }
26672 return false;
26673
26674 case FLOAT_EXTEND:
26675 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26676 *total = 0;
26677 return false;
26678
26679 case ABS:
26680 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26681 /* ??? SSE cost should be used here. */
26682 *total = cost->fabs;
26683 else if (X87_FLOAT_MODE_P (mode))
26684 *total = cost->fabs;
26685 else if (FLOAT_MODE_P (mode))
26686 /* ??? SSE vector cost should be used here. */
26687 *total = cost->fabs;
26688 return false;
26689
26690 case SQRT:
26691 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26692 /* ??? SSE cost should be used here. */
26693 *total = cost->fsqrt;
26694 else if (X87_FLOAT_MODE_P (mode))
26695 *total = cost->fsqrt;
26696 else if (FLOAT_MODE_P (mode))
26697 /* ??? SSE vector cost should be used here. */
26698 *total = cost->fsqrt;
26699 return false;
26700
26701 case UNSPEC:
26702 if (XINT (x, 1) == UNSPEC_TP)
26703 *total = 0;
26704 return false;
26705
26706 default:
26707 return false;
26708 }
26709 }
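/* The MULT case above estimates the cost of multiplying by a constant from
   the number of set bits in that constant, using the classic "clear the
   lowest set bit" loop.  A stand-alone sketch of that counting step follows;
   it is not used by the compiler and the function name is made up for
   illustration.  */
#if 0
static int
count_set_bits_sketch (unsigned HOST_WIDE_INT value)
{
  int nbits = 0;

  /* Each iteration clears the lowest set bit, so the loop body runs once
     per set bit; e.g. value == 10 (binary 1010) yields nbits == 2.  */
  for (; value != 0; value &= value - 1)
    nbits++;
  return nbits;
}
#endif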
26710
26711 #if TARGET_MACHO
26712
26713 static int current_machopic_label_num;
26714
26715 /* Given a symbol name and its associated stub, write out the
26716 definition of the stub. */
26717
26718 void
26719 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26720 {
26721 unsigned int length;
26722 char *binder_name, *symbol_name, lazy_ptr_name[32];
26723 int label = ++current_machopic_label_num;
26724
26725 /* For 64-bit we shouldn't get here. */
26726 gcc_assert (!TARGET_64BIT);
26727
26728 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26729 symb = (*targetm.strip_name_encoding) (symb);
26730
26731 length = strlen (stub);
26732 binder_name = XALLOCAVEC (char, length + 32);
26733 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26734
26735 length = strlen (symb);
26736 symbol_name = XALLOCAVEC (char, length + 32);
26737 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26738
26739 sprintf (lazy_ptr_name, "L%d$lz", label);
26740
26741 if (MACHOPIC_PURE)
26742 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26743 else
26744 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26745
26746 fprintf (file, "%s:\n", stub);
26747 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26748
26749 if (MACHOPIC_PURE)
26750 {
26751 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26752 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26753 fprintf (file, "\tjmp\t*%%edx\n");
26754 }
26755 else
26756 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26757
26758 fprintf (file, "%s:\n", binder_name);
26759
26760 if (MACHOPIC_PURE)
26761 {
26762 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26763 fprintf (file, "\tpushl\t%%eax\n");
26764 }
26765 else
26766 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26767
26768 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
26769
26770 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26771 fprintf (file, "%s:\n", lazy_ptr_name);
26772 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26773 fprintf (file, "\t.long %s\n", binder_name);
26774 }
26775
26776 void
26777 darwin_x86_file_end (void)
26778 {
26779 darwin_file_end ();
26780 ix86_file_end ();
26781 }
26782 #endif /* TARGET_MACHO */
26783
26784 /* Order the registers for register allocator. */
26785
26786 void
26787 x86_order_regs_for_local_alloc (void)
26788 {
26789 int pos = 0;
26790 int i;
26791
26792 /* First allocate the local general purpose registers. */
26793 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26794 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26795 reg_alloc_order [pos++] = i;
26796
26797 /* Global general purpose registers. */
26798 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26799 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26800 reg_alloc_order [pos++] = i;
26801
26802 /* x87 registers come first in case we are doing FP math
26803 using them. */
26804 if (!TARGET_SSE_MATH)
26805 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26806 reg_alloc_order [pos++] = i;
26807
26808 /* SSE registers. */
26809 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26810 reg_alloc_order [pos++] = i;
26811 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26812 reg_alloc_order [pos++] = i;
26813
26814 /* x87 registers. */
26815 if (TARGET_SSE_MATH)
26816 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26817 reg_alloc_order [pos++] = i;
26818
26819 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26820 reg_alloc_order [pos++] = i;
26821
26822 /* Initialize the rest of the array, as we do not allocate some registers
26823 at all. */
26824 while (pos < FIRST_PSEUDO_REGISTER)
26825 reg_alloc_order [pos++] = 0;
26826 }
26827
26828 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26829 struct attribute_spec.handler. */
26830 static tree
26831 ix86_handle_abi_attribute (tree *node, tree name,
26832 tree args ATTRIBUTE_UNUSED,
26833 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26834 {
26835 if (TREE_CODE (*node) != FUNCTION_TYPE
26836 && TREE_CODE (*node) != METHOD_TYPE
26837 && TREE_CODE (*node) != FIELD_DECL
26838 && TREE_CODE (*node) != TYPE_DECL)
26839 {
26840 warning (OPT_Wattributes, "%qs attribute only applies to functions",
26841 IDENTIFIER_POINTER (name));
26842 *no_add_attrs = true;
26843 return NULL_TREE;
26844 }
26845 if (!TARGET_64BIT)
26846 {
26847 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26848 IDENTIFIER_POINTER (name));
26849 *no_add_attrs = true;
26850 return NULL_TREE;
26851 }
26852
26853 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
26854 if (is_attribute_p ("ms_abi", name))
26855 {
26856 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26857 {
26858 error ("ms_abi and sysv_abi attributes are not compatible");
26859 }
26860
26861 return NULL_TREE;
26862 }
26863 else if (is_attribute_p ("sysv_abi", name))
26864 {
26865 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26866 {
26867 error ("ms_abi and sysv_abi attributes are not compatible");
26868 }
26869
26870 return NULL_TREE;
26871 }
26872
26873 return NULL_TREE;
26874 }
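/* Typical use of the attributes handled above (illustrative only):

     extern void ms_fn (int) __attribute__ ((ms_abi));
     extern void sysv_fn (int) __attribute__ ((sysv_abi));

   Both attributes are accepted only for 64-bit targets, and applying both
   to the same type is rejected with the error emitted above.  */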
26875
26876 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26877 struct attribute_spec.handler. */
26878 static tree
26879 ix86_handle_struct_attribute (tree *node, tree name,
26880 tree args ATTRIBUTE_UNUSED,
26881 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26882 {
26883 tree *type = NULL;
26884 if (DECL_P (*node))
26885 {
26886 if (TREE_CODE (*node) == TYPE_DECL)
26887 type = &TREE_TYPE (*node);
26888 }
26889 else
26890 type = node;
26891
26892 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26893 || TREE_CODE (*type) == UNION_TYPE)))
26894 {
26895 warning (OPT_Wattributes, "%qs attribute ignored",
26896 IDENTIFIER_POINTER (name));
26897 *no_add_attrs = true;
26898 }
26899
26900 else if ((is_attribute_p ("ms_struct", name)
26901 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26902 || ((is_attribute_p ("gcc_struct", name)
26903 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26904 {
26905 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26906 IDENTIFIER_POINTER (name));
26907 *no_add_attrs = true;
26908 }
26909
26910 return NULL_TREE;
26911 }
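/* Typical use of the attributes handled above (illustrative only):

     struct __attribute__ ((ms_struct)) s { char c; int i; };

   The attribute is accepted on struct and union types; combining ms_struct
   with gcc_struct on the same type is ignored with a warning.  */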
26912
26913 static bool
26914 ix86_ms_bitfield_layout_p (const_tree record_type)
26915 {
26916 return (TARGET_MS_BITFIELD_LAYOUT &&
26917 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26918 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26919 }
26920
26921 /* Returns an expression indicating where the this parameter is
26922 located on entry to the FUNCTION. */
26923
26924 static rtx
26925 x86_this_parameter (tree function)
26926 {
26927 tree type = TREE_TYPE (function);
26928 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26929 int nregs;
26930
26931 if (TARGET_64BIT)
26932 {
26933 const int *parm_regs;
26934
26935 if (ix86_function_type_abi (type) == MS_ABI)
26936 parm_regs = x86_64_ms_abi_int_parameter_registers;
26937 else
26938 parm_regs = x86_64_int_parameter_registers;
26939 return gen_rtx_REG (DImode, parm_regs[aggr]);
26940 }
26941
26942 nregs = ix86_function_regparm (type, function);
26943
26944 if (nregs > 0 && !stdarg_p (type))
26945 {
26946 int regno;
26947
26948 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26949 regno = aggr ? DX_REG : CX_REG;
26950 else
26951 {
26952 regno = AX_REG;
26953 if (aggr)
26954 {
26955 regno = DX_REG;
26956 if (nregs == 1)
26957 return gen_rtx_MEM (SImode,
26958 plus_constant (stack_pointer_rtx, 4));
26959 }
26960 }
26961 return gen_rtx_REG (SImode, regno);
26962 }
26963
26964 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26965 }
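/* For example, on 32-bit targets a fastcall method receives `this' in %ecx
   (%edx when the return value is an aggregate returned via hidden pointer),
   while without register parameters it is read from the stack at 4(%esp),
   or 8(%esp) in the aggregate case.  (Illustrative summary of the cases
   handled above.)  */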
26966
26967 /* Determine whether x86_output_mi_thunk can succeed. */
26968
26969 static bool
26970 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26971 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26972 HOST_WIDE_INT vcall_offset, const_tree function)
26973 {
26974 /* 64-bit can handle anything. */
26975 if (TARGET_64BIT)
26976 return true;
26977
26978 /* For 32-bit, everything's fine if we have one free register. */
26979 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26980 return true;
26981
26982 /* Need a free register for vcall_offset. */
26983 if (vcall_offset)
26984 return false;
26985
26986 /* Need a free register for GOT references. */
26987 if (flag_pic && !(*targetm.binds_local_p) (function))
26988 return false;
26989
26990 /* Otherwise ok. */
26991 return true;
26992 }
26993
26994 /* Output the assembler code for a thunk function. THUNK_DECL is the
26995 declaration for the thunk function itself, FUNCTION is the decl for
26996 the target function. DELTA is an immediate constant offset to be
26997 added to THIS. If VCALL_OFFSET is nonzero, the word at
26998 *(*this + vcall_offset) should be added to THIS. */
26999
27000 static void
27001 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27002 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27003 HOST_WIDE_INT vcall_offset, tree function)
27004 {
27005 rtx xops[3];
27006 rtx this_param = x86_this_parameter (function);
27007 rtx this_reg, tmp;
27008
27009 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27010 pull it in now and let DELTA benefit. */
27011 if (REG_P (this_param))
27012 this_reg = this_param;
27013 else if (vcall_offset)
27014 {
27015 /* Put the this parameter into %eax. */
27016 xops[0] = this_param;
27017 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27018 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27019 }
27020 else
27021 this_reg = NULL_RTX;
27022
27023 /* Adjust the this parameter by a fixed constant. */
27024 if (delta)
27025 {
27026 xops[0] = GEN_INT (delta);
27027 xops[1] = this_reg ? this_reg : this_param;
27028 if (TARGET_64BIT)
27029 {
27030 if (!x86_64_general_operand (xops[0], DImode))
27031 {
27032 tmp = gen_rtx_REG (DImode, R10_REG);
27033 xops[1] = tmp;
27034 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27035 xops[0] = tmp;
27036 xops[1] = this_param;
27037 }
27038 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27039 }
27040 else
27041 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27042 }
27043
27044 /* Adjust the this parameter by a value stored in the vtable. */
27045 if (vcall_offset)
27046 {
27047 if (TARGET_64BIT)
27048 tmp = gen_rtx_REG (DImode, R10_REG);
27049 else
27050 {
27051 int tmp_regno = CX_REG;
27052 if (lookup_attribute ("fastcall",
27053 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27054 tmp_regno = AX_REG;
27055 tmp = gen_rtx_REG (SImode, tmp_regno);
27056 }
27057
27058 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27059 xops[1] = tmp;
27060 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27061
27062 /* Adjust the this parameter. */
27063 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27064 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27065 {
27066 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27067 xops[0] = GEN_INT (vcall_offset);
27068 xops[1] = tmp2;
27069 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27070 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27071 }
27072 xops[1] = this_reg;
27073 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27074 }
27075
27076 /* If necessary, drop THIS back to its stack slot. */
27077 if (this_reg && this_reg != this_param)
27078 {
27079 xops[0] = this_reg;
27080 xops[1] = this_param;
27081 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27082 }
27083
27084 xops[0] = XEXP (DECL_RTL (function), 0);
27085 if (TARGET_64BIT)
27086 {
27087 if (!flag_pic || (*targetm.binds_local_p) (function))
27088 output_asm_insn ("jmp\t%P0", xops);
27089 /* All thunks should be in the same object as their target,
27090 and thus binds_local_p should be true. */
27091 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27092 gcc_unreachable ();
27093 else
27094 {
27095 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27096 tmp = gen_rtx_CONST (Pmode, tmp);
27097 tmp = gen_rtx_MEM (QImode, tmp);
27098 xops[0] = tmp;
27099 output_asm_insn ("jmp\t%A0", xops);
27100 }
27101 }
27102 else
27103 {
27104 if (!flag_pic || (*targetm.binds_local_p) (function))
27105 output_asm_insn ("jmp\t%P0", xops);
27106 else
27107 #if TARGET_MACHO
27108 if (TARGET_MACHO)
27109 {
27110 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27111 tmp = (gen_rtx_SYMBOL_REF
27112 (Pmode,
27113 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27114 tmp = gen_rtx_MEM (QImode, tmp);
27115 xops[0] = tmp;
27116 output_asm_insn ("jmp\t%0", xops);
27117 }
27118 else
27119 #endif /* TARGET_MACHO */
27120 {
27121 tmp = gen_rtx_REG (SImode, CX_REG);
27122 output_set_got (tmp, NULL_RTX);
27123
27124 xops[1] = tmp;
27125 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27126 output_asm_insn ("jmp\t{*}%1", xops);
27127 }
27128 }
27129 }
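/* For a simple non-PIC thunk with only a DELTA adjustment and `this' left
   in its stack slot, the code emitted above reduces to something like
   (illustrative only, AT&T syntax):

       addl  $8, 4(%esp)
       jmp   target_function

   The VCALL_OFFSET path additionally loads the vtable pointer, adds the
   word at vcall_offset within the vtable, and only then jumps.  */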
27130
27131 static void
27132 x86_file_start (void)
27133 {
27134 default_file_start ();
27135 #if TARGET_MACHO
27136 darwin_file_start ();
27137 #endif
27138 if (X86_FILE_START_VERSION_DIRECTIVE)
27139 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27140 if (X86_FILE_START_FLTUSED)
27141 fputs ("\t.global\t__fltused\n", asm_out_file);
27142 if (ix86_asm_dialect == ASM_INTEL)
27143 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27144 }
27145
27146 int
27147 x86_field_alignment (tree field, int computed)
27148 {
27149 enum machine_mode mode;
27150 tree type = TREE_TYPE (field);
27151
27152 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27153 return computed;
27154 mode = TYPE_MODE (strip_array_types (type));
27155 if (mode == DFmode || mode == DCmode
27156 || GET_MODE_CLASS (mode) == MODE_INT
27157 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27158 return MIN (32, computed);
27159 return computed;
27160 }
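/* For example, without -malign-double a "double" field inside a struct is
   aligned to only 4 bytes on IA-32, matching the traditional layout, while
   64-bit targets keep the natural 8-byte alignment.  (Illustrative note.)  */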
27161
27162 /* Output assembler code to FILE to increment profiler label # LABELNO
27163 for profiling a function entry. */
27164 void
27165 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27166 {
27167 if (TARGET_64BIT)
27168 {
27169 #ifndef NO_PROFILE_COUNTERS
27170 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27171 #endif
27172
27173 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27174 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27175 else
27176 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27177 }
27178 else if (flag_pic)
27179 {
27180 #ifndef NO_PROFILE_COUNTERS
27181 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27182 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27183 #endif
27184 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27185 }
27186 else
27187 {
27188 #ifndef NO_PROFILE_COUNTERS
27189 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27190 PROFILE_COUNT_REGISTER);
27191 #endif
27192 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27193 }
27194 }
27195
27196 /* We don't have exact information about the insn sizes, but we may assume
27197 quite safely that we are informed about all 1 byte insns and memory
27198 address sizes. This is enough to eliminate unnecessary padding in
27199 99% of cases. */
27200
27201 static int
27202 min_insn_size (rtx insn)
27203 {
27204 int l = 0;
27205
27206 if (!INSN_P (insn) || !active_insn_p (insn))
27207 return 0;
27208
27209 /* Discard alignments we've emitted and jump instructions. */
27210 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27211 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27212 return 0;
27213 if (JUMP_P (insn)
27214 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
27215 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
27216 return 0;
27217
27218 /* Important case - calls are always 5 bytes.
27219 It is common to have many calls in a row. */
27220 if (CALL_P (insn)
27221 && symbolic_reference_mentioned_p (PATTERN (insn))
27222 && !SIBLING_CALL_P (insn))
27223 return 5;
27224 if (get_attr_length (insn) <= 1)
27225 return 1;
27226
27227 /* For normal instructions we may rely on the sizes of addresses
27228 and the presence of a symbol to require 4 bytes of encoding.
27229 This is not the case for jumps, where references are PC relative. */
27230 if (!JUMP_P (insn))
27231 {
27232 l = get_attr_length_address (insn);
27233 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27234 l = 4;
27235 }
27236 if (l)
27237 return 1+l;
27238 else
27239 return 2;
27240 }
27241
27242 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
27243 16 byte window. */
27244
27245 static void
27246 ix86_avoid_jump_misspredicts (void)
27247 {
27248 rtx insn, start = get_insns ();
27249 int nbytes = 0, njumps = 0;
27250 int isjump = 0;
27251
27252 /* Look for all minimal intervals of instructions containing 4 jumps.
27253 The intervals are bounded by START and INSN. NBYTES is the total
27254 size of instructions in the interval including INSN and not including
27255 START. When NBYTES is smaller than 16, it is possible that START
27256 and INSN end up in the same 16 byte window.
27257
27258 The smallest offset in the window at which INSN can start is the case
27259 where START ends at offset 0. The offset of INSN is then
27260 NBYTES - sizeof (INSN). We add a p2align to the 16 byte window with
27261 maxskip 15 - NBYTES + sizeof (INSN). */
27262 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27263 {
27264
27265 nbytes += min_insn_size (insn);
27266 if (dump_file)
27267 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
27268 INSN_UID (insn), min_insn_size (insn));
27269 if ((JUMP_P (insn)
27270 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27271 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27272 || CALL_P (insn))
27273 njumps++;
27274 else
27275 continue;
27276
27277 while (njumps > 3)
27278 {
27279 start = NEXT_INSN (start);
27280 if ((JUMP_P (start)
27281 && GET_CODE (PATTERN (start)) != ADDR_VEC
27282 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27283 || CALL_P (start))
27284 njumps--, isjump = 1;
27285 else
27286 isjump = 0;
27287 nbytes -= min_insn_size (start);
27288 }
27289 gcc_assert (njumps >= 0);
27290 if (dump_file)
27291 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27292 INSN_UID (start), INSN_UID (insn), nbytes);
27293
27294 if (njumps == 3 && isjump && nbytes < 16)
27295 {
27296 int padsize = 15 - nbytes + min_insn_size (insn);
27297
27298 if (dump_file)
27299 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27300 INSN_UID (insn), padsize);
27301 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
27302 }
27303 }
27304 }
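/* Worked example for the padding computed above (illustrative only): if the
   interval holding the three previous jumps plus the current one spans
   nbytes == 12 and the current jump is 3 bytes long, padsize == 15 - 12 + 3
   == 6, and the emitted p2align keeps the four jumps from sharing a single
   16 byte window.  */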
27305
27306 /* AMD Athlon works faster
27307 when RET is not the destination of a conditional jump or directly preceded
27308 by another jump instruction. We avoid the penalty by inserting a NOP just
27309 before the RET instructions in such cases. */
27310 static void
27311 ix86_pad_returns (void)
27312 {
27313 edge e;
27314 edge_iterator ei;
27315
27316 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27317 {
27318 basic_block bb = e->src;
27319 rtx ret = BB_END (bb);
27320 rtx prev;
27321 bool replace = false;
27322
27323 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27324 || optimize_bb_for_size_p (bb))
27325 continue;
27326 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27327 if (active_insn_p (prev) || LABEL_P (prev))
27328 break;
27329 if (prev && LABEL_P (prev))
27330 {
27331 edge e;
27332 edge_iterator ei;
27333
27334 FOR_EACH_EDGE (e, ei, bb->preds)
27335 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27336 && !(e->flags & EDGE_FALLTHRU))
27337 replace = true;
27338 }
27339 if (!replace)
27340 {
27341 prev = prev_active_insn (ret);
27342 if (prev
27343 && ((JUMP_P (prev) && any_condjump_p (prev))
27344 || CALL_P (prev)))
27345 replace = true;
27346 /* Empty functions get a branch mispredict even when the jump destination
27347 is not visible to us. */
27348 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27349 replace = true;
27350 }
27351 if (replace)
27352 {
27353 emit_insn_before (gen_return_internal_long (), ret);
27354 delete_insn (ret);
27355 }
27356 }
27357 }
27358
27359 /* Implement machine specific optimizations. We implement padding of returns
27360 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
27361 static void
27362 ix86_reorg (void)
27363 {
27364 if (TARGET_PAD_RETURNS && optimize
27365 && optimize_function_for_speed_p (cfun))
27366 ix86_pad_returns ();
27367 if (TARGET_FOUR_JUMP_LIMIT && optimize
27368 && optimize_function_for_speed_p (cfun))
27369 ix86_avoid_jump_misspredicts ();
27370 }
27371
27372 /* Return nonzero when a QImode register that must be represented via a REX
27373 prefix is used. */
27374 bool
27375 x86_extended_QIreg_mentioned_p (rtx insn)
27376 {
27377 int i;
27378 extract_insn_cached (insn);
27379 for (i = 0; i < recog_data.n_operands; i++)
27380 if (REG_P (recog_data.operand[i])
27381 && REGNO (recog_data.operand[i]) > BX_REG)
27382 return true;
27383 return false;
27384 }
27385
27386 /* Return nonzero when P points to a register encoded via a REX prefix.
27387 Called via for_each_rtx. */
27388 static int
27389 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27390 {
27391 unsigned int regno;
27392 if (!REG_P (*p))
27393 return 0;
27394 regno = REGNO (*p);
27395 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27396 }
27397
27398 /* Return true when INSN mentions register that must be encoded using REX
27399 prefix. */
27400 bool
27401 x86_extended_reg_mentioned_p (rtx insn)
27402 {
27403 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27404 extended_reg_mentioned_1, NULL);
27405 }
27406
27407 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27408 optabs would emit if we didn't have TFmode patterns. */
27409
27410 void
27411 x86_emit_floatuns (rtx operands[2])
27412 {
27413 rtx neglab, donelab, i0, i1, f0, in, out;
27414 enum machine_mode mode, inmode;
27415
27416 inmode = GET_MODE (operands[1]);
27417 gcc_assert (inmode == SImode || inmode == DImode);
27418
27419 out = operands[0];
27420 in = force_reg (inmode, operands[1]);
27421 mode = GET_MODE (out);
27422 neglab = gen_label_rtx ();
27423 donelab = gen_label_rtx ();
27424 f0 = gen_reg_rtx (mode);
27425
27426 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27427
27428 expand_float (out, in, 0);
27429
27430 emit_jump_insn (gen_jump (donelab));
27431 emit_barrier ();
27432
27433 emit_label (neglab);
27434
27435 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27436 1, OPTAB_DIRECT);
27437 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27438 1, OPTAB_DIRECT);
27439 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27440
27441 expand_float (f0, i0, 0);
27442
27443 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27444
27445 emit_label (donelab);
27446 }
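/* The sequence emitted above is the usual unsigned-to-float idiom: inputs
   that fit in the signed range are converted directly; otherwise the input
   is halved with a sticky low bit, converted as signed, and doubled.  A
   stand-alone scalar sketch of the same computation follows; the function
   name and the fixed DImode/DFmode widths are illustrative assumptions.  */
#if 0
static double
floatuns_sketch (unsigned long long x)
{
  unsigned long long half;

  if ((long long) x >= 0)
    return (double) (long long) x;	/* fits in the signed range */

  /* Halve the value, keeping the lost low bit as a sticky bit so the
     rounding of the final result is unaffected, then double.  */
  half = (x >> 1) | (x & 1);
  return (double) (long long) half * 2.0;
}
#endif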
27447 \f
27448 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27449 with all elements equal to VAR. Return true if successful. */
27450
27451 static bool
27452 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27453 rtx target, rtx val)
27454 {
27455 enum machine_mode hmode, smode, wsmode, wvmode;
27456 rtx x;
27457
27458 switch (mode)
27459 {
27460 case V2SImode:
27461 case V2SFmode:
27462 if (!mmx_ok)
27463 return false;
27464 /* FALLTHRU */
27465
27466 case V2DFmode:
27467 case V2DImode:
27468 case V4SFmode:
27469 case V4SImode:
27470 val = force_reg (GET_MODE_INNER (mode), val);
27471 x = gen_rtx_VEC_DUPLICATE (mode, val);
27472 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27473 return true;
27474
27475 case V4HImode:
27476 if (!mmx_ok)
27477 return false;
27478 if (TARGET_SSE || TARGET_3DNOW_A)
27479 {
27480 val = gen_lowpart (SImode, val);
27481 x = gen_rtx_TRUNCATE (HImode, val);
27482 x = gen_rtx_VEC_DUPLICATE (mode, x);
27483 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27484 return true;
27485 }
27486 else
27487 {
27488 smode = HImode;
27489 wsmode = SImode;
27490 wvmode = V2SImode;
27491 goto widen;
27492 }
27493
27494 case V8QImode:
27495 if (!mmx_ok)
27496 return false;
27497 smode = QImode;
27498 wsmode = HImode;
27499 wvmode = V4HImode;
27500 goto widen;
27501 case V8HImode:
27502 if (TARGET_SSE2)
27503 {
27504 rtx tmp1, tmp2;
27505 /* Extend HImode to SImode using a paradoxical SUBREG. */
27506 tmp1 = gen_reg_rtx (SImode);
27507 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27508 /* Insert the SImode value as low element of V4SImode vector. */
27509 tmp2 = gen_reg_rtx (V4SImode);
27510 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27511 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27512 CONST0_RTX (V4SImode),
27513 const1_rtx);
27514 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27515 /* Cast the V4SImode vector back to a V8HImode vector. */
27516 tmp1 = gen_reg_rtx (V8HImode);
27517 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27518 /* Duplicate the low short through the whole low SImode word. */
27519 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27520 /* Cast the V8HImode vector back to a V4SImode vector. */
27521 tmp2 = gen_reg_rtx (V4SImode);
27522 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27523 /* Replicate the low element of the V4SImode vector. */
27524 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27525 /* Cast the V4SImode vector back to V8HImode, and store in target. */
27526 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27527 return true;
27528 }
27529 smode = HImode;
27530 wsmode = SImode;
27531 wvmode = V4SImode;
27532 goto widen;
27533 case V16QImode:
27534 if (TARGET_SSE2)
27535 {
27536 rtx tmp1, tmp2;
27537 /* Extend QImode to SImode using a paradoxical SUBREG. */
27538 tmp1 = gen_reg_rtx (SImode);
27539 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27540 /* Insert the SImode value as low element of V4SImode vector. */
27541 tmp2 = gen_reg_rtx (V4SImode);
27542 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27543 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27544 CONST0_RTX (V4SImode),
27545 const1_rtx);
27546 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27547 /* Cast the V4SImode vector back to a V16QImode vector. */
27548 tmp1 = gen_reg_rtx (V16QImode);
27549 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27550 /* Duplicate the low byte through the whole low SImode word. */
27551 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27552 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27553 /* Cast the V16QImode vector back to a V4SImode vector. */
27554 tmp2 = gen_reg_rtx (V4SImode);
27555 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27556 /* Replicate the low element of the V4SImode vector. */
27557 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27558 /* Cast the V4SImode vector back to V16QImode, and store in target. */
27559 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27560 return true;
27561 }
27562 smode = QImode;
27563 wsmode = HImode;
27564 wvmode = V8HImode;
27565 goto widen;
27566 widen:
27567 /* Replicate the value once into the next wider mode and recurse. */
27568 val = convert_modes (wsmode, smode, val, true);
27569 x = expand_simple_binop (wsmode, ASHIFT, val,
27570 GEN_INT (GET_MODE_BITSIZE (smode)),
27571 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27572 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27573
27574 x = gen_reg_rtx (wvmode);
27575 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27576 gcc_unreachable ();
27577 emit_move_insn (target, gen_lowpart (mode, x));
27578 return true;
27579
27580 case V4DFmode:
27581 hmode = V2DFmode;
27582 goto half;
27583 case V4DImode:
27584 hmode = V2DImode;
27585 goto half;
27586 case V8SFmode:
27587 hmode = V4SFmode;
27588 goto half;
27589 case V8SImode:
27590 hmode = V4SImode;
27591 goto half;
27592 case V16HImode:
27593 hmode = V8HImode;
27594 goto half;
27595 case V32QImode:
27596 hmode = V16QImode;
27597 goto half;
27598 half:
27599 {
27600 rtx tmp = gen_reg_rtx (hmode);
27601 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27602 emit_insn (gen_rtx_SET (VOIDmode, target,
27603 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27604 }
27605 return true;
27606
27607 default:
27608 return false;
27609 }
27610 }
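/* The "widen" path above builds a broadcast by pairing the value with a
   shifted copy of itself in the next wider scalar mode, then recursing on
   the wider vector mode.  A scalar sketch of that replication step for a
   byte follows (illustrative only; the function name is made up):  */
#if 0
static unsigned int
replicate_byte_sketch (unsigned char b)
{
  unsigned int v = b;

  v |= v << 8;		/* QImode pair -> HImode: 0xAB -> 0xABAB */
  v |= v << 16;		/* HImode pair -> SImode: 0xABABABAB */
  return v;
}
#endif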
27611
27612 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27613 whose ONE_VAR element is VAR, and other elements are zero. Return true
27614 if successful. */
27615
27616 static bool
27617 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27618 rtx target, rtx var, int one_var)
27619 {
27620 enum machine_mode vsimode;
27621 rtx new_target;
27622 rtx x, tmp;
27623 bool use_vector_set = false;
27624
27625 switch (mode)
27626 {
27627 case V2DImode:
27628 /* For SSE4.1, we normally use vector set. But if the second
27629 element is zero and inter-unit moves are OK, we use movq
27630 instead. */
27631 use_vector_set = (TARGET_64BIT
27632 && TARGET_SSE4_1
27633 && !(TARGET_INTER_UNIT_MOVES
27634 && one_var == 0));
27635 break;
27636 case V16QImode:
27637 case V4SImode:
27638 case V4SFmode:
27639 use_vector_set = TARGET_SSE4_1;
27640 break;
27641 case V8HImode:
27642 use_vector_set = TARGET_SSE2;
27643 break;
27644 case V4HImode:
27645 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27646 break;
27647 case V32QImode:
27648 case V16HImode:
27649 case V8SImode:
27650 case V8SFmode:
27651 case V4DFmode:
27652 use_vector_set = TARGET_AVX;
27653 break;
27654 case V4DImode:
27655 /* Use ix86_expand_vector_set in 64bit mode only. */
27656 use_vector_set = TARGET_AVX && TARGET_64BIT;
27657 break;
27658 default:
27659 break;
27660 }
27661
27662 if (use_vector_set)
27663 {
27664 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27665 var = force_reg (GET_MODE_INNER (mode), var);
27666 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27667 return true;
27668 }
27669
27670 switch (mode)
27671 {
27672 case V2SFmode:
27673 case V2SImode:
27674 if (!mmx_ok)
27675 return false;
27676 /* FALLTHRU */
27677
27678 case V2DFmode:
27679 case V2DImode:
27680 if (one_var != 0)
27681 return false;
27682 var = force_reg (GET_MODE_INNER (mode), var);
27683 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27684 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27685 return true;
27686
27687 case V4SFmode:
27688 case V4SImode:
27689 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27690 new_target = gen_reg_rtx (mode);
27691 else
27692 new_target = target;
27693 var = force_reg (GET_MODE_INNER (mode), var);
27694 x = gen_rtx_VEC_DUPLICATE (mode, var);
27695 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27696 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27697 if (one_var != 0)
27698 {
27699 /* We need to shuffle the value to the correct position, so
27700 create a new pseudo to store the intermediate result. */
27701
27702 /* With SSE2, we can use the integer shuffle insns. */
27703 if (mode != V4SFmode && TARGET_SSE2)
27704 {
27705 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27706 GEN_INT (1),
27707 GEN_INT (one_var == 1 ? 0 : 1),
27708 GEN_INT (one_var == 2 ? 0 : 1),
27709 GEN_INT (one_var == 3 ? 0 : 1)));
27710 if (target != new_target)
27711 emit_move_insn (target, new_target);
27712 return true;
27713 }
27714
27715 /* Otherwise convert the intermediate result to V4SFmode and
27716 use the SSE1 shuffle instructions. */
27717 if (mode != V4SFmode)
27718 {
27719 tmp = gen_reg_rtx (V4SFmode);
27720 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27721 }
27722 else
27723 tmp = new_target;
27724
27725 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27726 GEN_INT (1),
27727 GEN_INT (one_var == 1 ? 0 : 1),
27728 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27729 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27730
27731 if (mode != V4SFmode)
27732 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27733 else if (tmp != target)
27734 emit_move_insn (target, tmp);
27735 }
27736 else if (target != new_target)
27737 emit_move_insn (target, new_target);
27738 return true;
27739
27740 case V8HImode:
27741 case V16QImode:
27742 vsimode = V4SImode;
27743 goto widen;
27744 case V4HImode:
27745 case V8QImode:
27746 if (!mmx_ok)
27747 return false;
27748 vsimode = V2SImode;
27749 goto widen;
27750 widen:
27751 if (one_var != 0)
27752 return false;
27753
27754 /* Zero extend the variable element to SImode and recurse. */
27755 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27756
27757 x = gen_reg_rtx (vsimode);
27758 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27759 var, one_var))
27760 gcc_unreachable ();
27761
27762 emit_move_insn (target, gen_lowpart (mode, x));
27763 return true;
27764
27765 default:
27766 return false;
27767 }
27768 }
27769
27770 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27771 consisting of the values in VALS. It is known that all elements
27772 except ONE_VAR are constants. Return true if successful. */
27773
27774 static bool
27775 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27776 rtx target, rtx vals, int one_var)
27777 {
27778 rtx var = XVECEXP (vals, 0, one_var);
27779 enum machine_mode wmode;
27780 rtx const_vec, x;
27781
27782 const_vec = copy_rtx (vals);
27783 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27784 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27785
27786 switch (mode)
27787 {
27788 case V2DFmode:
27789 case V2DImode:
27790 case V2SFmode:
27791 case V2SImode:
27792 /* For the two element vectors, it's just as easy to use
27793 the general case. */
27794 return false;
27795
27796 case V4DImode:
27797 /* Use ix86_expand_vector_set in 64bit mode only. */
27798 if (!TARGET_64BIT)
27799 return false;
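/* FALLTHRU */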
27800 case V4DFmode:
27801 case V8SFmode:
27802 case V8SImode:
27803 case V16HImode:
27804 case V32QImode:
27805 case V4SFmode:
27806 case V4SImode:
27807 case V8HImode:
27808 case V4HImode:
27809 break;
27810
27811 case V16QImode:
27812 if (TARGET_SSE4_1)
27813 break;
27814 wmode = V8HImode;
27815 goto widen;
27816 case V8QImode:
27817 wmode = V4HImode;
27818 goto widen;
27819 widen:
27820 /* There's no way to set one QImode entry easily. Combine
27821 the variable value with its adjacent constant value, and
27822 promote to an HImode set. */
27823 x = XVECEXP (vals, 0, one_var ^ 1);
27824 if (one_var & 1)
27825 {
27826 var = convert_modes (HImode, QImode, var, true);
27827 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27828 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27829 x = GEN_INT (INTVAL (x) & 0xff);
27830 }
27831 else
27832 {
27833 var = convert_modes (HImode, QImode, var, true);
27834 x = gen_int_mode (INTVAL (x) << 8, HImode);
27835 }
27836 if (x != const0_rtx)
27837 var = expand_simple_binop (HImode, IOR, var, x, var,
27838 1, OPTAB_LIB_WIDEN);
27839
27840 x = gen_reg_rtx (wmode);
27841 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27842 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27843
27844 emit_move_insn (target, gen_lowpart (mode, x));
27845 return true;
27846
27847 default:
27848 return false;
27849 }
27850
27851 emit_move_insn (target, const_vec);
27852 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27853 return true;
27854 }
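/* Worked example for the QImode "widen" path above (illustrative only):
   setting byte 3 of a V8QImode vector whose other elements are constants
   pairs the variable byte with the constant in byte 2, forms the HImode
   value (var << 8) | (const & 0xff), and performs a single HImode
   vector-set into element 1 of the corresponding V4HImode vector.  */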
27855
27856 /* A subroutine of ix86_expand_vector_init_general. Use vector
27857 concatenate to handle the most general case: all values variable,
27858 and none identical. */
27859
27860 static void
27861 ix86_expand_vector_init_concat (enum machine_mode mode,
27862 rtx target, rtx *ops, int n)
27863 {
27864 enum machine_mode cmode, hmode = VOIDmode;
27865 rtx first[8], second[4];
27866 rtvec v;
27867 int i, j;
27868
27869 switch (n)
27870 {
27871 case 2:
27872 switch (mode)
27873 {
27874 case V8SImode:
27875 cmode = V4SImode;
27876 break;
27877 case V8SFmode:
27878 cmode = V4SFmode;
27879 break;
27880 case V4DImode:
27881 cmode = V2DImode;
27882 break;
27883 case V4DFmode:
27884 cmode = V2DFmode;
27885 break;
27886 case V4SImode:
27887 cmode = V2SImode;
27888 break;
27889 case V4SFmode:
27890 cmode = V2SFmode;
27891 break;
27892 case V2DImode:
27893 cmode = DImode;
27894 break;
27895 case V2SImode:
27896 cmode = SImode;
27897 break;
27898 case V2DFmode:
27899 cmode = DFmode;
27900 break;
27901 case V2SFmode:
27902 cmode = SFmode;
27903 break;
27904 default:
27905 gcc_unreachable ();
27906 }
27907
27908 if (!register_operand (ops[1], cmode))
27909 ops[1] = force_reg (cmode, ops[1]);
27910 if (!register_operand (ops[0], cmode))
27911 ops[0] = force_reg (cmode, ops[0]);
27912 emit_insn (gen_rtx_SET (VOIDmode, target,
27913 gen_rtx_VEC_CONCAT (mode, ops[0],
27914 ops[1])));
27915 break;
27916
27917 case 4:
27918 switch (mode)
27919 {
27920 case V4DImode:
27921 cmode = V2DImode;
27922 break;
27923 case V4DFmode:
27924 cmode = V2DFmode;
27925 break;
27926 case V4SImode:
27927 cmode = V2SImode;
27928 break;
27929 case V4SFmode:
27930 cmode = V2SFmode;
27931 break;
27932 default:
27933 gcc_unreachable ();
27934 }
27935 goto half;
27936
27937 case 8:
27938 switch (mode)
27939 {
27940 case V8SImode:
27941 cmode = V2SImode;
27942 hmode = V4SImode;
27943 break;
27944 case V8SFmode:
27945 cmode = V2SFmode;
27946 hmode = V4SFmode;
27947 break;
27948 default:
27949 gcc_unreachable ();
27950 }
27951 goto half;
27952
27953 half:
27954 /* FIXME: We process inputs backward to help RA. PR 36222. */
27955 i = n - 1;
27956 j = (n >> 1) - 1;
27957 for (; i > 0; i -= 2, j--)
27958 {
27959 first[j] = gen_reg_rtx (cmode);
27960 v = gen_rtvec (2, ops[i - 1], ops[i]);
27961 ix86_expand_vector_init (false, first[j],
27962 gen_rtx_PARALLEL (cmode, v));
27963 }
27964
27965 n >>= 1;
27966 if (n > 2)
27967 {
27968 gcc_assert (hmode != VOIDmode);
27969 for (i = j = 0; i < n; i += 2, j++)
27970 {
27971 second[j] = gen_reg_rtx (hmode);
27972 ix86_expand_vector_init_concat (hmode, second [j],
27973 &first [i], 2);
27974 }
27975 n >>= 1;
27976 ix86_expand_vector_init_concat (mode, target, second, n);
27977 }
27978 else
27979 ix86_expand_vector_init_concat (mode, target, first, n);
27980 break;
27981
27982 default:
27983 gcc_unreachable ();
27984 }
27985 }
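/* For example, an 8-element V8SFmode initializer is handled above by
   concatenating the inputs pairwise into four V2SFmode values, those into
   two V4SFmode halves, and finally the two halves into the V8SFmode
   target.  (Illustrative summary of the recursion.)  */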
27986
27987 /* A subroutine of ix86_expand_vector_init_general. Use vector
27988 interleave to handle the most general case: all values variable,
27989 and none identical. */
27990
27991 static void
27992 ix86_expand_vector_init_interleave (enum machine_mode mode,
27993 rtx target, rtx *ops, int n)
27994 {
27995 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27996 int i, j;
27997 rtx op0, op1;
27998 rtx (*gen_load_even) (rtx, rtx, rtx);
27999 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28000 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28001
28002 switch (mode)
28003 {
28004 case V8HImode:
28005 gen_load_even = gen_vec_setv8hi;
28006 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28007 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28008 inner_mode = HImode;
28009 first_imode = V4SImode;
28010 second_imode = V2DImode;
28011 third_imode = VOIDmode;
28012 break;
28013 case V16QImode:
28014 gen_load_even = gen_vec_setv16qi;
28015 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28016 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28017 inner_mode = QImode;
28018 first_imode = V8HImode;
28019 second_imode = V4SImode;
28020 third_imode = V2DImode;
28021 break;
28022 default:
28023 gcc_unreachable ();
28024 }
28025
28026 for (i = 0; i < n; i++)
28027 {
28028 /* Extend the odd element to SImode using a paradoxical SUBREG. */
28029 op0 = gen_reg_rtx (SImode);
28030 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28031
28032 /* Insert the SImode value as low element of V4SImode vector. */
28033 op1 = gen_reg_rtx (V4SImode);
28034 op0 = gen_rtx_VEC_MERGE (V4SImode,
28035 gen_rtx_VEC_DUPLICATE (V4SImode,
28036 op0),
28037 CONST0_RTX (V4SImode),
28038 const1_rtx);
28039 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28040
28041 /* Cast the V4SImode vector back to a vector in the original mode. */
28042 op0 = gen_reg_rtx (mode);
28043 emit_move_insn (op0, gen_lowpart (mode, op1));
28044
28045 /* Load even elements into the second position. */
28046 emit_insn ((*gen_load_even) (op0,
28047 force_reg (inner_mode,
28048 ops [i + i + 1]),
28049 const1_rtx));
28050
28051 /* Cast vector to FIRST_IMODE vector. */
28052 ops[i] = gen_reg_rtx (first_imode);
28053 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28054 }
28055
28056 /* Interleave low FIRST_IMODE vectors. */
28057 for (i = j = 0; i < n; i += 2, j++)
28058 {
28059 op0 = gen_reg_rtx (first_imode);
28060 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28061
28062 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28063 ops[j] = gen_reg_rtx (second_imode);
28064 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28065 }
28066
28067 /* Interleave low SECOND_IMODE vectors. */
28068 switch (second_imode)
28069 {
28070 case V4SImode:
28071 for (i = j = 0; i < n / 2; i += 2, j++)
28072 {
28073 op0 = gen_reg_rtx (second_imode);
28074 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28075 ops[i + 1]));
28076
28077 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28078 vector. */
28079 ops[j] = gen_reg_rtx (third_imode);
28080 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28081 }
28082 second_imode = V2DImode;
28083 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28084 /* FALLTHRU */
28085
28086 case V2DImode:
28087 op0 = gen_reg_rtx (second_imode);
28088 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28089 ops[1]));
28090
28091 /* Cast the SECOND_IMODE vector back to a vector in the original
28092 mode. */
28093 emit_insn (gen_rtx_SET (VOIDmode, target,
28094 gen_lowpart (mode, op0)));
28095 break;
28096
28097 default:
28098 gcc_unreachable ();
28099 }
28100 }
28101
28102 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28103 all values variable, and none identical. */
28104
28105 static void
28106 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28107 rtx target, rtx vals)
28108 {
28109 rtx ops[32], op0, op1;
28110 enum machine_mode half_mode = VOIDmode;
28111 int n, i;
28112
28113 switch (mode)
28114 {
28115 case V2SFmode:
28116 case V2SImode:
28117 if (!mmx_ok && !TARGET_SSE)
28118 break;
28119 /* FALLTHRU */
28120
28121 case V8SFmode:
28122 case V8SImode:
28123 case V4DFmode:
28124 case V4DImode:
28125 case V4SFmode:
28126 case V4SImode:
28127 case V2DFmode:
28128 case V2DImode:
28129 n = GET_MODE_NUNITS (mode);
28130 for (i = 0; i < n; i++)
28131 ops[i] = XVECEXP (vals, 0, i);
28132 ix86_expand_vector_init_concat (mode, target, ops, n);
28133 return;
28134
28135 case V32QImode:
28136 half_mode = V16QImode;
28137 goto half;
28138
28139 case V16HImode:
28140 half_mode = V8HImode;
28141 goto half;
28142
28143 half:
28144 n = GET_MODE_NUNITS (mode);
28145 for (i = 0; i < n; i++)
28146 ops[i] = XVECEXP (vals, 0, i);
28147 op0 = gen_reg_rtx (half_mode);
28148 op1 = gen_reg_rtx (half_mode);
28149 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28150 n >> 2);
28151 ix86_expand_vector_init_interleave (half_mode, op1,
28152 &ops [n >> 1], n >> 2);
28153 emit_insn (gen_rtx_SET (VOIDmode, target,
28154 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28155 return;
28156
28157 case V16QImode:
28158 if (!TARGET_SSE4_1)
28159 break;
28160 /* FALLTHRU */
28161
28162 case V8HImode:
28163 if (!TARGET_SSE2)
28164 break;
28165
28166 /* Don't use ix86_expand_vector_init_interleave if we can't
28167 move from GPR to SSE register directly. */
28168 if (!TARGET_INTER_UNIT_MOVES)
28169 break;
28170
28171 n = GET_MODE_NUNITS (mode);
28172 for (i = 0; i < n; i++)
28173 ops[i] = XVECEXP (vals, 0, i);
28174 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28175 return;
28176
28177 case V4HImode:
28178 case V8QImode:
28179 break;
28180
28181 default:
28182 gcc_unreachable ();
28183 }
28184
28185 {
28186 int i, j, n_elts, n_words, n_elt_per_word;
28187 enum machine_mode inner_mode;
28188 rtx words[4], shift;
28189
28190 inner_mode = GET_MODE_INNER (mode);
28191 n_elts = GET_MODE_NUNITS (mode);
28192 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28193 n_elt_per_word = n_elts / n_words;
28194 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28195
28196 for (i = 0; i < n_words; ++i)
28197 {
28198 rtx word = NULL_RTX;
28199
28200 for (j = 0; j < n_elt_per_word; ++j)
28201 {
28202 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28203 elt = convert_modes (word_mode, inner_mode, elt, true);
28204
28205 if (j == 0)
28206 word = elt;
28207 else
28208 {
28209 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28210 word, 1, OPTAB_LIB_WIDEN);
28211 word = expand_simple_binop (word_mode, IOR, word, elt,
28212 word, 1, OPTAB_LIB_WIDEN);
28213 }
28214 }
28215
28216 words[i] = word;
28217 }
28218
28219 if (n_words == 1)
28220 emit_move_insn (target, gen_lowpart (mode, words[0]));
28221 else if (n_words == 2)
28222 {
28223 rtx tmp = gen_reg_rtx (mode);
28224 emit_clobber (tmp);
28225 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28226 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28227 emit_move_insn (target, tmp);
28228 }
28229 else if (n_words == 4)
28230 {
28231 rtx tmp = gen_reg_rtx (V4SImode);
28232 gcc_assert (word_mode == SImode);
28233 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28234 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28235 emit_move_insn (target, gen_lowpart (mode, tmp));
28236 }
28237 else
28238 gcc_unreachable ();
28239 }
28240 }
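/* For the word-packing fallback above: a V4HImode initializer {a, b, c, d}
   on a 32-bit target is packed into two SImode words, (b << 16) | a and
   (d << 16) | c, which are then moved into the low and high halves of the
   vector register.  (Illustrative example.)  */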
28241
28242 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28243 instructions unless MMX_OK is true. */
28244
28245 void
28246 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28247 {
28248 enum machine_mode mode = GET_MODE (target);
28249 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28250 int n_elts = GET_MODE_NUNITS (mode);
28251 int n_var = 0, one_var = -1;
28252 bool all_same = true, all_const_zero = true;
28253 int i;
28254 rtx x;
28255
28256 for (i = 0; i < n_elts; ++i)
28257 {
28258 x = XVECEXP (vals, 0, i);
28259 if (!(CONST_INT_P (x)
28260 || GET_CODE (x) == CONST_DOUBLE
28261 || GET_CODE (x) == CONST_FIXED))
28262 n_var++, one_var = i;
28263 else if (x != CONST0_RTX (inner_mode))
28264 all_const_zero = false;
28265 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28266 all_same = false;
28267 }
28268
28269 /* Constants are best loaded from the constant pool. */
28270 if (n_var == 0)
28271 {
28272 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28273 return;
28274 }
28275
28276 /* If all values are identical, broadcast the value. */
28277 if (all_same
28278 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28279 XVECEXP (vals, 0, 0)))
28280 return;
28281
28282 /* Values where only one field is non-constant are best loaded from
28283 the pool and overwritten via move later. */
28284 if (n_var == 1)
28285 {
28286 if (all_const_zero
28287 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28288 XVECEXP (vals, 0, one_var),
28289 one_var))
28290 return;
28291
28292 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28293 return;
28294 }
28295
28296 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28297 }
28298
28299 void
28300 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28301 {
28302 enum machine_mode mode = GET_MODE (target);
28303 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28304 enum machine_mode half_mode;
28305 bool use_vec_merge = false;
28306 rtx tmp;
28307 static rtx (*gen_extract[6][2]) (rtx, rtx)
28308 = {
28309 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28310 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28311 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28312 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28313 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28314 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28315 };
28316 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28317 = {
28318 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28319 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28320 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28321 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28322 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28323 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28324 };
28325 int i, j, n;
28326
28327 switch (mode)
28328 {
28329 case V2SFmode:
28330 case V2SImode:
28331 if (mmx_ok)
28332 {
28333 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28334 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28335 if (elt == 0)
28336 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28337 else
28338 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28339 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28340 return;
28341 }
28342 break;
28343
28344 case V2DImode:
28345 use_vec_merge = TARGET_SSE4_1;
28346 if (use_vec_merge)
28347 break;
28348
28349 case V2DFmode:
28350 {
28351 rtx op0, op1;
28352
28353 /* For the two element vectors, we implement a VEC_CONCAT with
28354 the extraction of the other element. */
28355
28356 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28357 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28358
28359 if (elt == 0)
28360 op0 = val, op1 = tmp;
28361 else
28362 op0 = tmp, op1 = val;
28363
28364 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28365 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28366 }
28367 return;
28368
28369 case V4SFmode:
28370 use_vec_merge = TARGET_SSE4_1;
28371 if (use_vec_merge)
28372 break;
28373
28374 switch (elt)
28375 {
28376 case 0:
28377 use_vec_merge = true;
28378 break;
28379
28380 case 1:
28381 /* tmp = target = A B C D */
28382 tmp = copy_to_reg (target);
28383 /* target = A A B B */
28384 emit_insn (gen_sse_unpcklps (target, target, target));
28385 /* target = X A B B */
28386 ix86_expand_vector_set (false, target, val, 0);
28387 /* target = A X C D */
28388 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28389 GEN_INT (1), GEN_INT (0),
28390 GEN_INT (2+4), GEN_INT (3+4)));
28391 return;
28392
28393 case 2:
28394 /* tmp = target = A B C D */
28395 tmp = copy_to_reg (target);
28396 /* tmp = X B C D */
28397 ix86_expand_vector_set (false, tmp, val, 0);
28398 /* target = A B X D */
28399 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28400 GEN_INT (0), GEN_INT (1),
28401 GEN_INT (0+4), GEN_INT (3+4)));
28402 return;
28403
28404 case 3:
28405 /* tmp = target = A B C D */
28406 tmp = copy_to_reg (target);
28407 /* tmp = X B C D */
28408 ix86_expand_vector_set (false, tmp, val, 0);
28409 /* target = A B C X */
28410 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28411 GEN_INT (0), GEN_INT (1),
28412 GEN_INT (2+4), GEN_INT (0+4)));
28413 return;
28414
28415 default:
28416 gcc_unreachable ();
28417 }
28418 break;
28419
28420 case V4SImode:
28421 use_vec_merge = TARGET_SSE4_1;
28422 if (use_vec_merge)
28423 break;
28424
28425 /* Element 0 handled by vec_merge below. */
28426 if (elt == 0)
28427 {
28428 use_vec_merge = true;
28429 break;
28430 }
28431
28432 if (TARGET_SSE2)
28433 {
28434 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28435 store into element 0, then shuffle them back. */
28436
28437 rtx order[4];
28438
28439 order[0] = GEN_INT (elt);
28440 order[1] = const1_rtx;
28441 order[2] = const2_rtx;
28442 order[3] = GEN_INT (3);
28443 order[elt] = const0_rtx;
28444
28445 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28446 order[1], order[2], order[3]));
28447
28448 ix86_expand_vector_set (false, target, val, 0);
28449
28450 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28451 order[1], order[2], order[3]));
28452 }
28453 else
28454 {
28455 /* For SSE1, we have to reuse the V4SF code. */
28456 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28457 gen_lowpart (SFmode, val), elt);
28458 }
28459 return;
28460
28461 case V8HImode:
28462 use_vec_merge = TARGET_SSE2;
28463 break;
28464 case V4HImode:
28465 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28466 break;
28467
28468 case V16QImode:
28469 use_vec_merge = TARGET_SSE4_1;
28470 break;
28471
28472 case V8QImode:
28473 break;
28474
28475 case V32QImode:
28476 half_mode = V16QImode;
28477 j = 0;
28478 n = 16;
28479 goto half;
28480
28481 case V16HImode:
28482 half_mode = V8HImode;
28483 j = 1;
28484 n = 8;
28485 goto half;
28486
28487 case V8SImode:
28488 half_mode = V4SImode;
28489 j = 2;
28490 n = 4;
28491 goto half;
28492
28493 case V4DImode:
28494 half_mode = V2DImode;
28495 j = 3;
28496 n = 2;
28497 goto half;
28498
28499 case V8SFmode:
28500 half_mode = V4SFmode;
28501 j = 4;
28502 n = 4;
28503 goto half;
28504
28505 case V4DFmode:
28506 half_mode = V2DFmode;
28507 j = 5;
28508 n = 2;
28509 goto half;
28510
28511 half:
28512 /* Compute offset. */
28513 i = elt / n;
28514 elt %= n;
28515
28516 gcc_assert (i <= 1);
28517
28518 /* Extract the half. */
28519 tmp = gen_reg_rtx (half_mode);
28520 emit_insn ((*gen_extract[j][i]) (tmp, target));
28521
28522 /* Put val in tmp at elt. */
28523 ix86_expand_vector_set (false, tmp, val, elt);
28524
28525 /* Put it back. */
28526 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
28527 return;
28528
28529 default:
28530 break;
28531 }
28532
28533 if (use_vec_merge)
28534 {
28535 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28536 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28537 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28538 }
28539 else
28540 {
28541 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28542
28543 emit_move_insn (mem, target);
28544
28545 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28546 emit_move_insn (tmp, val);
28547
28548 emit_move_insn (target, mem);
28549 }
28550 }
28551
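/* An illustrative sketch (assuming a generic four-element vector type
   VEC_T with element type ELEM_T) of the stack-temporary fallback
   emitted at the end of ix86_expand_vector_set when no direct insertion
   pattern applies:

     union { VEC_T v; ELEM_T e[4]; } u;
     u.v = vector_reg;      spill the whole vector to a stack slot
     u.e[elt] = val;        overwrite the selected element in memory
     vector_reg = u.v;      reload the modified vector                */
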
28552 void
28553 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28554 {
28555 enum machine_mode mode = GET_MODE (vec);
28556 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28557 bool use_vec_extr = false;
28558 rtx tmp;
28559
28560 switch (mode)
28561 {
28562 case V2SImode:
28563 case V2SFmode:
28564 if (!mmx_ok)
28565 break;
28566 /* FALLTHRU */
28567
28568 case V2DFmode:
28569 case V2DImode:
28570 use_vec_extr = true;
28571 break;
28572
28573 case V4SFmode:
28574 use_vec_extr = TARGET_SSE4_1;
28575 if (use_vec_extr)
28576 break;
28577
28578 switch (elt)
28579 {
28580 case 0:
28581 tmp = vec;
28582 break;
28583
28584 case 1:
28585 case 3:
28586 tmp = gen_reg_rtx (mode);
28587 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28588 GEN_INT (elt), GEN_INT (elt),
28589 GEN_INT (elt+4), GEN_INT (elt+4)));
28590 break;
28591
28592 case 2:
28593 tmp = gen_reg_rtx (mode);
28594 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28595 break;
28596
28597 default:
28598 gcc_unreachable ();
28599 }
28600 vec = tmp;
28601 use_vec_extr = true;
28602 elt = 0;
28603 break;
28604
28605 case V4SImode:
28606 use_vec_extr = TARGET_SSE4_1;
28607 if (use_vec_extr)
28608 break;
28609
28610 if (TARGET_SSE2)
28611 {
28612 switch (elt)
28613 {
28614 case 0:
28615 tmp = vec;
28616 break;
28617
28618 case 1:
28619 case 3:
28620 tmp = gen_reg_rtx (mode);
28621 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28622 GEN_INT (elt), GEN_INT (elt),
28623 GEN_INT (elt), GEN_INT (elt)));
28624 break;
28625
28626 case 2:
28627 tmp = gen_reg_rtx (mode);
28628 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28629 break;
28630
28631 default:
28632 gcc_unreachable ();
28633 }
28634 vec = tmp;
28635 use_vec_extr = true;
28636 elt = 0;
28637 }
28638 else
28639 {
28640 /* For SSE1, we have to reuse the V4SF code. */
28641 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28642 gen_lowpart (V4SFmode, vec), elt);
28643 return;
28644 }
28645 break;
28646
28647 case V8HImode:
28648 use_vec_extr = TARGET_SSE2;
28649 break;
28650 case V4HImode:
28651 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28652 break;
28653
28654 case V16QImode:
28655 use_vec_extr = TARGET_SSE4_1;
28656 break;
28657
28658 case V8QImode:
28659 /* ??? Could extract the appropriate HImode element and shift. */
28660 default:
28661 break;
28662 }
28663
28664 if (use_vec_extr)
28665 {
28666 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28667 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28668
28669 /* Let the rtl optimizers know about the zero extension performed. */
28670 if (inner_mode == QImode || inner_mode == HImode)
28671 {
28672 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28673 target = gen_lowpart (SImode, target);
28674 }
28675
28676 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28677 }
28678 else
28679 {
28680 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28681
28682 emit_move_insn (mem, vec);
28683
28684 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28685 emit_move_insn (target, tmp);
28686 }
28687 }
28688
28689 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28690 pattern to reduce; DEST is the destination; IN is the input vector. */
28691
28692 void
28693 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28694 {
28695 rtx tmp1, tmp2, tmp3;
28696
28697 tmp1 = gen_reg_rtx (V4SFmode);
28698 tmp2 = gen_reg_rtx (V4SFmode);
28699 tmp3 = gen_reg_rtx (V4SFmode);
28700
28701 emit_insn (gen_sse_movhlps (tmp1, in, in));
28702 emit_insn (fn (tmp2, tmp1, in));
28703
28704 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28705 GEN_INT (1), GEN_INT (1),
28706 GEN_INT (1+4), GEN_INT (1+4)));
28707 emit_insn (fn (dest, tmp2, tmp3));
28708 }
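
/* A lane-by-lane sketch of the reduction above, assuming
   IN = { a0, a1, a2, a3 } and FN an associative, commutative binop:

     tmp1 = { a2, a3, a2, a3 }                      movhlps
     tmp2 = FN (tmp1, IN)
          = { FN (a2, a0), FN (a3, a1), ... }
     tmp3 = { tmp2[1], tmp2[1], tmp2[1], tmp2[1] }  shufps
     dest = FN (tmp2, tmp3)

   so element 0 of DEST holds FN (FN (a2, a0), FN (a3, a1)), the
   reduction of all four input elements.  */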
28709 \f
28710 /* Target hook for scalar_mode_supported_p. */
28711 static bool
28712 ix86_scalar_mode_supported_p (enum machine_mode mode)
28713 {
28714 if (DECIMAL_FLOAT_MODE_P (mode))
28715 return true;
28716 else if (mode == TFmode)
28717 return true;
28718 else
28719 return default_scalar_mode_supported_p (mode);
28720 }
28721
28722 /* Implements target hook vector_mode_supported_p. */
28723 static bool
28724 ix86_vector_mode_supported_p (enum machine_mode mode)
28725 {
28726 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28727 return true;
28728 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28729 return true;
28730 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28731 return true;
28732 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28733 return true;
28734 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28735 return true;
28736 return false;
28737 }
28738
28739 /* Target hook for c_mode_for_suffix. */
28740 static enum machine_mode
28741 ix86_c_mode_for_suffix (char suffix)
28742 {
28743 if (suffix == 'q')
28744 return TFmode;
28745 if (suffix == 'w')
28746 return XFmode;
28747
28748 return VOIDmode;
28749 }
28750
28751 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28752
28753 We do this in the new i386 backend to maintain source compatibility
28754 with the old cc0-based compiler. */
28755
28756 static tree
28757 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28758 tree inputs ATTRIBUTE_UNUSED,
28759 tree clobbers)
28760 {
28761 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28762 clobbers);
28763 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28764 clobbers);
28765 return clobbers;
28766 }
28767
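/* The practical effect of the hook above: every asm statement compiled
   by the i386 backend is treated as if "flags" and "fpsr" had been
   appended to its clobber list, so e.g. (illustrative only)

     asm volatile ("cld");

   is handled as if it also clobbered the condition codes and the x87
   status word.  */
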
28768 /* Implements the target hook targetm.asm.encode_section_info. This
28769 is not used by NetWare. */
28770
28771 static void ATTRIBUTE_UNUSED
28772 ix86_encode_section_info (tree decl, rtx rtl, int first)
28773 {
28774 default_encode_section_info (decl, rtl, first);
28775
28776 if (TREE_CODE (decl) == VAR_DECL
28777 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28778 && ix86_in_large_data_p (decl))
28779 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28780 }
28781
28782 /* Worker function for REVERSE_CONDITION. */
28783
28784 enum rtx_code
28785 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28786 {
28787 return (mode != CCFPmode && mode != CCFPUmode
28788 ? reverse_condition (code)
28789 : reverse_condition_maybe_unordered (code));
28790 }
28791
28792 /* Output code to perform an x87 FP register move, from OPERANDS[1]
28793 to OPERANDS[0]. */
28794
28795 const char *
28796 output_387_reg_move (rtx insn, rtx *operands)
28797 {
28798 if (REG_P (operands[0]))
28799 {
28800 if (REG_P (operands[1])
28801 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28802 {
28803 if (REGNO (operands[0]) == FIRST_STACK_REG)
28804 return output_387_ffreep (operands, 0);
28805 return "fstp\t%y0";
28806 }
28807 if (STACK_TOP_P (operands[0]))
28808 return "fld%Z1\t%y1";
28809 return "fst\t%y0";
28810 }
28811 else if (MEM_P (operands[0]))
28812 {
28813 gcc_assert (REG_P (operands[1]));
28814 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28815 return "fstp%Z0\t%y0";
28816 else
28817 {
28818 /* There is no non-popping store to memory for XFmode.
28819 So if we need one, follow the store with a load. */
28820 if (GET_MODE (operands[0]) == XFmode)
28821 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
28822 else
28823 return "fst%Z0\t%y0";
28824 }
28825 }
28826 else
28827 gcc_unreachable();
28828 }
28829
28830 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
28831 the FP status register is set. */
28832
28833 void
28834 ix86_emit_fp_unordered_jump (rtx label)
28835 {
28836 rtx reg = gen_reg_rtx (HImode);
28837 rtx temp;
28838
28839 emit_insn (gen_x86_fnstsw_1 (reg));
28840
28841 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28842 {
28843 emit_insn (gen_x86_sahf_1 (reg));
28844
28845 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28846 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28847 }
28848 else
28849 {
28850 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28851
28852 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28853 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28854 }
28855
28856 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28857 gen_rtx_LABEL_REF (VOIDmode, label),
28858 pc_rtx);
28859 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28860
28861 emit_jump_insn (temp);
28862 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28863 }
28864
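/* Roughly the two instruction sequences the expansion above produces,
   shown as a hedged sketch (operand syntax abbreviated; C2 is bit 2 of
   the high byte of the FP status word, hence the 0x04 mask):

     with sahf available               without sahf
       fnstsw  %ax                       fnstsw  %ax
       sahf                              testb   $0x04, %ah
       jp      label                     jne     label              */
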
28865 /* Output code to perform a log1p XFmode calculation. */
28866
28867 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28868 {
28869 rtx label1 = gen_label_rtx ();
28870 rtx label2 = gen_label_rtx ();
28871
28872 rtx tmp = gen_reg_rtx (XFmode);
28873 rtx tmp2 = gen_reg_rtx (XFmode);
28874
28875 emit_insn (gen_absxf2 (tmp, op1));
28876 emit_insn (gen_cmpxf (tmp,
28877 CONST_DOUBLE_FROM_REAL_VALUE (
28878 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28879 XFmode)));
28880 emit_jump_insn (gen_bge (label1));
28881
28882 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28883 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28884 emit_jump (label2);
28885
28886 emit_label (label1);
28887 emit_move_insn (tmp, CONST1_RTX (XFmode));
28888 emit_insn (gen_addxf3 (tmp, op1, tmp));
28889 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28890 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28891
28892 emit_label (label2);
28893 }
28894
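/* A sketch of the dispatch above.  The threshold 0.29289321881... is
   1 - sqrt(2)/2, the largest magnitude for which the fyl2xp1
   instruction is specified to be accurate; both branches compute
   ln(2) * log2 (1 + x), i.e. log1p (x):

     if (fabs (x) < 1 - sqrt (2) / 2)
       result = fyl2xp1 (ln2, x);            accurate for small x
     else
       result = fyl2x (ln2, 1.0 + x);        explicit 1 + x is fine here  */
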
28895 /* Output code to perform a Newton-Raphson approximation of a single precision
28896 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
28897
28898 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28899 {
28900 rtx x0, x1, e0, e1, two;
28901
28902 x0 = gen_reg_rtx (mode);
28903 e0 = gen_reg_rtx (mode);
28904 e1 = gen_reg_rtx (mode);
28905 x1 = gen_reg_rtx (mode);
28906
28907 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28908
28909 if (VECTOR_MODE_P (mode))
28910 two = ix86_build_const_vector (SFmode, true, two);
28911
28912 two = force_reg (mode, two);
28913
28914 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28915
28916 /* x0 = rcp(b) estimate */
28917 emit_insn (gen_rtx_SET (VOIDmode, x0,
28918 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
28919 UNSPEC_RCP)));
28920 /* e0 = x0 * b */
28921 emit_insn (gen_rtx_SET (VOIDmode, e0,
28922 gen_rtx_MULT (mode, x0, b)));
28923 /* e1 = 2. - e0 */
28924 emit_insn (gen_rtx_SET (VOIDmode, e1,
28925 gen_rtx_MINUS (mode, two, e0)));
28926 /* x1 = x0 * e1 */
28927 emit_insn (gen_rtx_SET (VOIDmode, x1,
28928 gen_rtx_MULT (mode, x0, e1)));
28929 /* res = a * x1 */
28930 emit_insn (gen_rtx_SET (VOIDmode, res,
28931 gen_rtx_MULT (mode, a, x1)));
28932 }
28933
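/* A scalar sketch (for exposition only; rcp_estimate stands for the
   roughly 12-bit rcpss/rcpps approximation) of the Newton-Raphson step
   emitted above:

     float x0 = rcp_estimate (b);          x0 ~= 1/b
     float x1 = x0 * (2.0f - b * x0);      one refinement step
     return a * x1;                        a/b to roughly single precision  */
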
28934 /* Output code to perform a Newton-Raphson approximation of a
28935 single precision floating point [reciprocal] square root. */
28936
28937 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28938 bool recip)
28939 {
28940 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28941 REAL_VALUE_TYPE r;
28942
28943 x0 = gen_reg_rtx (mode);
28944 e0 = gen_reg_rtx (mode);
28945 e1 = gen_reg_rtx (mode);
28946 e2 = gen_reg_rtx (mode);
28947 e3 = gen_reg_rtx (mode);
28948
28949 real_from_integer (&r, VOIDmode, -3, -1, 0);
28950 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28951
28952 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28953 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28954
28955 if (VECTOR_MODE_P (mode))
28956 {
28957 mthree = ix86_build_const_vector (SFmode, true, mthree);
28958 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28959 }
28960
28961 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28962 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28963
28964 /* x0 = rsqrt(a) estimate */
28965 emit_insn (gen_rtx_SET (VOIDmode, x0,
28966 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28967 UNSPEC_RSQRT)));
28968
28969 /* If a == 0.0, filter out the infinite rsqrt estimate so that sqrt (0.0) does not produce a NaN. */
28970 if (!recip)
28971 {
28972 rtx zero, mask;
28973
28974 zero = gen_reg_rtx (mode);
28975 mask = gen_reg_rtx (mode);
28976
28977 zero = force_reg (mode, CONST0_RTX(mode));
28978 emit_insn (gen_rtx_SET (VOIDmode, mask,
28979 gen_rtx_NE (mode, zero, a)));
28980
28981 emit_insn (gen_rtx_SET (VOIDmode, x0,
28982 gen_rtx_AND (mode, x0, mask)));
28983 }
28984
28985 /* e0 = x0 * a */
28986 emit_insn (gen_rtx_SET (VOIDmode, e0,
28987 gen_rtx_MULT (mode, x0, a)));
28988 /* e1 = e0 * x0 */
28989 emit_insn (gen_rtx_SET (VOIDmode, e1,
28990 gen_rtx_MULT (mode, e0, x0)));
28991
28992 /* e2 = e1 - 3. */
28993 mthree = force_reg (mode, mthree);
28994 emit_insn (gen_rtx_SET (VOIDmode, e2,
28995 gen_rtx_PLUS (mode, e1, mthree)));
28996
28997 mhalf = force_reg (mode, mhalf);
28998 if (recip)
28999 /* e3 = -.5 * x0 */
29000 emit_insn (gen_rtx_SET (VOIDmode, e3,
29001 gen_rtx_MULT (mode, x0, mhalf)));
29002 else
29003 /* e3 = -.5 * e0 */
29004 emit_insn (gen_rtx_SET (VOIDmode, e3,
29005 gen_rtx_MULT (mode, e0, mhalf)));
29006 /* ret = e2 * e3 */
29007 emit_insn (gen_rtx_SET (VOIDmode, res,
29008 gen_rtx_MULT (mode, e2, e3)));
29009 }
29010
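/* A scalar sketch (for exposition only; rsqrt_estimate stands for the
   rsqrtss/rsqrtps approximation) of the expansion above, following the
   formulas in the comment:

     float x0 = rsqrt_estimate (a);               x0 ~= 1/sqrt(a)
     float e  = a * x0 * x0 - 3.0f;               e2 above
     rsqrt (a) ~= -0.5f * x0 * e;                 RECIP case
     sqrt (a)  ~= -0.5f * (a * x0) * e;           !RECIP case           */
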
29011 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29012
29013 static void ATTRIBUTE_UNUSED
29014 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29015 tree decl)
29016 {
29017 /* With Binutils 2.15, the "@unwind" marker must be specified on
29018 every occurrence of the ".eh_frame" section, not just the first
29019 one. */
29020 if (TARGET_64BIT
29021 && strcmp (name, ".eh_frame") == 0)
29022 {
29023 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29024 flags & SECTION_WRITE ? "aw" : "a");
29025 return;
29026 }
29027 default_elf_asm_named_section (name, flags, decl);
29028 }
29029
29030 /* Return the mangling of TYPE if it is an extended fundamental type. */
29031
29032 static const char *
29033 ix86_mangle_type (const_tree type)
29034 {
29035 type = TYPE_MAIN_VARIANT (type);
29036
29037 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29038 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29039 return NULL;
29040
29041 switch (TYPE_MODE (type))
29042 {
29043 case TFmode:
29044 /* __float128 is "g". */
29045 return "g";
29046 case XFmode:
29047 /* "long double" or __float80 is "e". */
29048 return "e";
29049 default:
29050 return NULL;
29051 }
29052 }
29053
29054 /* For 32-bit code we can save PIC register setup by using
29055 __stack_chk_fail_local hidden function instead of calling
29056 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
29057 register, so it is better to call __stack_chk_fail directly. */
29058
29059 static tree
29060 ix86_stack_protect_fail (void)
29061 {
29062 return TARGET_64BIT
29063 ? default_external_stack_protect_fail ()
29064 : default_hidden_stack_protect_fail ();
29065 }
29066
29067 /* Select a format to encode pointers in exception handling data. CODE
29068 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29069 true if the symbol may be affected by dynamic relocations.
29070
29071 ??? All x86 object file formats are capable of representing this.
29072 After all, the relocation needed is the same as for the call insn.
29073 Whether or not a particular assembler allows us to enter such, I
29074 guess we'll have to see. */
29075 int
29076 asm_preferred_eh_data_format (int code, int global)
29077 {
29078 if (flag_pic)
29079 {
29080 int type = DW_EH_PE_sdata8;
29081 if (!TARGET_64BIT
29082 || ix86_cmodel == CM_SMALL_PIC
29083 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29084 type = DW_EH_PE_sdata4;
29085 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29086 }
29087 if (ix86_cmodel == CM_SMALL
29088 || (ix86_cmodel == CM_MEDIUM && code))
29089 return DW_EH_PE_udata4;
29090 return DW_EH_PE_absptr;
29091 }
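
/* Two examples of the selection above: 64-bit -fpic code with the small
   PIC model encodes a global data reference as
   DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4, while non-PIC
   code with the small code model simply uses DW_EH_PE_udata4.  */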
29092 \f
29093 /* Expand copysign from SIGN to the positive value ABS_VALUE
29094 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
29095 the sign-bit. */
29096 static void
29097 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29098 {
29099 enum machine_mode mode = GET_MODE (sign);
29100 rtx sgn = gen_reg_rtx (mode);
29101 if (mask == NULL_RTX)
29102 {
29103 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29104 if (!VECTOR_MODE_P (mode))
29105 {
29106 /* We need to generate a scalar mode mask in this case. */
29107 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29108 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29109 mask = gen_reg_rtx (mode);
29110 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29111 }
29112 }
29113 else
29114 mask = gen_rtx_NOT (mode, mask);
29115 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29116 gen_rtx_AND (mode, mask, sign)));
29117 emit_insn (gen_rtx_SET (VOIDmode, result,
29118 gen_rtx_IOR (mode, abs_value, sgn)));
29119 }
29120
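/* A bit-level sketch of the copysign expansion above, assuming
   ABS_VALUE already has its sign bit clear:

     sgn    = sign & SIGNBIT_MASK;     isolate the sign bit of SIGN
     result = abs_value | sgn;         graft that sign onto ABS_VALUE  */
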
29121 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29122 mask for masking out the sign-bit is stored in *SMASK, if that is
29123 non-null. */
29124 static rtx
29125 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29126 {
29127 enum machine_mode mode = GET_MODE (op0);
29128 rtx xa, mask;
29129
29130 xa = gen_reg_rtx (mode);
29131 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29132 if (!VECTOR_MODE_P (mode))
29133 {
29134 /* We need to generate a scalar mode mask in this case. */
29135 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29136 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29137 mask = gen_reg_rtx (mode);
29138 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29139 }
29140 emit_insn (gen_rtx_SET (VOIDmode, xa,
29141 gen_rtx_AND (mode, op0, mask)));
29142
29143 if (smask)
29144 *smask = mask;
29145
29146 return xa;
29147 }
29148
29149 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29150 swapping the operands if SWAP_OPERANDS is true. The expanded
29151 code is a forward jump to a newly created label in case the
29152 comparison is true. The generated label rtx is returned. */
29153 static rtx
29154 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29155 bool swap_operands)
29156 {
29157 rtx label, tmp;
29158
29159 if (swap_operands)
29160 {
29161 tmp = op0;
29162 op0 = op1;
29163 op1 = tmp;
29164 }
29165
29166 label = gen_label_rtx ();
29167 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29168 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29169 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29170 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29171 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29172 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29173 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29174 JUMP_LABEL (tmp) = label;
29175
29176 return label;
29177 }
29178
29179 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29180 using comparison code CODE. Operands are swapped for the comparison if
29181 SWAP_OPERANDS is true. Returns an rtx for the generated mask. */
29182 static rtx
29183 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29184 bool swap_operands)
29185 {
29186 enum machine_mode mode = GET_MODE (op0);
29187 rtx mask = gen_reg_rtx (mode);
29188
29189 if (swap_operands)
29190 {
29191 rtx tmp = op0;
29192 op0 = op1;
29193 op1 = tmp;
29194 }
29195
29196 if (mode == DFmode)
29197 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29198 gen_rtx_fmt_ee (code, mode, op0, op1)));
29199 else
29200 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29201 gen_rtx_fmt_ee (code, mode, op0, op1)));
29202
29203 return mask;
29204 }
29205
29206 /* Generate and return an rtx of mode MODE for 2**n where n is the number
29207 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29208 static rtx
29209 ix86_gen_TWO52 (enum machine_mode mode)
29210 {
29211 REAL_VALUE_TYPE TWO52r;
29212 rtx TWO52;
29213
29214 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29215 TWO52 = const_double_from_real_value (TWO52r, mode);
29216 TWO52 = force_reg (mode, TWO52);
29217
29218 return TWO52;
29219 }
29220
29221 /* Expand SSE sequence for computing lround from OP1 storing
29222 into OP0. */
29223 void
29224 ix86_expand_lround (rtx op0, rtx op1)
29225 {
29226 /* C code for the stuff we're doing below:
29227 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29228 return (long)tmp;
29229 */
29230 enum machine_mode mode = GET_MODE (op1);
29231 const struct real_format *fmt;
29232 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29233 rtx adj;
29234
29235 /* load nextafter (0.5, 0.0) */
29236 fmt = REAL_MODE_FORMAT (mode);
29237 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29238 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29239
29240 /* adj = copysign (0.5, op1) */
29241 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29242 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29243
29244 /* adj = op1 + adj */
29245 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29246
29247 /* op0 = (imode)adj */
29248 expand_fix (op0, adj, 0);
29249 }
29250
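/* A worked example of why nextafter (0.5, 0.0) is used above instead of
   0.5: for x = 0.49999999999999994 (the largest double below 0.5),
   x + 0.5 rounds up to exactly 1.0 under round-to-nearest-even, so the
   truncation would wrongly yield 1; x + nextafter (0.5, 0.0) instead
   gives 0.9999999999999999, and the truncation yields the correct 0.  */
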
29251 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
29252 storing into OPERAND0. */
29253 void
29254 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29255 {
29256 /* C code for the stuff we're doing below (for do_floor):
29257 xi = (long)op1;
29258 xi -= (double)xi > op1 ? 1 : 0;
29259 return xi;
29260 */
29261 enum machine_mode fmode = GET_MODE (op1);
29262 enum machine_mode imode = GET_MODE (op0);
29263 rtx ireg, freg, label, tmp;
29264
29265 /* reg = (long)op1 */
29266 ireg = gen_reg_rtx (imode);
29267 expand_fix (ireg, op1, 0);
29268
29269 /* freg = (double)reg */
29270 freg = gen_reg_rtx (fmode);
29271 expand_float (freg, ireg, 0);
29272
29273 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29274 label = ix86_expand_sse_compare_and_jump (UNLE,
29275 freg, op1, !do_floor);
29276 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29277 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29278 emit_move_insn (ireg, tmp);
29279
29280 emit_label (label);
29281 LABEL_NUSES (label) = 1;
29282
29283 emit_move_insn (op0, ireg);
29284 }
29285
29286 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29287 result in OPERAND0. */
29288 void
29289 ix86_expand_rint (rtx operand0, rtx operand1)
29290 {
29291 /* C code for the stuff we're doing below:
29292 xa = fabs (operand1);
29293 if (!isless (xa, 2**52))
29294 return operand1;
29295 xa = xa + 2**52 - 2**52;
29296 return copysign (xa, operand1);
29297 */
29298 enum machine_mode mode = GET_MODE (operand0);
29299 rtx res, xa, label, TWO52, mask;
29300
29301 res = gen_reg_rtx (mode);
29302 emit_move_insn (res, operand1);
29303
29304 /* xa = abs (operand1) */
29305 xa = ix86_expand_sse_fabs (res, &mask);
29306
29307 /* if (!isless (xa, TWO52)) goto label; */
29308 TWO52 = ix86_gen_TWO52 (mode);
29309 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29310
29311 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29312 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29313
29314 ix86_sse_copysign_to_positive (res, xa, res, mask);
29315
29316 emit_label (label);
29317 LABEL_NUSES (label) = 1;
29318
29319 emit_move_insn (operand0, res);
29320 }
29321
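/* A worked example of the xa + 2**52 - 2**52 trick used above: at
   magnitude 2**52 the spacing between adjacent doubles is exactly 1.0,
   so the addition forces a rounding to integer in the current rounding
   mode and the subtraction recovers that integer.  E.g. for xa = 41.7:

     xa + 2**52  = 4503599627370537.7    rounds to 4503599627370538.0
     ...  - 2**52 = 42.0                 which is rint (41.7)           */
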
29322 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29323 into OPERAND0. */
29324 void
29325 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29326 {
29327 /* C code for the stuff we expand below.
29328 double xa = fabs (x), x2;
29329 if (!isless (xa, TWO52))
29330 return x;
29331 xa = xa + TWO52 - TWO52;
29332 x2 = copysign (xa, x);
29333 Compensate. Floor:
29334 if (x2 > x)
29335 x2 -= 1;
29336 Compensate. Ceil:
29337 if (x2 < x)
29338 x2 -= -1;
29339 return x2;
29340 */
29341 enum machine_mode mode = GET_MODE (operand0);
29342 rtx xa, TWO52, tmp, label, one, res, mask;
29343
29344 TWO52 = ix86_gen_TWO52 (mode);
29345
29346 /* Temporary for holding the result, initialized to the input
29347 operand to ease control flow. */
29348 res = gen_reg_rtx (mode);
29349 emit_move_insn (res, operand1);
29350
29351 /* xa = abs (operand1) */
29352 xa = ix86_expand_sse_fabs (res, &mask);
29353
29354 /* if (!isless (xa, TWO52)) goto label; */
29355 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29356
29357 /* xa = xa + TWO52 - TWO52; */
29358 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29359 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29360
29361 /* xa = copysign (xa, operand1) */
29362 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29363
29364 /* generate 1.0 or -1.0 */
29365 one = force_reg (mode,
29366 const_double_from_real_value (do_floor
29367 ? dconst1 : dconstm1, mode));
29368
29369 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29370 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29371 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29372 gen_rtx_AND (mode, one, tmp)));
29373 /* We always need to subtract here to preserve signed zero. */
29374 tmp = expand_simple_binop (mode, MINUS,
29375 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29376 emit_move_insn (res, tmp);
29377
29378 emit_label (label);
29379 LABEL_NUSES (label) = 1;
29380
29381 emit_move_insn (operand0, res);
29382 }
29383
29384 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29385 into OPERAND0. */
29386 void
29387 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29388 {
29389 /* C code for the stuff we expand below.
29390 double xa = fabs (x), x2;
29391 if (!isless (xa, TWO52))
29392 return x;
29393 x2 = (double)(long)x;
29394 Compensate. Floor:
29395 if (x2 > x)
29396 x2 -= 1;
29397 Compensate. Ceil:
29398 if (x2 < x)
29399 x2 += 1;
29400 if (HONOR_SIGNED_ZEROS (mode))
29401 return copysign (x2, x);
29402 return x2;
29403 */
29404 enum machine_mode mode = GET_MODE (operand0);
29405 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29406
29407 TWO52 = ix86_gen_TWO52 (mode);
29408
29409 /* Temporary for holding the result, initialized to the input
29410 operand to ease control flow. */
29411 res = gen_reg_rtx (mode);
29412 emit_move_insn (res, operand1);
29413
29414 /* xa = abs (operand1) */
29415 xa = ix86_expand_sse_fabs (res, &mask);
29416
29417 /* if (!isless (xa, TWO52)) goto label; */
29418 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29419
29420 /* xa = (double)(long)x */
29421 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29422 expand_fix (xi, res, 0);
29423 expand_float (xa, xi, 0);
29424
29425 /* generate 1.0 */
29426 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29427
29428 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29429 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29430 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29431 gen_rtx_AND (mode, one, tmp)));
29432 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29433 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29434 emit_move_insn (res, tmp);
29435
29436 if (HONOR_SIGNED_ZEROS (mode))
29437 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29438
29439 emit_label (label);
29440 LABEL_NUSES (label) = 1;
29441
29442 emit_move_insn (operand0, res);
29443 }
29444
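/* A worked example of the compensation step above, for do_floor and
   operand1 = -2.5: x2 = (double)(long)-2.5 is -2.0, the mask from
   (x2 > x) is all ones, so 1.0 is subtracted and the result is -3.0,
   i.e. floor (-2.5).  For ceil the comparison operands are swapped and
   1.0 is added instead.  */
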
29445 /* Expand SSE sequence for computing round from OPERAND1 storing
29446 into OPERAND0. Sequence that works without relying on DImode truncation
29447 via cvttsd2siq that is only available on 64bit targets. */
29448 void
29449 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29450 {
29451 /* C code for the stuff we expand below.
29452 double xa = fabs (x), xa2, x2;
29453 if (!isless (xa, TWO52))
29454 return x;
29455 Using the absolute value and copying back sign makes
29456 -0.0 -> -0.0 correct.
29457 xa2 = xa + TWO52 - TWO52;
29458 Compensate.
29459 dxa = xa2 - xa;
29460 if (dxa <= -0.5)
29461 xa2 += 1;
29462 else if (dxa > 0.5)
29463 xa2 -= 1;
29464 x2 = copysign (xa2, x);
29465 return x2;
29466 */
29467 enum machine_mode mode = GET_MODE (operand0);
29468 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29469
29470 TWO52 = ix86_gen_TWO52 (mode);
29471
29472 /* Temporary for holding the result, initialized to the input
29473 operand to ease control flow. */
29474 res = gen_reg_rtx (mode);
29475 emit_move_insn (res, operand1);
29476
29477 /* xa = abs (operand1) */
29478 xa = ix86_expand_sse_fabs (res, &mask);
29479
29480 /* if (!isless (xa, TWO52)) goto label; */
29481 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29482
29483 /* xa2 = xa + TWO52 - TWO52; */
29484 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29485 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29486
29487 /* dxa = xa2 - xa; */
29488 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29489
29490 /* generate 0.5, 1.0 and -0.5 */
29491 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29492 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29493 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29494 0, OPTAB_DIRECT);
29495
29496 /* Compensate. */
29497 tmp = gen_reg_rtx (mode);
29498 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29499 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29500 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29501 gen_rtx_AND (mode, one, tmp)));
29502 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29503 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29504 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29505 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29506 gen_rtx_AND (mode, one, tmp)));
29507 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29508
29509 /* res = copysign (xa2, operand1) */
29510 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29511
29512 emit_label (label);
29513 LABEL_NUSES (label) = 1;
29514
29515 emit_move_insn (operand0, res);
29516 }
29517
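/* A worked example of the dxa compensation above, for operand1 = 2.5
   with round-to-nearest-even in effect:

     xa  = 2.5
     xa2 = 2.5 + 2**52 - 2**52 = 2.0     the tie rounds to even
     dxa = xa2 - xa = -0.5               so the dxa <= -0.5 branch fires
     xa2 = 3.0                           round (2.5) rounds away from zero  */
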
29518 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29519 into OPERAND0. */
29520 void
29521 ix86_expand_trunc (rtx operand0, rtx operand1)
29522 {
29523 /* C code for SSE variant we expand below.
29524 double xa = fabs (x), x2;
29525 if (!isless (xa, TWO52))
29526 return x;
29527 x2 = (double)(long)x;
29528 if (HONOR_SIGNED_ZEROS (mode))
29529 return copysign (x2, x);
29530 return x2;
29531 */
29532 enum machine_mode mode = GET_MODE (operand0);
29533 rtx xa, xi, TWO52, label, res, mask;
29534
29535 TWO52 = ix86_gen_TWO52 (mode);
29536
29537 /* Temporary for holding the result, initialized to the input
29538 operand to ease control flow. */
29539 res = gen_reg_rtx (mode);
29540 emit_move_insn (res, operand1);
29541
29542 /* xa = abs (operand1) */
29543 xa = ix86_expand_sse_fabs (res, &mask);
29544
29545 /* if (!isless (xa, TWO52)) goto label; */
29546 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29547
29548 /* x = (double)(long)x */
29549 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29550 expand_fix (xi, res, 0);
29551 expand_float (res, xi, 0);
29552
29553 if (HONOR_SIGNED_ZEROS (mode))
29554 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29555
29556 emit_label (label);
29557 LABEL_NUSES (label) = 1;
29558
29559 emit_move_insn (operand0, res);
29560 }
29561
29562 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
29563 OPERAND0. Works without relying on the DImode truncation via cvttsd2siq that is only available on 64bit targets. */
29564 void
29565 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29566 {
29567 enum machine_mode mode = GET_MODE (operand0);
29568 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29569
29570 /* C code for SSE variant we expand below.
29571 double xa = fabs (x), x2;
29572 if (!isless (xa, TWO52))
29573 return x;
29574 xa2 = xa + TWO52 - TWO52;
29575 Compensate:
29576 if (xa2 > xa)
29577 xa2 -= 1.0;
29578 x2 = copysign (xa2, x);
29579 return x2;
29580 */
29581
29582 TWO52 = ix86_gen_TWO52 (mode);
29583
29584 /* Temporary for holding the result, initialized to the input
29585 operand to ease control flow. */
29586 res = gen_reg_rtx (mode);
29587 emit_move_insn (res, operand1);
29588
29589 /* xa = abs (operand1) */
29590 xa = ix86_expand_sse_fabs (res, &smask);
29591
29592 /* if (!isless (xa, TWO52)) goto label; */
29593 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29594
29595 /* res = xa + TWO52 - TWO52; */
29596 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29597 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29598 emit_move_insn (res, tmp);
29599
29600 /* generate 1.0 */
29601 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29602
29603 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29604 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29605 emit_insn (gen_rtx_SET (VOIDmode, mask,
29606 gen_rtx_AND (mode, mask, one)));
29607 tmp = expand_simple_binop (mode, MINUS,
29608 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29609 emit_move_insn (res, tmp);
29610
29611 /* res = copysign (res, operand1) */
29612 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29613
29614 emit_label (label);
29615 LABEL_NUSES (label) = 1;
29616
29617 emit_move_insn (operand0, res);
29618 }
29619
29620 /* Expand SSE sequence for computing round from OPERAND1 storing
29621 into OPERAND0. */
29622 void
29623 ix86_expand_round (rtx operand0, rtx operand1)
29624 {
29625 /* C code for the stuff we're doing below:
29626 double xa = fabs (x);
29627 if (!isless (xa, TWO52))
29628 return x;
29629 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29630 return copysign (xa, x);
29631 */
29632 enum machine_mode mode = GET_MODE (operand0);
29633 rtx res, TWO52, xa, label, xi, half, mask;
29634 const struct real_format *fmt;
29635 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29636
29637 /* Temporary for holding the result, initialized to the input
29638 operand to ease control flow. */
29639 res = gen_reg_rtx (mode);
29640 emit_move_insn (res, operand1);
29641
29642 TWO52 = ix86_gen_TWO52 (mode);
29643 xa = ix86_expand_sse_fabs (res, &mask);
29644 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29645
29646 /* load nextafter (0.5, 0.0) */
29647 fmt = REAL_MODE_FORMAT (mode);
29648 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29649 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29650
29651 /* xa = xa + 0.5 */
29652 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29653 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29654
29655 /* xa = (double)(int64_t)xa */
29656 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29657 expand_fix (xi, xa, 0);
29658 expand_float (xa, xi, 0);
29659
29660 /* res = copysign (xa, operand1) */
29661 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29662
29663 emit_label (label);
29664 LABEL_NUSES (label) = 1;
29665
29666 emit_move_insn (operand0, res);
29667 }
29668
29669 \f
29670 /* Return true if the combination of operands for an SSE5 instruction is valid.
29671 OPERANDS is the array of operands.
29672 NUM is the number of operands.
29673 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29674 NUM_MEMORY is the maximum number of memory operands to accept.
29675 When COMMUTATIVE is set, operands 1 and 2 can be swapped. */
29676
29677 bool
29678 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29679 bool uses_oc0, int num_memory, bool commutative)
29680 {
29681 int mem_mask;
29682 int mem_count;
29683 int i;
29684
29685 /* Count the number of memory arguments */
29686 mem_mask = 0;
29687 mem_count = 0;
29688 for (i = 0; i < num; i++)
29689 {
29690 enum machine_mode mode = GET_MODE (operands[i]);
29691 if (register_operand (operands[i], mode))
29692 ;
29693
29694 else if (memory_operand (operands[i], mode))
29695 {
29696 mem_mask |= (1 << i);
29697 mem_count++;
29698 }
29699
29700 else
29701 {
29702 rtx pattern = PATTERN (insn);
29703
29704 /* allow 0 for pcmov */
29705 if (GET_CODE (pattern) != SET
29706 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29707 || i < 2
29708 || operands[i] != CONST0_RTX (mode))
29709 return false;
29710 }
29711 }
29712
29713 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29714 a memory operation. */
29715 if (num_memory < 0)
29716 {
29717 num_memory = -num_memory;
29718 if ((mem_mask & (1 << (num-1))) != 0)
29719 {
29720 mem_mask &= ~(1 << (num-1));
29721 mem_count--;
29722 }
29723 }
29724
29725 /* If there were no memory operations, allow the insn */
29726 if (mem_mask == 0)
29727 return true;
29728
29729 /* Do not allow the destination register to be a memory operand. */
29730 else if (mem_mask & (1 << 0))
29731 return false;
29732
29733 /* If there are too many memory operations, disallow the instruction. While
29734 the hardware only allows 1 memory reference, before register allocation
29735 we sometimes allow two memory operations for some insns, so that code
29736 like the following can be optimized:
29737
29738 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29739
29740 or similar cases that are vectorized into using the fmaddss
29741 instruction. */
29742 else if (mem_count > num_memory)
29743 return false;
29744
29745 /* Don't allow more than one memory operation if not optimizing. */
29746 else if (mem_count > 1 && !optimize)
29747 return false;
29748
29749 else if (num == 4 && mem_count == 1)
29750 {
29751 /* formats (destination is the first argument), example fmaddss:
29752 xmm1, xmm1, xmm2, xmm3/mem
29753 xmm1, xmm1, xmm2/mem, xmm3
29754 xmm1, xmm2, xmm3/mem, xmm1
29755 xmm1, xmm2/mem, xmm3, xmm1 */
29756 if (uses_oc0)
29757 return ((mem_mask == (1 << 1))
29758 || (mem_mask == (1 << 2))
29759 || (mem_mask == (1 << 3)));
29760
29761 /* format, example pmacsdd:
29762 xmm1, xmm2, xmm3/mem, xmm1 */
29763 if (commutative)
29764 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29765 else
29766 return (mem_mask == (1 << 2));
29767 }
29768
29769 else if (num == 4 && num_memory == 2)
29770 {
29771 /* If there are two memory operations, we can load one of the memory ops
29772 into the destination register. This is for optimizing the
29773 multiply/add ops, which the combiner has optimized both the multiply
29774 and the add insns to have a memory operation. We have to be careful
29775 that the destination doesn't overlap with the inputs. */
29776 rtx op0 = operands[0];
29777
29778 if (reg_mentioned_p (op0, operands[1])
29779 || reg_mentioned_p (op0, operands[2])
29780 || reg_mentioned_p (op0, operands[3]))
29781 return false;
29782
29783 /* formats (destination is the first argument), example fmaddss:
29784 xmm1, xmm1, xmm2, xmm3/mem
29785 xmm1, xmm1, xmm2/mem, xmm3
29786 xmm1, xmm2, xmm3/mem, xmm1
29787 xmm1, xmm2/mem, xmm3, xmm1
29788
29789 For the oc0 case, we will load either operands[1] or operands[3] into
29790 operands[0], so any combination of 2 memory operands is ok. */
29791 if (uses_oc0)
29792 return true;
29793
29794 /* format, example pmacsdd:
29795 xmm1, xmm2, xmm3/mem, xmm1
29796
29797 For the integer multiply/add instructions be more restrictive and
29798 require operands[2] and operands[3] to be the memory operands. */
29799 if (commutative)
29800 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
29801 else
29802 return (mem_mask == ((1 << 2) | (1 << 3)));
29803 }
29804
29805 else if (num == 3 && num_memory == 1)
29806 {
29807 /* formats, example protb:
29808 xmm1, xmm2, xmm3/mem
29809 xmm1, xmm2/mem, xmm3 */
29810 if (uses_oc0)
29811 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29812
29813 /* format, example comeq:
29814 xmm1, xmm2, xmm3/mem */
29815 else
29816 return (mem_mask == (1 << 2));
29817 }
29818
29819 else
29820 gcc_unreachable ();
29821
29822 return false;
29823 }
29824
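/* An example of the mem_mask encoding used above: for a four-operand
   fmaddss whose only memory reference is operand 3 (the last source),
   operands[] = { reg, reg, reg, mem } gives mem_mask == (1 << 3) and
   mem_count == 1, which the uses_oc0 case of "num == 4 && mem_count == 1"
   accepts.  */
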
29825 \f
29826 /* Fix up an SSE5 instruction that has 2 memory input references into a form the
29827 hardware will allow by using the destination register to load one of the
29828 memory operations. Presently this is used by the multiply/add routines to
29829 allow 2 memory references. */
29830
29831 void
29832 ix86_expand_sse5_multiple_memory (rtx operands[],
29833 int num,
29834 enum machine_mode mode)
29835 {
29836 rtx op0 = operands[0];
29837 if (num != 4
29838 || memory_operand (op0, mode)
29839 || reg_mentioned_p (op0, operands[1])
29840 || reg_mentioned_p (op0, operands[2])
29841 || reg_mentioned_p (op0, operands[3]))
29842 gcc_unreachable ();
29843
29844 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29845 the destination register. */
29846 if (memory_operand (operands[1], mode))
29847 {
29848 emit_move_insn (op0, operands[1]);
29849 operands[1] = op0;
29850 }
29851 else if (memory_operand (operands[3], mode))
29852 {
29853 emit_move_insn (op0, operands[3]);
29854 operands[3] = op0;
29855 }
29856 else
29857 gcc_unreachable ();
29858
29859 return;
29860 }
29861
29862 \f
29863 /* Table of valid machine attributes. */
29864 static const struct attribute_spec ix86_attribute_table[] =
29865 {
29866 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29867 /* Stdcall attribute says callee is responsible for popping arguments
29868 if they are not variable. */
29869 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29870 /* Fastcall attribute says callee is responsible for popping arguments
29871 if they are not variable. */
29872 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29873 /* Cdecl attribute says the callee is a normal C declaration */
29874 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29875 /* Regparm attribute specifies how many integer arguments are to be
29876 passed in registers. */
29877 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29878 /* Sseregparm attribute says we are using x86_64 calling conventions
29879 for FP arguments. */
29880 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29881 /* force_align_arg_pointer says this function realigns the stack at entry. */
29882 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29883 false, true, true, ix86_handle_cconv_attribute },
29884 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29885 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29886 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29887 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29888 #endif
29889 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29890 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29891 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29892 SUBTARGET_ATTRIBUTE_TABLE,
29893 #endif
29894 /* ms_abi and sysv_abi calling convention function attributes. */
29895 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29896 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29897 /* End element. */
29898 { NULL, 0, 0, false, false, false, NULL }
29899 };
29900
29901 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29902 static int
29903 x86_builtin_vectorization_cost (bool runtime_test)
29904 {
29905 /* If the branch of the runtime test is taken - i.e. - the vectorized
29906 version is skipped - this incurs a misprediction cost (because the
29907 vectorized version is expected to be the fall-through). So we subtract
29908 the latency of a mispredicted branch from the costs that are incurred
29909 when the vectorized version is executed.
29910
29911 TODO: The values in individual target tables have to be tuned or new
29912 fields may be needed. For example, on K8 the default branch path is the
29913 not-taken path. If the taken path is predicted correctly, the minimum
29914 penalty of going down the taken-path is 1 cycle. If the taken-path is
29915 not predicted correctly, then the minimum penalty is 10 cycles. */
29916
29917 if (runtime_test)
29918 {
29919 return (-(ix86_cost->cond_taken_branch_cost));
29920 }
29921 else
29922 return 0;
29923 }
29924
29925 /* Return the calling-ABI-specific va_list type node appropriate
29926 for FNDECL. */
29927
29928 tree
29929 ix86_fn_abi_va_list (tree fndecl)
29930 {
29931 if (!TARGET_64BIT)
29932 return va_list_type_node;
29933 gcc_assert (fndecl != NULL_TREE);
29934
29935 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
29936 return ms_va_list_type_node;
29937 else
29938 return sysv_va_list_type_node;
29939 }
29940
29941 /* Returns the canonical va_list type specified by TYPE. If there
29942 is no valid TYPE provided, it returns NULL_TREE. */
29943
29944 tree
29945 ix86_canonical_va_list_type (tree type)
29946 {
29947 tree wtype, htype;
29948
29949 /* Resolve references and pointers to va_list type. */
29950 if (INDIRECT_REF_P (type))
29951 type = TREE_TYPE (type);
29952 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
29953 type = TREE_TYPE (type);
29954
29955 if (TARGET_64BIT)
29956 {
29957 wtype = va_list_type_node;
29958 gcc_assert (wtype != NULL_TREE);
29959 htype = type;
29960 if (TREE_CODE (wtype) == ARRAY_TYPE)
29961 {
29962 /* If va_list is an array type, the argument may have decayed
29963 to a pointer type, e.g. by being passed to another function.
29964 In that case, unwrap both types so that we can compare the
29965 underlying records. */
29966 if (TREE_CODE (htype) == ARRAY_TYPE
29967 || POINTER_TYPE_P (htype))
29968 {
29969 wtype = TREE_TYPE (wtype);
29970 htype = TREE_TYPE (htype);
29971 }
29972 }
29973 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29974 return va_list_type_node;
29975 wtype = sysv_va_list_type_node;
29976 gcc_assert (wtype != NULL_TREE);
29977 htype = type;
29978 if (TREE_CODE (wtype) == ARRAY_TYPE)
29979 {
29980 /* If va_list is an array type, the argument may have decayed
29981 to a pointer type, e.g. by being passed to another function.
29982 In that case, unwrap both types so that we can compare the
29983 underlying records. */
29984 if (TREE_CODE (htype) == ARRAY_TYPE
29985 || POINTER_TYPE_P (htype))
29986 {
29987 wtype = TREE_TYPE (wtype);
29988 htype = TREE_TYPE (htype);
29989 }
29990 }
29991 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29992 return sysv_va_list_type_node;
29993 wtype = ms_va_list_type_node;
29994 gcc_assert (wtype != NULL_TREE);
29995 htype = type;
29996 if (TREE_CODE (wtype) == ARRAY_TYPE)
29997 {
29998 /* If va_list is an array type, the argument may have decayed
29999 to a pointer type, e.g. by being passed to another function.
30000 In that case, unwrap both types so that we can compare the
30001 underlying records. */
30002 if (TREE_CODE (htype) == ARRAY_TYPE
30003 || POINTER_TYPE_P (htype))
30004 {
30005 wtype = TREE_TYPE (wtype);
30006 htype = TREE_TYPE (htype);
30007 }
30008 }
30009 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30010 return ms_va_list_type_node;
30011 return NULL_TREE;
30012 }
30013 return std_canonical_va_list_type (type);
30014 }
30015
30016 /* Iterate through the target-specific builtin types for va_list.
30017 IDX denotes the iterator, *PTREE is set to the type of the
30018 va_list builtin, and *PNAME to its internal name.
30019 Returns zero if there is no element for this index, otherwise
30020 IDX should be increased upon the next call.
30021 Note, do not iterate a base builtin's name like __builtin_va_list.
30022 Used from c_common_nodes_and_builtins. */
30023
30024 int
30025 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
30026 {
30027 if (!TARGET_64BIT)
30028 return 0;
30029 switch (idx) {
30030 case 0:
30031 *ptree = ms_va_list_type_node;
30032 *pname = "__builtin_ms_va_list";
30033 break;
30034 case 1:
30035 *ptree = sysv_va_list_type_node;
30036 *pname = "__builtin_sysv_va_list";
30037 break;
30038 default:
30039 return 0;
30040 }
30041 return 1;
30042 }
30043
30044 /* Initialize the GCC target structure. */
30045 #undef TARGET_RETURN_IN_MEMORY
30046 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30047
30048 #undef TARGET_LEGITIMIZE_ADDRESS
30049 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30050
30051 #undef TARGET_ATTRIBUTE_TABLE
30052 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30053 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30054 # undef TARGET_MERGE_DECL_ATTRIBUTES
30055 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30056 #endif
30057
30058 #undef TARGET_COMP_TYPE_ATTRIBUTES
30059 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30060
30061 #undef TARGET_INIT_BUILTINS
30062 #define TARGET_INIT_BUILTINS ix86_init_builtins
30063 #undef TARGET_EXPAND_BUILTIN
30064 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30065
30066 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30067 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30068 ix86_builtin_vectorized_function
30069
30070 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30071 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30072
30073 #undef TARGET_BUILTIN_RECIPROCAL
30074 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30075
30076 #undef TARGET_ASM_FUNCTION_EPILOGUE
30077 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30078
30079 #undef TARGET_ENCODE_SECTION_INFO
30080 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30081 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30082 #else
30083 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30084 #endif
30085
30086 #undef TARGET_ASM_OPEN_PAREN
30087 #define TARGET_ASM_OPEN_PAREN ""
30088 #undef TARGET_ASM_CLOSE_PAREN
30089 #define TARGET_ASM_CLOSE_PAREN ""
30090
30091 #undef TARGET_ASM_ALIGNED_HI_OP
30092 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30093 #undef TARGET_ASM_ALIGNED_SI_OP
30094 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30095 #ifdef ASM_QUAD
30096 #undef TARGET_ASM_ALIGNED_DI_OP
30097 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30098 #endif
30099
30100 #undef TARGET_ASM_UNALIGNED_HI_OP
30101 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30102 #undef TARGET_ASM_UNALIGNED_SI_OP
30103 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30104 #undef TARGET_ASM_UNALIGNED_DI_OP
30105 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30106
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

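/* Argument passing, varargs, and stack-realignment (DRAP) hooks.  */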
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

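/* Hooks for the "target" function attribute and function-specific options.  */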
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_CAN_INLINE_P
#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

struct gcc_target targetm = TARGET_INITIALIZER;
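/* The middle end reaches the implementations above only through targetm.
   As a rough illustration (field names follow struct gcc_target in
   target.h; the call sites are not taken from this file), aggregate
   return decisions go through targetm.calls.return_in_memory (type,
   fntype) and the scheduler asks targetm.sched.issue_rate () for the
   issue width selected above.  */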
\f
#include "gt-i386.h"